libyuv: update to r1305

MIPS build fixes

https://code.google.com/p/webm/issues/detail?id=957

Change-Id: I9d53900af36d783c369b5dff27a7479cb94fd16b
This commit is contained in:
Johann
2015-03-02 15:19:19 -08:00
parent 1790d45252
commit 223bf29307
46 changed files with 7713 additions and 10701 deletions

View File

@@ -27,13 +27,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
pxor xmm0, xmm0
pxor xmm5, xmm5
align 4
wloop:
movdqa xmm1, [eax]
movdqu xmm1, [eax]
lea eax, [eax + 16]
movdqa xmm2, [edx]
movdqu xmm2, [edx]
lea edx, [edx + 16]
sub ecx, 16
movdqa xmm3, xmm1 // abs trick
psubusb xmm1, xmm2
psubusb xmm2, xmm3
@@ -45,6 +43,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
pmaddwd xmm2, xmm2
paddd xmm0, xmm1
paddd xmm0, xmm2
sub ecx, 16
jg wloop
pshufd xmm1, xmm0, 0xee
@@ -70,12 +69,10 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
sub edx, eax
align 4
wloop:
vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + edx]
lea eax, [eax + 32]
sub ecx, 32
vpsubusb ymm3, ymm1, ymm2 // abs difference trick
vpsubusb ymm2, ymm2, ymm1
vpor ymm1, ymm2, ymm3
@@ -85,6 +82,7 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
vpmaddwd ymm1, ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpaddd ymm0, ymm0, ymm2
sub ecx, 32
jg wloop
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
@@ -145,7 +143,6 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, kHash16x33
align 4
wloop:
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
@@ -170,7 +167,6 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
pmulld(0xcd) // pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
@@ -178,6 +174,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
sub ecx, 16
jg wloop
movd eax, xmm0 // return hash
@@ -195,7 +192,6 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
movd xmm0, [esp + 12] // seed
movdqa xmm6, kHash16x33
align 4
wloop:
vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
pmulld xmm0, xmm6 // hash *= 33 ^ 16
@@ -209,13 +205,13 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
pmulld xmm1, kHashMul3
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
sub ecx, 16
jg wloop
movd eax, xmm0 // return hash