Cross platform build fix for SSIM (part 2)
Data alignment fix for SSIM. A WebRtc_Word64[2] array wasn't always aligned to 128 bits (16 bytes), which is necessary for _mm_store_si128. By declaring the variable as __m128i it will always be 128-bit (16-byte) aligned. Related to issue 239013. http://webrtc-codereview.appspot.com/239013/ Review URL: https://webrtc-codereview.appspot.com/375004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1582 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
26e8a58130
commit
d8f58a4ab0
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Use of this source code is governed by a BSD-style license
|
* Use of this source code is governed by a BSD-style license
|
||||||
* that can be found in the LICENSE file in the root of the source
|
* that can be found in the LICENSE file in the root of the source
|
||||||
@ -80,15 +80,22 @@ VPMContentAnalysis::TemporalDiffMetric_SSE2()
|
|||||||
numPixels += (width_end - _border);
|
numPixels += (width_end - _border);
|
||||||
}
|
}
|
||||||
|
|
||||||
WebRtc_Word64 sad_final_64[2];
|
__m128i sad_final_128;
|
||||||
WebRtc_Word64 sum_final_64[2];
|
__m128i sum_final_128;
|
||||||
WebRtc_Word64 sqsum_final_64[2];
|
__m128i sqsum_final_128;
|
||||||
|
|
||||||
// bring sums out of vector registers and into integer register
|
// bring sums out of vector registers and into integer register
|
||||||
// domain, summing them along the way
|
// domain, summing them along the way
|
||||||
_mm_store_si128 ((__m128i*)sad_final_64, sad_64);
|
_mm_store_si128 (&sad_final_128, sad_64);
|
||||||
_mm_store_si128 ((__m128i*)sum_final_64, sum_64);
|
_mm_store_si128 (&sum_final_128, sum_64);
|
||||||
_mm_store_si128 ((__m128i*)sqsum_final_64, sqsum_64);
|
_mm_store_si128 (&sqsum_final_128, sqsum_64);
|
||||||
|
|
||||||
|
WebRtc_UWord64 *sad_final_64 =
|
||||||
|
reinterpret_cast<WebRtc_UWord64*>(&sad_final_128);
|
||||||
|
WebRtc_UWord64 *sum_final_64 =
|
||||||
|
reinterpret_cast<WebRtc_UWord64*>(&sum_final_128);
|
||||||
|
WebRtc_UWord64 *sqsum_final_64 =
|
||||||
|
reinterpret_cast<WebRtc_UWord64*>(&sqsum_final_128);
|
||||||
|
|
||||||
const WebRtc_UWord32 pixelSum = sum_final_64[0] + sum_final_64[1];
|
const WebRtc_UWord32 pixelSum = sum_final_64[0] + sum_final_64[1];
|
||||||
const WebRtc_UWord64 pixelSqSum = sqsum_final_64[0] + sqsum_final_64[1];
|
const WebRtc_UWord64 pixelSqSum = sqsum_final_64[0] + sqsum_final_64[1];
|
||||||
@ -238,26 +245,35 @@ VPMContentAnalysis::ComputeSpatialMetrics_SSE2()
|
|||||||
imgBuf += _width * _skipNum;
|
imgBuf += _width * _skipNum;
|
||||||
}
|
}
|
||||||
|
|
||||||
WebRtc_Word64 se_64[2];
|
__m128i se_128;
|
||||||
WebRtc_Word64 sev_64[2];
|
__m128i sev_128;
|
||||||
WebRtc_Word64 seh_64[2];
|
__m128i seh_128;
|
||||||
WebRtc_Word64 msa_64[2];
|
__m128i msa_128;
|
||||||
|
|
||||||
// bring sums out of vector registers and into integer register
|
// bring sums out of vector registers and into integer register
|
||||||
// domain, summing them along the way
|
// domain, summing them along the way
|
||||||
_mm_store_si128 ((__m128i*)se_64,
|
_mm_store_si128 (&se_128,
|
||||||
_mm_add_epi64(_mm_unpackhi_epi32(se_32,z),
|
_mm_add_epi64(_mm_unpackhi_epi32(se_32,z),
|
||||||
_mm_unpacklo_epi32(se_32,z)));
|
_mm_unpacklo_epi32(se_32,z)));
|
||||||
_mm_store_si128 ((__m128i*)sev_64,
|
_mm_store_si128 (&sev_128,
|
||||||
_mm_add_epi64(_mm_unpackhi_epi32(sev_32,z),
|
_mm_add_epi64(_mm_unpackhi_epi32(sev_32,z),
|
||||||
_mm_unpacklo_epi32(sev_32,z)));
|
_mm_unpacklo_epi32(sev_32,z)));
|
||||||
_mm_store_si128 ((__m128i*)seh_64,
|
_mm_store_si128 (&seh_128,
|
||||||
_mm_add_epi64(_mm_unpackhi_epi32(seh_32,z),
|
_mm_add_epi64(_mm_unpackhi_epi32(seh_32,z),
|
||||||
_mm_unpacklo_epi32(seh_32,z)));
|
_mm_unpacklo_epi32(seh_32,z)));
|
||||||
_mm_store_si128 ((__m128i*)msa_64,
|
_mm_store_si128 (&msa_128,
|
||||||
_mm_add_epi64(_mm_unpackhi_epi32(msa_32,z),
|
_mm_add_epi64(_mm_unpackhi_epi32(msa_32,z),
|
||||||
_mm_unpacklo_epi32(msa_32,z)));
|
_mm_unpacklo_epi32(msa_32,z)));
|
||||||
|
|
||||||
|
WebRtc_UWord64 *se_64 =
|
||||||
|
reinterpret_cast<WebRtc_UWord64*>(&se_128);
|
||||||
|
WebRtc_UWord64 *sev_64 =
|
||||||
|
reinterpret_cast<WebRtc_UWord64*>(&sev_128);
|
||||||
|
WebRtc_UWord64 *seh_64 =
|
||||||
|
reinterpret_cast<WebRtc_UWord64*>(&seh_128);
|
||||||
|
WebRtc_UWord64 *msa_64 =
|
||||||
|
reinterpret_cast<WebRtc_UWord64*>(&msa_128);
|
||||||
|
|
||||||
const WebRtc_UWord32 spatialErrSum = se_64[0] + se_64[1];
|
const WebRtc_UWord32 spatialErrSum = se_64[0] + se_64[1];
|
||||||
const WebRtc_UWord32 spatialErrVSum = sev_64[0] + sev_64[1];
|
const WebRtc_UWord32 spatialErrVSum = sev_64[0] + sev_64[1];
|
||||||
const WebRtc_UWord32 spatialErrHSum = seh_64[0] + seh_64[1];
|
const WebRtc_UWord32 spatialErrHSum = seh_64[0] + seh_64[1];
|
||||||
|
Loading…
Reference in New Issue
Block a user