Cross platform build fix for SSIM (part 2)

Data alignment fix for SSIM.

The WebRtc_Word64[2] arrays weren't always aligned to 128 bits
(16 bytes), which is necessary for _mm_store_si128.  By declaring the
variables as __m128i they will always be 16-byte aligned.

Related to issue 239013.
http://webrtc-codereview.appspot.com/239013/
Review URL: https://webrtc-codereview.appspot.com/375004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1582 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
frkoenig@google.com 2012-01-31 17:49:38 +00:00
parent 26e8a58130
commit d8f58a4ab0

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
* *
* Use of this source code is governed by a BSD-style license * Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source * that can be found in the LICENSE file in the root of the source
@ -80,15 +80,22 @@ VPMContentAnalysis::TemporalDiffMetric_SSE2()
numPixels += (width_end - _border); numPixels += (width_end - _border);
} }
WebRtc_Word64 sad_final_64[2]; __m128i sad_final_128;
WebRtc_Word64 sum_final_64[2]; __m128i sum_final_128;
WebRtc_Word64 sqsum_final_64[2]; __m128i sqsum_final_128;
// bring sums out of vector registers and into integer register // bring sums out of vector registers and into integer register
// domain, summing them along the way // domain, summing them along the way
_mm_store_si128 ((__m128i*)sad_final_64, sad_64); _mm_store_si128 (&sad_final_128, sad_64);
_mm_store_si128 ((__m128i*)sum_final_64, sum_64); _mm_store_si128 (&sum_final_128, sum_64);
_mm_store_si128 ((__m128i*)sqsum_final_64, sqsum_64); _mm_store_si128 (&sqsum_final_128, sqsum_64);
WebRtc_UWord64 *sad_final_64 =
reinterpret_cast<WebRtc_UWord64*>(&sad_final_128);
WebRtc_UWord64 *sum_final_64 =
reinterpret_cast<WebRtc_UWord64*>(&sum_final_128);
WebRtc_UWord64 *sqsum_final_64 =
reinterpret_cast<WebRtc_UWord64*>(&sqsum_final_128);
const WebRtc_UWord32 pixelSum = sum_final_64[0] + sum_final_64[1]; const WebRtc_UWord32 pixelSum = sum_final_64[0] + sum_final_64[1];
const WebRtc_UWord64 pixelSqSum = sqsum_final_64[0] + sqsum_final_64[1]; const WebRtc_UWord64 pixelSqSum = sqsum_final_64[0] + sqsum_final_64[1];
@ -238,26 +245,35 @@ VPMContentAnalysis::ComputeSpatialMetrics_SSE2()
imgBuf += _width * _skipNum; imgBuf += _width * _skipNum;
} }
WebRtc_Word64 se_64[2]; __m128i se_128;
WebRtc_Word64 sev_64[2]; __m128i sev_128;
WebRtc_Word64 seh_64[2]; __m128i seh_128;
WebRtc_Word64 msa_64[2]; __m128i msa_128;
// bring sums out of vector registers and into integer register // bring sums out of vector registers and into integer register
// domain, summing them along the way // domain, summing them along the way
_mm_store_si128 ((__m128i*)se_64, _mm_store_si128 (&se_128,
_mm_add_epi64(_mm_unpackhi_epi32(se_32,z), _mm_add_epi64(_mm_unpackhi_epi32(se_32,z),
_mm_unpacklo_epi32(se_32,z))); _mm_unpacklo_epi32(se_32,z)));
_mm_store_si128 ((__m128i*)sev_64, _mm_store_si128 (&sev_128,
_mm_add_epi64(_mm_unpackhi_epi32(sev_32,z), _mm_add_epi64(_mm_unpackhi_epi32(sev_32,z),
_mm_unpacklo_epi32(sev_32,z))); _mm_unpacklo_epi32(sev_32,z)));
_mm_store_si128 ((__m128i*)seh_64, _mm_store_si128 (&seh_128,
_mm_add_epi64(_mm_unpackhi_epi32(seh_32,z), _mm_add_epi64(_mm_unpackhi_epi32(seh_32,z),
_mm_unpacklo_epi32(seh_32,z))); _mm_unpacklo_epi32(seh_32,z)));
_mm_store_si128 ((__m128i*)msa_64, _mm_store_si128 (&msa_128,
_mm_add_epi64(_mm_unpackhi_epi32(msa_32,z), _mm_add_epi64(_mm_unpackhi_epi32(msa_32,z),
_mm_unpacklo_epi32(msa_32,z))); _mm_unpacklo_epi32(msa_32,z)));
WebRtc_UWord64 *se_64 =
reinterpret_cast<WebRtc_UWord64*>(&se_128);
WebRtc_UWord64 *sev_64 =
reinterpret_cast<WebRtc_UWord64*>(&sev_128);
WebRtc_UWord64 *seh_64 =
reinterpret_cast<WebRtc_UWord64*>(&seh_128);
WebRtc_UWord64 *msa_64 =
reinterpret_cast<WebRtc_UWord64*>(&msa_128);
const WebRtc_UWord32 spatialErrSum = se_64[0] + se_64[1]; const WebRtc_UWord32 spatialErrSum = se_64[0] + se_64[1];
const WebRtc_UWord32 spatialErrVSum = sev_64[0] + sev_64[1]; const WebRtc_UWord32 spatialErrVSum = sev_64[0] + sev_64[1];
const WebRtc_UWord32 spatialErrHSum = seh_64[0] + seh_64[1]; const WebRtc_UWord32 spatialErrHSum = seh_64[0] + seh_64[1];