From d8f58a4ab02f1a91869aaadbef20f36eaa3aaa17 Mon Sep 17 00:00:00 2001 From: "frkoenig@google.com" Date: Tue, 31 Jan 2012 17:49:38 +0000 Subject: [PATCH] Cross platform build fix for SSIM (part 2) Data alignment fix for SSIM. WebRtc_Word64[2] wasn't always aligned to 128 bits (16 bytes), which is necessary for _mm_store_si128. By declaring the variable as __m128i it will always be 128-bit aligned. Related to issue 239013. http://webrtc-codereview.appspot.com/239013/ Review URL: https://webrtc-codereview.appspot.com/375004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1582 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../main/source/content_analysis_sse2.cc | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/modules/video_processing/main/source/content_analysis_sse2.cc b/src/modules/video_processing/main/source/content_analysis_sse2.cc index 347fa5b60..810c3cc0f 100644 --- a/src/modules/video_processing/main/source/content_analysis_sse2.cc +++ b/src/modules/video_processing/main/source/content_analysis_sse2.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -80,15 +80,22 @@ VPMContentAnalysis::TemporalDiffMetric_SSE2() numPixels += (width_end - _border); } - WebRtc_Word64 sad_final_64[2]; - WebRtc_Word64 sum_final_64[2]; - WebRtc_Word64 sqsum_final_64[2]; + __m128i sad_final_128; + __m128i sum_final_128; + __m128i sqsum_final_128; // bring sums out of vector registers and into integer register // domain, summing them along the way - _mm_store_si128 ((__m128i*)sad_final_64, sad_64); - _mm_store_si128 ((__m128i*)sum_final_64, sum_64); - _mm_store_si128 ((__m128i*)sqsum_final_64, sqsum_64); + _mm_store_si128 (&sad_final_128, sad_64); + _mm_store_si128 (&sum_final_128, sum_64); + _mm_store_si128 (&sqsum_final_128, sqsum_64); + + WebRtc_UWord64 *sad_final_64 = + reinterpret_cast<WebRtc_UWord64*>(&sad_final_128); + WebRtc_UWord64 *sum_final_64 = + reinterpret_cast<WebRtc_UWord64*>(&sum_final_128); + WebRtc_UWord64 *sqsum_final_64 = + reinterpret_cast<WebRtc_UWord64*>(&sqsum_final_128); const WebRtc_UWord32 pixelSum = sum_final_64[0] + sum_final_64[1]; const WebRtc_UWord64 pixelSqSum = sqsum_final_64[0] + sqsum_final_64[1]; @@ -238,26 +245,35 @@ VPMContentAnalysis::ComputeSpatialMetrics_SSE2() imgBuf += _width * _skipNum; } - WebRtc_Word64 se_64[2]; - WebRtc_Word64 sev_64[2]; - WebRtc_Word64 seh_64[2]; - WebRtc_Word64 msa_64[2]; + __m128i se_128; + __m128i sev_128; + __m128i seh_128; + __m128i msa_128; // bring sums out of vector registers and into integer register // domain, summing them along the way - _mm_store_si128 ((__m128i*)se_64, + _mm_store_si128 (&se_128, _mm_add_epi64(_mm_unpackhi_epi32(se_32,z), _mm_unpacklo_epi32(se_32,z))); - _mm_store_si128 ((__m128i*)sev_64, + _mm_store_si128 (&sev_128, _mm_add_epi64(_mm_unpackhi_epi32(sev_32,z), _mm_unpacklo_epi32(sev_32,z))); - _mm_store_si128 ((__m128i*)seh_64, + _mm_store_si128 (&seh_128, _mm_add_epi64(_mm_unpackhi_epi32(seh_32,z), _mm_unpacklo_epi32(seh_32,z))); - _mm_store_si128 
((__m128i*)msa_64, + _mm_store_si128 (&msa_128, _mm_add_epi64(_mm_unpackhi_epi32(msa_32,z), _mm_unpacklo_epi32(msa_32,z))); + WebRtc_UWord64 *se_64 = + reinterpret_cast<WebRtc_UWord64*>(&se_128); + WebRtc_UWord64 *sev_64 = + reinterpret_cast<WebRtc_UWord64*>(&sev_128); + WebRtc_UWord64 *seh_64 = + reinterpret_cast<WebRtc_UWord64*>(&seh_128); + WebRtc_UWord64 *msa_64 = + reinterpret_cast<WebRtc_UWord64*>(&msa_128); + const WebRtc_UWord32 spatialErrSum = se_64[0] + se_64[1]; const WebRtc_UWord32 spatialErrVSum = sev_64[0] + sev_64[1]; const WebRtc_UWord32 spatialErrHSum = seh_64[0] + seh_64[1];