Revert "libyuv: update to b8ddb5a2"
This reverts commit b8f83282f8.
The update was to the wrong version and still has:
BUG=webm:1252
Change-Id: I80f3a7c0581ab5e2dd1a84f7840e51d7c362afac
			
			
This commit is contained in:
		
							
								
								
									
										12
									
								
								third_party/libyuv/README.libvpx
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								third_party/libyuv/README.libvpx
									
									
									
									
										vendored
									
									
								
							| @@ -1,6 +1,6 @@ | ||||
| Name: libyuv | ||||
| URL: http://code.google.com/p/libyuv/ | ||||
| Version: b8ddb5a2 | ||||
| Version: 1456 | ||||
| License: BSD | ||||
| License File: LICENSE | ||||
|  | ||||
| @@ -13,13 +13,3 @@ which down-samples the original input video (f.g. 1280x720) a number of times | ||||
| in order to encode multiple resolution bit streams. | ||||
|  | ||||
| Local Modifications: | ||||
|  | ||||
| Remove files unnecessary to libvpx build. | ||||
|  | ||||
| rm -rf .gitignore .gn AUTHORS Android.mk BUILD.gn CMakeLists.txt DEPS LICENSE \ | ||||
|   LICENSE_THIRD_PARTY OWNERS PATENTS PRESUBMIT.py README.chromium README.md \ | ||||
|   all.gyp build_overrides/ chromium/ codereview.settings docs/ \ | ||||
|   download_vs_toolchain.py gyp_libyuv gyp_libyuv.py include/libyuv.h \ | ||||
|   include/libyuv/compare_row.h libyuv.gyp libyuv.gypi libyuv_nacl.gyp \ | ||||
|   libyuv_test.gyp linux.mk public.mk setup_links.py sync_chromium.py \ | ||||
|   third_party/ tools/ unit_test/ util/ winarm.mk | ||||
|   | ||||
							
								
								
									
										6
									
								
								third_party/libyuv/include/libyuv/convert.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								third_party/libyuv/include/libyuv/convert.h
									
									
									
									
										vendored
									
									
								
							| @@ -12,8 +12,10 @@ | ||||
| #define INCLUDE_LIBYUV_CONVERT_H_ | ||||
|  | ||||
| #include "libyuv/basic_types.h" | ||||
|  | ||||
| #include "libyuv/rotate.h"  // For enum RotationMode. | ||||
| // TODO(fbarchard): Remove the following headers includes. | ||||
| #include "libyuv/convert_from.h" | ||||
| #include "libyuv/planar_functions.h" | ||||
| #include "libyuv/rotate.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| namespace libyuv { | ||||
|   | ||||
							
								
								
									
										88
									
								
								third_party/libyuv/include/libyuv/convert_argb.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										88
									
								
								third_party/libyuv/include/libyuv/convert_argb.h
									
									
									
									
										vendored
									
									
								
							| @@ -12,8 +12,10 @@ | ||||
| #define INCLUDE_LIBYUV_CONVERT_ARGB_H_ | ||||
|  | ||||
| #include "libyuv/basic_types.h" | ||||
|  | ||||
| #include "libyuv/rotate.h"  // For enum RotationMode. | ||||
| // TODO(fbarchard): Remove the following headers includes | ||||
| #include "libyuv/convert_from.h" | ||||
| #include "libyuv/planar_functions.h" | ||||
| #include "libyuv/rotate.h" | ||||
|  | ||||
| // TODO(fbarchard): This set of functions should exactly match convert.h | ||||
| // TODO(fbarchard): Add tests. Create random content of right size and convert | ||||
| @@ -58,22 +60,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert J444 to ARGB. | ||||
| LIBYUV_API | ||||
| int J444ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert I444 to ABGR. | ||||
| LIBYUV_API | ||||
| int I444ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert I411 to ARGB. | ||||
| LIBYUV_API | ||||
| int I411ToARGB(const uint8* src_y, int src_stride_y, | ||||
| @@ -82,24 +68,6 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert I420 with Alpha to preattenuated ARGB. | ||||
| LIBYUV_API | ||||
| int I420AlphaToARGB(const uint8* src_y, int src_stride_y, | ||||
|                     const uint8* src_u, int src_stride_u, | ||||
|                     const uint8* src_v, int src_stride_v, | ||||
|                     const uint8* src_a, int src_stride_a, | ||||
|                     uint8* dst_argb, int dst_stride_argb, | ||||
|                     int width, int height, int attenuate); | ||||
|  | ||||
| // Convert I420 with Alpha to preattenuated ABGR. | ||||
| LIBYUV_API | ||||
| int I420AlphaToABGR(const uint8* src_y, int src_stride_y, | ||||
|                     const uint8* src_u, int src_stride_u, | ||||
|                     const uint8* src_v, int src_stride_v, | ||||
|                     const uint8* src_a, int src_stride_a, | ||||
|                     uint8* dst_abgr, int dst_stride_abgr, | ||||
|                     int width, int height, int attenuate); | ||||
|  | ||||
| // Convert I400 (grey) to ARGB.  Reverse of ARGBToI400. | ||||
| LIBYUV_API | ||||
| int I400ToARGB(const uint8* src_y, int src_stride_y, | ||||
| @@ -163,54 +131,6 @@ int J422ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert J420 to ABGR. | ||||
| LIBYUV_API | ||||
| int J420ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert J422 to ABGR. | ||||
| LIBYUV_API | ||||
| int J422ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert H420 to ARGB. | ||||
| LIBYUV_API | ||||
| int H420ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert H422 to ARGB. | ||||
| LIBYUV_API | ||||
| int H422ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert H420 to ABGR. | ||||
| LIBYUV_API | ||||
| int H420ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height); | ||||
|  | ||||
| // Convert H422 to ABGR. | ||||
| LIBYUV_API | ||||
| int H422ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height); | ||||
|  | ||||
| // BGRA little endian (argb in memory) to ARGB. | ||||
| LIBYUV_API | ||||
| int BGRAToARGB(const uint8* src_frame, int src_stride_frame, | ||||
|   | ||||
| @@ -56,6 +56,8 @@ int I400Copy(const uint8* src_y, int src_stride_y, | ||||
|              uint8* dst_y, int dst_stride_y, | ||||
|              int width, int height); | ||||
|  | ||||
| // TODO(fbarchard): I420ToM420 | ||||
|  | ||||
| LIBYUV_API | ||||
| int I420ToNV12(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|   | ||||
							
								
								
									
										11
									
								
								third_party/libyuv/include/libyuv/cpu_id.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										11
									
								
								third_party/libyuv/include/libyuv/cpu_id.h
									
									
									
									
										vendored
									
									
								
							| @@ -18,8 +18,9 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // TODO(fbarchard): Consider overlapping bits for different architectures. | ||||
| // Internal flag to indicate cpuid requires initialization. | ||||
| static const int kCpuInitialized = 0x1; | ||||
| #define kCpuInit 0x1 | ||||
|  | ||||
| // These flags are only valid on ARM processors. | ||||
| static const int kCpuHasARM = 0x2; | ||||
| @@ -36,12 +37,12 @@ static const int kCpuHasAVX = 0x200; | ||||
| static const int kCpuHasAVX2 = 0x400; | ||||
| static const int kCpuHasERMS = 0x800; | ||||
| static const int kCpuHasFMA3 = 0x1000; | ||||
| static const int kCpuHasAVX3 = 0x2000; | ||||
| // 0x2000, 0x4000, 0x8000 reserved for future X86 flags. | ||||
|  | ||||
| // These flags are only valid on MIPS processors. | ||||
| static const int kCpuHasMIPS = 0x10000; | ||||
| static const int kCpuHasDSPR2 = 0x20000; | ||||
| static const int kCpuHasMIPS_DSP = 0x20000; | ||||
| static const int kCpuHasMIPS_DSPR2 = 0x40000; | ||||
|  | ||||
| // Internal function used to auto-init. | ||||
| LIBYUV_API | ||||
| @@ -56,13 +57,13 @@ int ArmCpuCaps(const char* cpuinfo_name); | ||||
| // returns non-zero if instruction set is detected | ||||
| static __inline int TestCpuFlag(int test_flag) { | ||||
|   LIBYUV_API extern int cpu_info_; | ||||
|   return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag; | ||||
|   return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag; | ||||
| } | ||||
|  | ||||
| // For testing, allow CPU flags to be disabled. | ||||
| // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. | ||||
| // MaskCpuFlags(-1) to enable all cpu specific optimizations. | ||||
| // MaskCpuFlags(1) to disable all cpu specific optimizations. | ||||
| // MaskCpuFlags(0) to disable all cpu specific optimizations. | ||||
| LIBYUV_API | ||||
| void MaskCpuFlags(int enable_flags); | ||||
|  | ||||
|   | ||||
| @@ -145,6 +145,13 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, | ||||
|                  uint8* dst_rgb565, int dst_stride_rgb565, | ||||
|                  int width, int height); | ||||
|  | ||||
| // Convert NV21 to RGB565. | ||||
| LIBYUV_API | ||||
| int NV21ToRGB565(const uint8* src_y, int src_stride_y, | ||||
|                  const uint8* src_uv, int src_stride_uv, | ||||
|                  uint8* dst_rgb565, int dst_stride_rgb565, | ||||
|                  int width, int height); | ||||
|  | ||||
| // I422ToARGB is in convert_argb.h | ||||
| // Convert I422 to BGRA. | ||||
| LIBYUV_API | ||||
| @@ -170,14 +177,6 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y, | ||||
|                uint8* dst_rgba, int dst_stride_rgba, | ||||
|                int width, int height); | ||||
|  | ||||
| // Alias | ||||
| #define RGB24ToRAW RAWToRGB24 | ||||
|  | ||||
| LIBYUV_API | ||||
| int RAWToRGB24(const uint8* src_raw, int src_stride_raw, | ||||
|                uint8* dst_rgb24, int dst_stride_rgb24, | ||||
|                int width, int height); | ||||
|  | ||||
| // Draw a rectangle into I420. | ||||
| LIBYUV_API | ||||
| int I420Rect(uint8* dst_y, int dst_stride_y, | ||||
| @@ -282,19 +281,13 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, | ||||
|              uint8* dst_argb, int dst_stride_argb, | ||||
|              int width, int height); | ||||
|  | ||||
| // Copy Alpha channel of ARGB to alpha of ARGB. | ||||
| // Copy ARGB to ARGB. | ||||
| LIBYUV_API | ||||
| int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, | ||||
|                   uint8* dst_argb, int dst_stride_argb, | ||||
|                   int width, int height); | ||||
|  | ||||
| // Extract the alpha channel from ARGB. | ||||
| LIBYUV_API | ||||
| int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb, | ||||
|                      uint8* dst_a, int dst_stride_a, | ||||
|                      int width, int height); | ||||
|  | ||||
| // Copy Y channel to Alpha of ARGB. | ||||
| // Copy ARGB to ARGB. | ||||
| LIBYUV_API | ||||
| int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, | ||||
|                      uint8* dst_argb, int dst_stride_argb, | ||||
| @@ -308,7 +301,6 @@ LIBYUV_API | ||||
| ARGBBlendRow GetARGBBlend(); | ||||
|  | ||||
| // Alpha Blend ARGB images and store to destination. | ||||
| // Source is pre-multiplied by alpha using ARGBAttenuate. | ||||
| // Alpha of destination is set to 255. | ||||
| LIBYUV_API | ||||
| int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, | ||||
| @@ -316,31 +308,6 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, | ||||
|               uint8* dst_argb, int dst_stride_argb, | ||||
|               int width, int height); | ||||
|  | ||||
| // Alpha Blend plane and store to destination. | ||||
| // Source is not pre-multiplied by alpha. | ||||
| LIBYUV_API | ||||
| int BlendPlane(const uint8* src_y0, int src_stride_y0, | ||||
|                const uint8* src_y1, int src_stride_y1, | ||||
|                const uint8* alpha, int alpha_stride, | ||||
|                uint8* dst_y, int dst_stride_y, | ||||
|                int width, int height); | ||||
|  | ||||
| // Alpha Blend YUV images and store to destination. | ||||
| // Source is not pre-multiplied by alpha. | ||||
| // Alpha is full width x height and subsampled to half size to apply to UV. | ||||
| LIBYUV_API | ||||
| int I420Blend(const uint8* src_y0, int src_stride_y0, | ||||
|               const uint8* src_u0, int src_stride_u0, | ||||
|               const uint8* src_v0, int src_stride_v0, | ||||
|               const uint8* src_y1, int src_stride_y1, | ||||
|               const uint8* src_u1, int src_stride_u1, | ||||
|               const uint8* src_v1, int src_stride_v1, | ||||
|               const uint8* alpha, int alpha_stride, | ||||
|               uint8* dst_y, int dst_stride_y, | ||||
|               uint8* dst_u, int dst_stride_u, | ||||
|               uint8* dst_v, int dst_stride_v, | ||||
|               int width, int height); | ||||
|  | ||||
| // Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. | ||||
| LIBYUV_API | ||||
| int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, | ||||
| @@ -390,6 +357,12 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, | ||||
|                     uint8* dst_argb, int dst_stride_argb, | ||||
|                     int width, int height); | ||||
|  | ||||
| // Convert MJPG to ARGB. | ||||
| LIBYUV_API | ||||
| int MJPGToARGB(const uint8* sample, size_t sample_size, | ||||
|                uint8* argb, int argb_stride, | ||||
|                int w, int h, int dw, int dh); | ||||
|  | ||||
| // Internal function - do not call directly. | ||||
| // Computes table of cumulative sum for image where the value is the sum | ||||
| // of all values above and to the left of the entry. Used by ARGBBlur. | ||||
| @@ -416,49 +389,22 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb, | ||||
|               uint8* dst_argb, int dst_stride_argb, | ||||
|               int width, int height, uint32 value); | ||||
|  | ||||
| // Interpolate between two images using specified amount of interpolation | ||||
| // (0 to 255) and store to destination. | ||||
| // 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 | ||||
| // and 255 means 1% src0 and 99% src1. | ||||
| LIBYUV_API | ||||
| int InterpolatePlane(const uint8* src0, int src_stride0, | ||||
|                      const uint8* src1, int src_stride1, | ||||
|                      uint8* dst, int dst_stride, | ||||
|                      int width, int height, int interpolation); | ||||
|  | ||||
| // Interpolate between two ARGB images using specified amount of interpolation | ||||
| // Internally calls InterpolatePlane with width * 4 (bpp). | ||||
| // (0 to 255) and store to destination. | ||||
| // 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0 | ||||
| // and 255 means 1% src_argb0 and 99% src_argb1. | ||||
| // Internally uses ARGBScale bilinear filtering. | ||||
| // Caveat: This function will write up to 16 bytes beyond the end of dst_argb. | ||||
| LIBYUV_API | ||||
| int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, | ||||
|                     const uint8* src_argb1, int src_stride_argb1, | ||||
|                     uint8* dst_argb, int dst_stride_argb, | ||||
|                     int width, int height, int interpolation); | ||||
|  | ||||
| // Interpolate between two YUV images using specified amount of interpolation | ||||
| // Internally calls InterpolatePlane on each plane where the U and V planes | ||||
| // are half width and half height. | ||||
| LIBYUV_API | ||||
| int I420Interpolate(const uint8* src0_y, int src0_stride_y, | ||||
|                     const uint8* src0_u, int src0_stride_u, | ||||
|                     const uint8* src0_v, int src0_stride_v, | ||||
|                     const uint8* src1_y, int src1_stride_y, | ||||
|                     const uint8* src1_u, int src1_stride_u, | ||||
|                     const uint8* src1_v, int src1_stride_v, | ||||
|                     uint8* dst_y, int dst_stride_y, | ||||
|                     uint8* dst_u, int dst_stride_u, | ||||
|                     uint8* dst_v, int dst_stride_v, | ||||
|                     int width, int height, int interpolation); | ||||
|  | ||||
| #if defined(__pnacl__) || defined(__CLR_VER) || \ | ||||
|     (defined(__i386__) && !defined(__SSE2__)) | ||||
| #define LIBYUV_DISABLE_X86 | ||||
| #endif | ||||
| // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 | ||||
| #if defined(__has_feature) | ||||
| #if __has_feature(memory_sanitizer) | ||||
| #define LIBYUV_DISABLE_X86 | ||||
| #endif | ||||
| #endif | ||||
| // The following are available on all x86 platforms: | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) | ||||
|   | ||||
							
								
								
									
										65
									
								
								third_party/libyuv/include/libyuv/rotate_row.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										65
									
								
								third_party/libyuv/include/libyuv/rotate_row.h
									
									
									
									
										vendored
									
									
								
							| @@ -22,24 +22,53 @@ extern "C" { | ||||
|     (defined(__i386__) && !defined(__SSE2__)) | ||||
| #define LIBYUV_DISABLE_X86 | ||||
| #endif | ||||
| // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 | ||||
| #if defined(__has_feature) | ||||
| #if __has_feature(memory_sanitizer) | ||||
| #define LIBYUV_DISABLE_X86 | ||||
|  | ||||
| // Visual C 2012 required for AVX2. | ||||
| #if defined(_M_IX86) && !defined(__clang__) && \ | ||||
|     defined(_MSC_VER) && _MSC_VER >= 1700 | ||||
| #define VISUALC_HAS_AVX2 1 | ||||
| #endif  // VisualStudio >= 2012 | ||||
|  | ||||
| // TODO(fbarchard): switch to standard form of inline; fails on clangcl. | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) | ||||
| #if defined(__APPLE__) && defined(__i386__) | ||||
| #define DECLARE_FUNCTION(name)                                                 \ | ||||
|     ".text                                     \n"                             \ | ||||
|     ".private_extern _" #name "                \n"                             \ | ||||
|     ".align 4,0x90                             \n"                             \ | ||||
| "_" #name ":                                   \n" | ||||
| #elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__) | ||||
| #define DECLARE_FUNCTION(name)                                                 \ | ||||
|     ".text                                     \n"                             \ | ||||
|     ".align 4,0x90                             \n"                             \ | ||||
| "_" #name ":                                   \n" | ||||
| #else | ||||
| #define DECLARE_FUNCTION(name)                                                 \ | ||||
|     ".text                                     \n"                             \ | ||||
|     ".align 4,0x90                             \n"                             \ | ||||
| #name ":                                       \n" | ||||
| #endif | ||||
| #endif | ||||
| // The following are available for Visual C and clangcl 32 bit: | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) | ||||
|  | ||||
| // The following are available for Visual C: | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ | ||||
|     defined(_MSC_VER) && !defined(__clang__) | ||||
| #define HAS_TRANSPOSEWX8_SSSE3 | ||||
| #define HAS_TRANSPOSEUVWX8_SSE2 | ||||
| #endif | ||||
|  | ||||
| // The following are available for GCC 32 or 64 bit but not NaCL for 64 bit: | ||||
| // The following are available for GCC but not NaCL: | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__))) | ||||
| #define HAS_TRANSPOSEWX8_SSSE3 | ||||
| #endif | ||||
|  | ||||
| // The following are available for 32 bit GCC: | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)  && !defined(__clang__) | ||||
| #define HAS_TRANSPOSEUVWX8_SSE2 | ||||
| #endif | ||||
|  | ||||
| // The following are available for 64 bit GCC but not NaCL: | ||||
| #if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ | ||||
|     defined(__x86_64__) | ||||
| @@ -56,8 +85,8 @@ extern "C" { | ||||
| #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ | ||||
|     defined(__mips__) && \ | ||||
|     defined(__mips_dsp) && (__mips_dsp_rev >= 2) | ||||
| #define HAS_TRANSPOSEWX8_DSPR2 | ||||
| #define HAS_TRANSPOSEUVWX8_DSPR2 | ||||
| #define HAS_TRANSPOSEWX8_MIPS_DSPR2 | ||||
| #define HAS_TRANSPOSEUVWx8_MIPS_DSPR2 | ||||
| #endif  // defined(__mips__) | ||||
|  | ||||
| void TransposeWxH_C(const uint8* src, int src_stride, | ||||
| @@ -71,9 +100,7 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride, | ||||
|                         uint8* dst, int dst_stride, int width); | ||||
| void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride, | ||||
|                              uint8* dst, int dst_stride, int width); | ||||
| void TransposeWx8_DSPR2(const uint8* src, int src_stride, | ||||
|                         uint8* dst, int dst_stride, int width); | ||||
| void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride, | ||||
| void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride, | ||||
|                              uint8* dst, int dst_stride, int width); | ||||
|  | ||||
| void TransposeWx8_Any_NEON(const uint8* src, int src_stride, | ||||
| @@ -82,7 +109,7 @@ void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride, | ||||
|                             uint8* dst, int dst_stride, int width); | ||||
| void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride, | ||||
|                                  uint8* dst, int dst_stride, int width); | ||||
| void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride, | ||||
| void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride, | ||||
|                                  uint8* dst, int dst_stride, int width); | ||||
|  | ||||
| void TransposeUVWxH_C(const uint8* src, int src_stride, | ||||
| @@ -99,17 +126,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, | ||||
| void TransposeUVWx8_NEON(const uint8* src, int src_stride, | ||||
|                          uint8* dst_a, int dst_stride_a, | ||||
|                          uint8* dst_b, int dst_stride_b, int width); | ||||
| void TransposeUVWx8_DSPR2(const uint8* src, int src_stride, | ||||
|                           uint8* dst_a, int dst_stride_a, | ||||
|                           uint8* dst_b, int dst_stride_b, int width); | ||||
|  | ||||
| void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride, | ||||
|                              uint8* dst_a, int dst_stride_a, | ||||
|                              uint8* dst_b, int dst_stride_b, int width); | ||||
| void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride, | ||||
|                              uint8* dst_a, int dst_stride_a, | ||||
|                              uint8* dst_b, int dst_stride_b, int width); | ||||
| void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride, | ||||
| void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride, | ||||
|                                uint8* dst_a, int dst_stride_a, | ||||
|                                uint8* dst_b, int dst_stride_b, int width); | ||||
|  | ||||
|   | ||||
							
								
								
									
										1123
									
								
								third_party/libyuv/include/libyuv/row.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1123
									
								
								third_party/libyuv/include/libyuv/row.h
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -35,6 +35,7 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, | ||||
|                   int clip_x, int clip_y, int clip_width, int clip_height, | ||||
|                   enum FilterMode filtering); | ||||
|  | ||||
| // TODO(fbarchard): Implement this. | ||||
| // Scale with YUV conversion to ARGB and clipping. | ||||
| LIBYUV_API | ||||
| int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, | ||||
|   | ||||
							
								
								
									
										95
									
								
								third_party/libyuv/include/libyuv/scale_row.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										95
									
								
								third_party/libyuv/include/libyuv/scale_row.h
									
									
									
									
										vendored
									
									
								
							| @@ -23,26 +23,6 @@ extern "C" { | ||||
|     (defined(__i386__) && !defined(__SSE2__)) | ||||
| #define LIBYUV_DISABLE_X86 | ||||
| #endif | ||||
| // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 | ||||
| #if defined(__has_feature) | ||||
| #if __has_feature(memory_sanitizer) | ||||
| #define LIBYUV_DISABLE_X86 | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| // GCC >= 4.7.0 required for AVX2. | ||||
| #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) | ||||
| #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) | ||||
| #define GCC_HAS_AVX2 1 | ||||
| #endif  // GNUC >= 4.7 | ||||
| #endif  // __GNUC__ | ||||
|  | ||||
| // clang >= 3.4.0 required for AVX2. | ||||
| #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) | ||||
| #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) | ||||
| #define CLANG_HAS_AVX2 1 | ||||
| #endif  // clang >= 3.4 | ||||
| #endif  // __clang__ | ||||
|  | ||||
| // Visual C 2012 required for AVX2. | ||||
| #if defined(_M_IX86) && !defined(__clang__) && \ | ||||
| @@ -62,23 +42,24 @@ extern "C" { | ||||
| #define HAS_SCALEARGBROWDOWNEVEN_SSE2 | ||||
| #define HAS_SCALECOLSUP2_SSE2 | ||||
| #define HAS_SCALEFILTERCOLS_SSSE3 | ||||
| #define HAS_SCALEROWDOWN2_SSSE3 | ||||
| #define HAS_SCALEROWDOWN2_SSE2 | ||||
| #define HAS_SCALEROWDOWN34_SSSE3 | ||||
| #define HAS_SCALEROWDOWN38_SSSE3 | ||||
| #define HAS_SCALEROWDOWN4_SSSE3 | ||||
| #define HAS_SCALEADDROW_SSE2 | ||||
| #define HAS_SCALEROWDOWN4_SSE2 | ||||
| #endif | ||||
|  | ||||
| // The following are available on all x86 platforms, but | ||||
| // require VS2012, clang 3.4 or gcc 4.7. | ||||
| // The code supports NaCL but requires a new compiler and validator. | ||||
| #if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \ | ||||
|     defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) | ||||
| // The following are available on VS2012: | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2) | ||||
| #define HAS_SCALEADDROW_AVX2 | ||||
| #define HAS_SCALEROWDOWN2_AVX2 | ||||
| #define HAS_SCALEROWDOWN4_AVX2 | ||||
| #endif | ||||
|  | ||||
| // The following are available on Visual C: | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__) | ||||
| #define HAS_SCALEADDROW_SSE2 | ||||
| #endif | ||||
|  | ||||
| // The following are available on Neon platforms: | ||||
| #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ | ||||
|     (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) | ||||
| @@ -96,10 +77,10 @@ extern "C" { | ||||
| // The following are available on Mips platforms: | ||||
| #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ | ||||
|     defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) | ||||
| #define HAS_SCALEROWDOWN2_DSPR2 | ||||
| #define HAS_SCALEROWDOWN4_DSPR2 | ||||
| #define HAS_SCALEROWDOWN34_DSPR2 | ||||
| #define HAS_SCALEROWDOWN38_DSPR2 | ||||
| #define HAS_SCALEROWDOWN2_MIPS_DSPR2 | ||||
| #define HAS_SCALEROWDOWN4_MIPS_DSPR2 | ||||
| #define HAS_SCALEROWDOWN34_MIPS_DSPR2 | ||||
| #define HAS_SCALEROWDOWN38_MIPS_DSPR2 | ||||
| #endif | ||||
|  | ||||
| // Scale ARGB vertically with bilinear interpolation. | ||||
| @@ -152,8 +133,6 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint16* dst, int dst_width); | ||||
| void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst, int dst_width); | ||||
| void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                             uint8* dst, int dst_width); | ||||
| void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint16* dst, int dst_width); | ||||
| void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| @@ -235,11 +214,11 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, | ||||
|                              int dst_width, int x, int dx); | ||||
|  | ||||
| // Specialized scalers for x86. | ||||
| void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width); | ||||
| @@ -247,9 +226,9 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width); | ||||
| @@ -272,13 +251,11 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, | ||||
| void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, | ||||
|                                 ptrdiff_t src_stride, | ||||
|                                 uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                             uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Linear_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                   uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                 uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                             uint8* dst_ptr, int dst_width); | ||||
| @@ -286,11 +263,9 @@ void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                   uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                             uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                              uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                             uint8* dst_ptr, int dst_width); | ||||
| @@ -443,8 +418,6 @@ void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                   uint8* dst, int dst_width); | ||||
| void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                uint8* dst, int dst_width); | ||||
| void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                uint8* dst, int dst_width); | ||||
| void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                             uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| @@ -474,25 +447,27 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, | ||||
| void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, | ||||
|                               int dst_width, int x, int dx); | ||||
|  | ||||
| void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|  | ||||
| void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst, int dst_width); | ||||
| void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                  uint8* dst, int dst_width); | ||||
| void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst, int dst_width); | ||||
| void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                  uint8* dst, int dst_width); | ||||
| void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                uint8* dst, int dst_width); | ||||
| void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                      uint8* d, int dst_width); | ||||
| void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                      uint8* d, int dst_width); | ||||
| void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                uint8* dst, int dst_width); | ||||
| void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                      uint8* dst_ptr, int dst_width); | ||||
| void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, | ||||
|                                      ptrdiff_t src_stride, | ||||
|                                      uint8* dst_ptr, int dst_width); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
|   | ||||
							
								
								
									
										2
									
								
								third_party/libyuv/include/libyuv/version.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								third_party/libyuv/include/libyuv/version.h
									
									
									
									
										vendored
									
									
								
							| @@ -11,6 +11,6 @@ | ||||
| #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT | ||||
| #define INCLUDE_LIBYUV_VERSION_H_ | ||||
|  | ||||
| #define LIBYUV_VERSION 1601 | ||||
| #define LIBYUV_VERSION 1456 | ||||
|  | ||||
| #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT | ||||
|   | ||||
| @@ -90,8 +90,7 @@ enum FourCC { | ||||
|   FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), | ||||
|   FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420. | ||||
|   FOURCC_J420 = FOURCC('J', '4', '2', '0'), | ||||
|   FOURCC_J400 = FOURCC('J', '4', '0', '0'),  // unofficial fourcc | ||||
|   FOURCC_H420 = FOURCC('H', '4', '2', '0'),  // unofficial fourcc | ||||
|   FOURCC_J400 = FOURCC('J', '4', '0', '0'), | ||||
|  | ||||
|   // 14 Auxiliary aliases.  CanonicalFourCC() maps these to canonical fourcc. | ||||
|   FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420. | ||||
| @@ -151,7 +150,6 @@ enum FourCCBpp { | ||||
|   FOURCC_BPP_YU12 = 12, | ||||
|   FOURCC_BPP_J420 = 12, | ||||
|   FOURCC_BPP_J400 = 8, | ||||
|   FOURCC_BPP_H420 = 12, | ||||
|   FOURCC_BPP_MJPG = 0,  // 0 means unknown. | ||||
|   FOURCC_BPP_H264 = 0, | ||||
|   FOURCC_BPP_IYUV = 12, | ||||
|   | ||||
							
								
								
									
										39
									
								
								third_party/libyuv/source/compare.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										39
									
								
								third_party/libyuv/source/compare.cc
									
									
									
									
										vendored
									
									
								
							| @@ -17,7 +17,6 @@ | ||||
| #endif | ||||
|  | ||||
| #include "libyuv/basic_types.h" | ||||
| #include "libyuv/compare_row.h" | ||||
| #include "libyuv/cpu_id.h" | ||||
| #include "libyuv/row.h" | ||||
| #include "libyuv/video_common.h" | ||||
| @@ -27,13 +26,30 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // hash seed of 5381 recommended. | ||||
| // Internal C version of HashDjb2 with int sized count for efficiency. | ||||
| uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); | ||||
|  | ||||
| // This module is for Visual C x86 | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(_M_IX86) || \ | ||||
|     (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))) | ||||
| #define HAS_HASHDJB2_SSE41 | ||||
| uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed); | ||||
|  | ||||
| #ifdef VISUALC_HAS_AVX2 | ||||
| #define HAS_HASHDJB2_AVX2 | ||||
| uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); | ||||
| #endif | ||||
|  | ||||
| #endif  // HAS_HASHDJB2_SSE41 | ||||
|  | ||||
| // hash seed of 5381 recommended. | ||||
| LIBYUV_API | ||||
| uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { | ||||
|   const int kBlockSize = 1 << 15;  // 32768; | ||||
|   int remainder; | ||||
|   uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = | ||||
|       HashDjb2_C; | ||||
|   uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; | ||||
| #if defined(HAS_HASHDJB2_SSE41) | ||||
|   if (TestCpuFlag(kCpuHasSSE41)) { | ||||
|     HashDjb2_SSE = HashDjb2_SSE41; | ||||
| @@ -111,6 +127,23 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) { | ||||
|   return fourcc; | ||||
| } | ||||
|  | ||||
| uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); | ||||
| #if !defined(LIBYUV_DISABLE_NEON) && \ | ||||
|     (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) | ||||
| #define HAS_SUMSQUAREERROR_NEON | ||||
| uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); | ||||
| #endif | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) | ||||
| #define HAS_SUMSQUAREERROR_SSE2 | ||||
| uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); | ||||
| #endif | ||||
|  | ||||
| #ifdef VISUALC_HAS_AVX2 | ||||
| #define HAS_SUMSQUAREERROR_AVX2 | ||||
| uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); | ||||
| #endif | ||||
|  | ||||
| // TODO(fbarchard): Refactor into row function. | ||||
| LIBYUV_API | ||||
| uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, | ||||
|   | ||||
							
								
								
									
										2
									
								
								third_party/libyuv/source/compare_common.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								third_party/libyuv/source/compare_common.cc
									
									
									
									
										vendored
									
									
								
							| @@ -10,8 +10,6 @@ | ||||
|  | ||||
| #include "libyuv/basic_types.h" | ||||
|  | ||||
| #include "libyuv/compare_row.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| namespace libyuv { | ||||
| extern "C" { | ||||
|   | ||||
							
								
								
									
										19
									
								
								third_party/libyuv/source/compare_gcc.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										19
									
								
								third_party/libyuv/source/compare_gcc.cc
									
									
									
									
										vendored
									
									
								
							| @@ -9,8 +9,6 @@ | ||||
|  */ | ||||
|  | ||||
| #include "libyuv/basic_types.h" | ||||
|  | ||||
| #include "libyuv/compare_row.h" | ||||
| #include "libyuv/row.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| @@ -18,13 +16,11 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // This module is for GCC x86 and x64. | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) | ||||
| #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) | ||||
|  | ||||
| uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { | ||||
|   uint32 sse; | ||||
|   asm volatile ( | ||||
|   asm volatile (  // NOLINT | ||||
|     "pxor      %%xmm0,%%xmm0                   \n" | ||||
|     "pxor      %%xmm5,%%xmm5                   \n" | ||||
|     LABELALIGN | ||||
| @@ -58,10 +54,15 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { | ||||
|     "+r"(count),      // %2 | ||||
|     "=g"(sse)         // %3 | ||||
|   :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | ||||
|   ); | ||||
|   );  // NOLINT | ||||
|   return sse; | ||||
| } | ||||
|  | ||||
| #endif  // defined(__x86_64__) || defined(__i386__) | ||||
|  | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) | ||||
| #define HAS_HASHDJB2_SSE41 | ||||
| static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16 | ||||
| static uvec32 kHashMul0 = { | ||||
|   0x0c3525e1,  // 33 ^ 15 | ||||
| @@ -90,7 +91,7 @@ static uvec32 kHashMul3 = { | ||||
|  | ||||
| uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { | ||||
|   uint32 hash; | ||||
|   asm volatile ( | ||||
|   asm volatile (  // NOLINT | ||||
|     "movd      %2,%%xmm0                       \n" | ||||
|     "pxor      %%xmm7,%%xmm7                   \n" | ||||
|     "movdqa    %4,%%xmm6                       \n" | ||||
| @@ -139,7 +140,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { | ||||
|     "m"(kHashMul3)    // %8 | ||||
|   : "memory", "cc" | ||||
|     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | ||||
|   ); | ||||
|   );  // NOLINT | ||||
|   return hash; | ||||
| } | ||||
| #endif  // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) | ||||
|   | ||||
							
								
								
									
										3
									
								
								third_party/libyuv/source/compare_neon.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								third_party/libyuv/source/compare_neon.cc
									
									
									
									
										vendored
									
									
								
							| @@ -9,8 +9,6 @@ | ||||
|  */ | ||||
|  | ||||
| #include "libyuv/basic_types.h" | ||||
|  | ||||
| #include "libyuv/compare_row.h" | ||||
| #include "libyuv/row.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| @@ -29,6 +27,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { | ||||
|     "vmov.u8    q9, #0                         \n" | ||||
|     "vmov.u8    q11, #0                        \n" | ||||
|  | ||||
|     ".p2align  2                               \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld1.8     {q0}, [%0]!                    \n" | ||||
|   | ||||
							
								
								
									
										3
									
								
								third_party/libyuv/source/compare_neon64.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								third_party/libyuv/source/compare_neon64.cc
									
									
									
									
										vendored
									
									
								
							| @@ -9,8 +9,6 @@ | ||||
|  */ | ||||
|  | ||||
| #include "libyuv/basic_types.h" | ||||
|  | ||||
| #include "libyuv/compare_row.h" | ||||
| #include "libyuv/row.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| @@ -28,6 +26,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { | ||||
|     "eor        v17.16b, v17.16b, v17.16b      \n" | ||||
|     "eor        v19.16b, v19.16b, v19.16b      \n" | ||||
|  | ||||
|     ".p2align  2                               \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "ld1        {v0.16b}, [%0], #16            \n" | ||||
|   | ||||
							
								
								
									
										87
									
								
								third_party/libyuv/source/compare_win.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										87
									
								
								third_party/libyuv/source/compare_win.cc
									
									
									
									
										vendored
									
									
								
							| @@ -9,8 +9,6 @@ | ||||
|  */ | ||||
|  | ||||
| #include "libyuv/basic_types.h" | ||||
|  | ||||
| #include "libyuv/compare_row.h" | ||||
| #include "libyuv/row.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| @@ -18,8 +16,9 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // This module is for 32 bit Visual C x86 and clangcl | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) | ||||
| // This module is for Visual C x86. | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ | ||||
|     defined(_MSC_VER) && !defined(__clang__) | ||||
|  | ||||
| __declspec(naked) | ||||
| uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { | ||||
| @@ -101,32 +100,41 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { | ||||
| } | ||||
| #endif  // _MSC_VER >= 1700 | ||||
|  | ||||
| uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16 | ||||
| uvec32 kHashMul0 = { | ||||
| #define HAS_HASHDJB2_SSE41 | ||||
| static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16 | ||||
| static uvec32 kHashMul0 = { | ||||
|   0x0c3525e1,  // 33 ^ 15 | ||||
|   0xa3476dc1,  // 33 ^ 14 | ||||
|   0x3b4039a1,  // 33 ^ 13 | ||||
|   0x4f5f0981,  // 33 ^ 12 | ||||
| }; | ||||
| uvec32 kHashMul1 = { | ||||
| static uvec32 kHashMul1 = { | ||||
|   0x30f35d61,  // 33 ^ 11 | ||||
|   0x855cb541,  // 33 ^ 10 | ||||
|   0x040a9121,  // 33 ^ 9 | ||||
|   0x747c7101,  // 33 ^ 8 | ||||
| }; | ||||
| uvec32 kHashMul2 = { | ||||
| static uvec32 kHashMul2 = { | ||||
|   0xec41d4e1,  // 33 ^ 7 | ||||
|   0x4cfa3cc1,  // 33 ^ 6 | ||||
|   0x025528a1,  // 33 ^ 5 | ||||
|   0x00121881,  // 33 ^ 4 | ||||
| }; | ||||
| uvec32 kHashMul3 = { | ||||
| static uvec32 kHashMul3 = { | ||||
|   0x00008c61,  // 33 ^ 3 | ||||
|   0x00000441,  // 33 ^ 2 | ||||
|   0x00000021,  // 33 ^ 1 | ||||
|   0x00000001,  // 33 ^ 0 | ||||
| }; | ||||
|  | ||||
| // 27: 66 0F 38 40 C6     pmulld      xmm0,xmm6 | ||||
| // 44: 66 0F 38 40 DD     pmulld      xmm3,xmm5 | ||||
| // 59: 66 0F 38 40 E5     pmulld      xmm4,xmm5 | ||||
| // 72: 66 0F 38 40 D5     pmulld      xmm2,xmm5 | ||||
| // 83: 66 0F 38 40 CD     pmulld      xmm1,xmm5 | ||||
| #define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ | ||||
|     _asm _emit 0x40 _asm _emit reg | ||||
|  | ||||
| __declspec(naked) | ||||
| uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { | ||||
|   __asm { | ||||
| @@ -135,30 +143,30 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { | ||||
|     movd       xmm0, [esp + 12]  // seed | ||||
|  | ||||
|     pxor       xmm7, xmm7        // constant 0 for unpck | ||||
|     movdqa     xmm6, xmmword ptr kHash16x33 | ||||
|     movdqa     xmm6, kHash16x33 | ||||
|  | ||||
|   wloop: | ||||
|     movdqu     xmm1, [eax]       // src[0-15] | ||||
|     lea        eax, [eax + 16] | ||||
|     pmulld     xmm0, xmm6        // hash *= 33 ^ 16 | ||||
|     movdqa     xmm5, xmmword ptr kHashMul0 | ||||
|     pmulld(0xc6)                 // pmulld      xmm0,xmm6  hash *= 33 ^ 16 | ||||
|     movdqa     xmm5, kHashMul0 | ||||
|     movdqa     xmm2, xmm1 | ||||
|     punpcklbw  xmm2, xmm7        // src[0-7] | ||||
|     movdqa     xmm3, xmm2 | ||||
|     punpcklwd  xmm3, xmm7        // src[0-3] | ||||
|     pmulld     xmm3, xmm5 | ||||
|     movdqa     xmm5, xmmword ptr kHashMul1 | ||||
|     pmulld(0xdd)                 // pmulld     xmm3, xmm5 | ||||
|     movdqa     xmm5, kHashMul1 | ||||
|     movdqa     xmm4, xmm2 | ||||
|     punpckhwd  xmm4, xmm7        // src[4-7] | ||||
|     pmulld     xmm4, xmm5 | ||||
|     movdqa     xmm5, xmmword ptr kHashMul2 | ||||
|     pmulld(0xe5)                 // pmulld     xmm4, xmm5 | ||||
|     movdqa     xmm5, kHashMul2 | ||||
|     punpckhbw  xmm1, xmm7        // src[8-15] | ||||
|     movdqa     xmm2, xmm1 | ||||
|     punpcklwd  xmm2, xmm7        // src[8-11] | ||||
|     pmulld     xmm2, xmm5 | ||||
|     movdqa     xmm5, xmmword ptr kHashMul3 | ||||
|     pmulld(0xd5)                 // pmulld     xmm2, xmm5 | ||||
|     movdqa     xmm5, kHashMul3 | ||||
|     punpckhwd  xmm1, xmm7        // src[12-15] | ||||
|     pmulld     xmm1, xmm5 | ||||
|     pmulld(0xcd)                 // pmulld     xmm1, xmm5 | ||||
|     paddd      xmm3, xmm4        // add 16 results | ||||
|     paddd      xmm1, xmm2 | ||||
|     paddd      xmm1, xmm3 | ||||
| @@ -183,37 +191,36 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { | ||||
|   __asm { | ||||
|     mov        eax, [esp + 4]    // src | ||||
|     mov        ecx, [esp + 8]    // count | ||||
|     vmovd      xmm0, [esp + 12]  // seed | ||||
|     movd       xmm0, [esp + 12]  // seed | ||||
|     movdqa     xmm6, kHash16x33 | ||||
|  | ||||
|   wloop: | ||||
|     vpmovzxbd  xmm3, [eax]  // src[0-3] | ||||
|     vpmulld    xmm0, xmm0, xmmword ptr kHash16x33  // hash *= 33 ^ 16 | ||||
|     vpmovzxbd  xmm4, [eax + 4]  // src[4-7] | ||||
|     vpmulld    xmm3, xmm3, xmmword ptr kHashMul0 | ||||
|     vpmovzxbd  xmm2, [eax + 8]  // src[8-11] | ||||
|     vpmulld    xmm4, xmm4, xmmword ptr kHashMul1 | ||||
|     vpmovzxbd  xmm1, [eax + 12]  // src[12-15] | ||||
|     vpmulld    xmm2, xmm2, xmmword ptr kHashMul2 | ||||
|     vpmovzxbd  xmm3, dword ptr [eax]  // src[0-3] | ||||
|     pmulld     xmm0, xmm6  // hash *= 33 ^ 16 | ||||
|     vpmovzxbd  xmm4, dword ptr [eax + 4]  // src[4-7] | ||||
|     pmulld     xmm3, kHashMul0 | ||||
|     vpmovzxbd  xmm2, dword ptr [eax + 8]  // src[8-11] | ||||
|     pmulld     xmm4, kHashMul1 | ||||
|     vpmovzxbd  xmm1, dword ptr [eax + 12]  // src[12-15] | ||||
|     pmulld     xmm2, kHashMul2 | ||||
|     lea        eax, [eax + 16] | ||||
|     vpmulld    xmm1, xmm1, xmmword ptr kHashMul3 | ||||
|     vpaddd     xmm3, xmm3, xmm4        // add 16 results | ||||
|     vpaddd     xmm1, xmm1, xmm2 | ||||
|     vpaddd     xmm1, xmm1, xmm3 | ||||
|     vpshufd    xmm2, xmm1, 0x0e  // upper 2 dwords | ||||
|     vpaddd     xmm1, xmm1,xmm2 | ||||
|     vpshufd    xmm2, xmm1, 0x01 | ||||
|     vpaddd     xmm1, xmm1, xmm2 | ||||
|     vpaddd     xmm0, xmm0, xmm1 | ||||
|     pmulld     xmm1, kHashMul3 | ||||
|     paddd      xmm3, xmm4        // add 16 results | ||||
|     paddd      xmm1, xmm2 | ||||
|     paddd      xmm1, xmm3 | ||||
|     pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords | ||||
|     paddd      xmm1, xmm2 | ||||
|     pshufd     xmm2, xmm1, 0x01 | ||||
|     paddd      xmm1, xmm2 | ||||
|     paddd      xmm0, xmm1 | ||||
|     sub        ecx, 16 | ||||
|     jg         wloop | ||||
|  | ||||
|     vmovd      eax, xmm0         // return hash | ||||
|     vzeroupper | ||||
|     movd       eax, xmm0         // return hash | ||||
|     ret | ||||
|   } | ||||
| } | ||||
| #endif  // _MSC_VER >= 1700 | ||||
|  | ||||
| #endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) | ||||
|  | ||||
| #ifdef __cplusplus | ||||
|   | ||||
							
								
								
									
										58
									
								
								third_party/libyuv/source/convert.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										58
									
								
								third_party/libyuv/source/convert.cc
									
									
									
									
										vendored
									
									
								
							| @@ -245,8 +245,8 @@ static int X420ToI420(const uint8* src_y, | ||||
|   int y; | ||||
|   int halfwidth = (width + 1) >> 1; | ||||
|   int halfheight = (height + 1) >> 1; | ||||
|   void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
|                      int width) = SplitUVRow_C; | ||||
|   void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = | ||||
|       SplitUVRow_C; | ||||
|   if (!src_y || !src_uv || | ||||
|       !dst_y || !dst_u || !dst_v || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -303,14 +303,14 @@ static int X420ToI420(const uint8* src_y, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SPLITUVROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && | ||||
| #if defined(HAS_SPLITUVROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && | ||||
|       IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) && | ||||
|       IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) && | ||||
|       IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) { | ||||
|     SplitUVRow = SplitUVRow_Any_DSPR2; | ||||
|     SplitUVRow = SplitUVRow_Any_MIPS_DSPR2; | ||||
|     if (IS_ALIGNED(halfwidth, 16)) { | ||||
|       SplitUVRow = SplitUVRow_DSPR2; | ||||
|       SplitUVRow = SplitUVRow_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -390,9 +390,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = YUY2ToUVRow_C; | ||||
|       uint8* dst_u, uint8* dst_v, int pix) = YUY2ToUVRow_C; | ||||
|   void (*YUY2ToYRow)(const uint8* src_yuy2, | ||||
|       uint8* dst_y, int width) = YUY2ToYRow_C; | ||||
|       uint8* dst_y, int pix) = YUY2ToYRow_C; | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
| @@ -455,9 +455,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = UYVYToUVRow_C; | ||||
|       uint8* dst_u, uint8* dst_v, int pix) = UYVYToUVRow_C; | ||||
|   void (*UYVYToYRow)(const uint8* src_uyvy, | ||||
|       uint8* dst_y, int width) = UYVYToYRow_C; | ||||
|       uint8* dst_y, int pix) = UYVYToYRow_C; | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
| @@ -521,7 +521,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, | ||||
|   int y; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   if (!src_argb || | ||||
|       !dst_y || !dst_u || !dst_v || | ||||
| @@ -597,7 +597,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, | ||||
|   int y; | ||||
|   void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C; | ||||
|   void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int width) = | ||||
|   void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) = | ||||
|       BGRAToYRow_C; | ||||
|   if (!src_bgra || | ||||
|       !dst_y || !dst_u || !dst_v || | ||||
| @@ -663,7 +663,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, | ||||
|   int y; | ||||
|   void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C; | ||||
|   void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int width) = | ||||
|   void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) = | ||||
|       ABGRToYRow_C; | ||||
|   if (!src_abgr || | ||||
|       !dst_y || !dst_u || !dst_v || | ||||
| @@ -729,7 +729,7 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, | ||||
|   int y; | ||||
|   void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C; | ||||
|   void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int width) = | ||||
|   void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) = | ||||
|       RGBAToYRow_C; | ||||
|   if (!src_rgba || | ||||
|       !dst_y || !dst_u || !dst_v || | ||||
| @@ -796,14 +796,14 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, | ||||
| #if defined(HAS_RGB24TOYROW_NEON) | ||||
|   void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C; | ||||
|   void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int width) = | ||||
|   void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) = | ||||
|       RGB24ToYRow_C; | ||||
| #else | ||||
|   void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = | ||||
|   void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = | ||||
|       RGB24ToARGBRow_C; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
| #endif | ||||
|   if (!src_rgb24 || !dst_y || !dst_u || !dst_v || | ||||
| @@ -910,14 +910,14 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, | ||||
| #if defined(HAS_RAWTOYROW_NEON) | ||||
|   void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C; | ||||
|   void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int width) = | ||||
|   void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) = | ||||
|       RAWToYRow_C; | ||||
| #else | ||||
|   void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = | ||||
|   void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = | ||||
|       RAWToARGBRow_C; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
| #endif | ||||
|   if (!src_raw || !dst_y || !dst_u || !dst_v || | ||||
| @@ -1024,14 +1024,14 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, | ||||
| #if defined(HAS_RGB565TOYROW_NEON) | ||||
|   void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C; | ||||
|   void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int width) = | ||||
|   void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) = | ||||
|       RGB565ToYRow_C; | ||||
| #else | ||||
|   void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = | ||||
|   void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = | ||||
|       RGB565ToARGBRow_C; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
| #endif | ||||
|   if (!src_rgb565 || !dst_y || !dst_u || !dst_v || | ||||
| @@ -1146,14 +1146,14 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, | ||||
| #if defined(HAS_ARGB1555TOYROW_NEON) | ||||
|   void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C; | ||||
|   void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int width) = | ||||
|   void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) = | ||||
|       ARGB1555ToYRow_C; | ||||
| #else | ||||
|   void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = | ||||
|   void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = | ||||
|       ARGB1555ToARGBRow_C; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
| #endif | ||||
|   if (!src_argb1555 || !dst_y || !dst_u || !dst_v || | ||||
| @@ -1270,14 +1270,14 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, | ||||
| #if defined(HAS_ARGB4444TOYROW_NEON) | ||||
|   void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C; | ||||
|   void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int width) = | ||||
|   void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) = | ||||
|       ARGB4444ToYRow_C; | ||||
| #else | ||||
|   void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = | ||||
|   void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = | ||||
|       ARGB4444ToARGBRow_C; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
| #endif | ||||
|   if (!src_argb4444 || !dst_y || !dst_u || !dst_v || | ||||
|   | ||||
							
								
								
									
										767
									
								
								third_party/libyuv/source/convert_argb.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										767
									
								
								third_party/libyuv/source/convert_argb.cc
									
									
									
									
										vendored
									
									
								
							| @@ -14,7 +14,6 @@ | ||||
| #ifdef HAVE_JPEG | ||||
| #include "libyuv/mjpeg_decoder.h" | ||||
| #endif | ||||
| #include "libyuv/planar_functions.h"  // For CopyPlane and ARGBShuffle. | ||||
| #include "libyuv/rotate_argb.h" | ||||
| #include "libyuv/row.h" | ||||
| #include "libyuv/video_common.h" | ||||
| @@ -45,347 +44,18 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I422 to ARGB with matrix | ||||
| static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y, | ||||
| // Convert I444 to ARGB. | ||||
| LIBYUV_API | ||||
| int I444ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                             const struct YuvConstants* yuvconstants, | ||||
|                             int width, int height) { | ||||
|   int y; | ||||
|   void (*I422ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = I422ToARGBRow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_argb = dst_argb + (height - 1) * dst_stride_argb; | ||||
|     dst_stride_argb = -dst_stride_argb; | ||||
|   } | ||||
| #if defined(HAS_I422TOARGBROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
|       src_u += src_stride_u; | ||||
|       src_v += src_stride_v; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 to ARGB. | ||||
| LIBYUV_API | ||||
| int I420ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   return I420ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_argb, dst_stride_argb, | ||||
|                           &kYuvI601Constants, | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert I420 to ABGR. | ||||
| LIBYUV_API | ||||
| int I420ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height) { | ||||
|   return I420ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_v, src_stride_v,  // Swap U and V | ||||
|                           src_u, src_stride_u, | ||||
|                           dst_abgr, dst_stride_abgr, | ||||
|                           &kYvuI601Constants,  // Use Yvu matrix | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert J420 to ARGB. | ||||
| LIBYUV_API | ||||
| int J420ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   return I420ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_argb, dst_stride_argb, | ||||
|                           &kYuvJPEGConstants, | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert J420 to ABGR. | ||||
| LIBYUV_API | ||||
| int J420ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height) { | ||||
|   return I420ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_v, src_stride_v,  // Swap U and V | ||||
|                           src_u, src_stride_u, | ||||
|                           dst_abgr, dst_stride_abgr, | ||||
|                           &kYvuJPEGConstants,  // Use Yvu matrix | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert H420 to ARGB. | ||||
| LIBYUV_API | ||||
| int H420ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   return I420ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_argb, dst_stride_argb, | ||||
|                           &kYuvH709Constants, | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert H420 to ABGR. | ||||
| LIBYUV_API | ||||
| int H420ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height) { | ||||
|   return I420ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_v, src_stride_v,  // Swap U and V | ||||
|                           src_u, src_stride_u, | ||||
|                           dst_abgr, dst_stride_abgr, | ||||
|                           &kYvuH709Constants,  // Use Yvu matrix | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert I422 to ARGB with matrix | ||||
| static int I422ToARGBMatrix(const uint8* src_y, int src_stride_y, | ||||
|                             const uint8* src_u, int src_stride_u, | ||||
|                             const uint8* src_v, int src_stride_v, | ||||
|                             uint8* dst_argb, int dst_stride_argb, | ||||
|                             const struct YuvConstants* yuvconstants, | ||||
|                             int width, int height) { | ||||
|   int y; | ||||
|   void (*I422ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = I422ToARGBRow_C; | ||||
|   if (!src_y || !src_u || !src_v || | ||||
|       !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_argb = dst_argb + (height - 1) * dst_stride_argb; | ||||
|     dst_stride_argb = -dst_stride_argb; | ||||
|   } | ||||
|   // Coalesce rows. | ||||
|   if (src_stride_y == width && | ||||
|       src_stride_u * 2 == width && | ||||
|       src_stride_v * 2 == width && | ||||
|       dst_stride_argb == width * 4) { | ||||
|     width *= height; | ||||
|     height = 1; | ||||
|     src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; | ||||
|   } | ||||
| #if defined(HAS_I422TOARGBROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     src_u += src_stride_u; | ||||
|     src_v += src_stride_v; | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I422 to ARGB. | ||||
| LIBYUV_API | ||||
| int I422ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   return I422ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_argb, dst_stride_argb, | ||||
|                           &kYuvI601Constants, | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert I422 to ABGR. | ||||
| LIBYUV_API | ||||
| int I422ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height) { | ||||
|   return I422ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_v, src_stride_v,  // Swap U and V | ||||
|                           src_u, src_stride_u, | ||||
|                           dst_abgr, dst_stride_abgr, | ||||
|                           &kYvuI601Constants,  // Use Yvu matrix | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert J422 to ARGB. | ||||
| LIBYUV_API | ||||
| int J422ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   return I422ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_argb, dst_stride_argb, | ||||
|                           &kYuvJPEGConstants, | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert J422 to ABGR. | ||||
| LIBYUV_API | ||||
| int J422ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height) { | ||||
|   return I422ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_v, src_stride_v,  // Swap U and V | ||||
|                           src_u, src_stride_u, | ||||
|                           dst_abgr, dst_stride_abgr, | ||||
|                           &kYvuJPEGConstants,  // Use Yvu matrix | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert H422 to ARGB. | ||||
| LIBYUV_API | ||||
| int H422ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   return I422ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_argb, dst_stride_argb, | ||||
|                           &kYuvH709Constants, | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert H422 to ABGR. | ||||
| LIBYUV_API | ||||
| int H422ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height) { | ||||
|   return I422ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_v, src_stride_v,  // Swap U and V | ||||
|                           src_u, src_stride_u, | ||||
|                           dst_abgr, dst_stride_abgr, | ||||
|                           &kYvuH709Constants,  // Use Yvu matrix | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert I444 to ARGB with matrix | ||||
| static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, | ||||
|                             const uint8* src_u, int src_stride_u, | ||||
|                             const uint8* src_v, int src_stride_v, | ||||
|                             uint8* dst_argb, int dst_stride_argb, | ||||
|                             const struct YuvConstants* yuvconstants, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*I444ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = I444ToARGBRow_C; | ||||
|   if (!src_y || !src_u || !src_v || | ||||
|       !dst_argb || | ||||
| @@ -433,7 +103,7 @@ static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); | ||||
|     I444ToARGBRow(src_y, src_u, src_v, dst_argb, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     src_u += src_stride_u; | ||||
| @@ -442,49 +112,81 @@ static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I444 to ARGB. | ||||
| // Convert I422 to ARGB. | ||||
| LIBYUV_API | ||||
| int I444ToARGB(const uint8* src_y, int src_stride_y, | ||||
| int I422ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   return I444ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_argb, dst_stride_argb, | ||||
|                           &kYuvI601Constants, | ||||
|                           width, height); | ||||
|   int y; | ||||
|   void (*I422ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         int width) = I422ToARGBRow_C; | ||||
|   if (!src_y || !src_u || !src_v || | ||||
|       !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|  | ||||
| // Convert I444 to ABGR. | ||||
| LIBYUV_API | ||||
| int I444ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height) { | ||||
|   return I444ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_v, src_stride_v,  // Swap U and V | ||||
|                           src_u, src_stride_u, | ||||
|                           dst_abgr, dst_stride_abgr, | ||||
|                           &kYvuI601Constants,  // Use Yvu matrix | ||||
|                           width, height); | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_argb = dst_argb + (height - 1) * dst_stride_argb; | ||||
|     dst_stride_argb = -dst_stride_argb; | ||||
|   } | ||||
|   // Coalesce rows. | ||||
|   if (src_stride_y == width && | ||||
|       src_stride_u * 2 == width && | ||||
|       src_stride_v * 2 == width && | ||||
|       dst_stride_argb == width * 4) { | ||||
|     width *= height; | ||||
|     height = 1; | ||||
|     src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; | ||||
|   } | ||||
| #if defined(HAS_I422TOARGBROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| // Convert J444 to ARGB. | ||||
| LIBYUV_API | ||||
| int J444ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   return I444ToARGBMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_argb, dst_stride_argb, | ||||
|                           &kYuvJPEGConstants, | ||||
|                           width, height); | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     src_u += src_stride_u; | ||||
|     src_v += src_stride_v; | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I411 to ARGB. | ||||
| @@ -499,7 +201,6 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = I411ToARGBRow_C; | ||||
|   if (!src_y || !src_u || !src_v || | ||||
|       !dst_argb || | ||||
| @@ -547,7 +248,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvI601Constants, width); | ||||
|     I411ToARGBRow(src_y, src_u, src_v, dst_argb, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     src_u += src_stride_u; | ||||
| @@ -556,143 +257,6 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 with Alpha to preattenuated ARGB. | ||||
| static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y, | ||||
|                                  const uint8* src_u, int src_stride_u, | ||||
|                                  const uint8* src_v, int src_stride_v, | ||||
|                                  const uint8* src_a, int src_stride_a, | ||||
|                                  uint8* dst_argb, int dst_stride_argb, | ||||
|                                  const struct YuvConstants* yuvconstants, | ||||
|                                  int width, int height, int attenuate) { | ||||
|   int y; | ||||
|   void (*I422AlphaToARGBRow)(const uint8* y_buf, | ||||
|                              const uint8* u_buf, | ||||
|                              const uint8* v_buf, | ||||
|                              const uint8* a_buf, | ||||
|                              uint8* dst_argb, | ||||
|                              const struct YuvConstants* yuvconstants, | ||||
|                              int width) = I422AlphaToARGBRow_C; | ||||
|   void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, | ||||
|                            int width) = ARGBAttenuateRow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_argb = dst_argb + (height - 1) * dst_stride_argb; | ||||
|     dst_stride_argb = -dst_stride_argb; | ||||
|   } | ||||
| #if defined(HAS_I422ALPHATOARGBROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     I422AlphaToARGBRow = I422AlphaToARGBRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422AlphaToARGBRow = I422AlphaToARGBRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422ALPHATOARGBROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     I422AlphaToARGBRow = I422AlphaToARGBRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       I422AlphaToARGBRow = I422AlphaToARGBRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422ALPHATOARGBROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     I422AlphaToARGBRow = I422AlphaToARGBRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422AlphaToARGBRow = I422AlphaToARGBRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422ALPHATOARGBROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     I422AlphaToARGBRow = I422AlphaToARGBRow_DSPR2; | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBATTENUATEROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 4)) { | ||||
|       ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBATTENUATEROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       ARGBAttenuateRow = ARGBAttenuateRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBATTENUATEROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       ARGBAttenuateRow = ARGBAttenuateRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, | ||||
|                        width); | ||||
|     if (attenuate) { | ||||
|       ARGBAttenuateRow(dst_argb, dst_argb, width); | ||||
|     } | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_a += src_stride_a; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
|       src_u += src_stride_u; | ||||
|       src_v += src_stride_v; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 with Alpha to ARGB. | ||||
| LIBYUV_API | ||||
| int I420AlphaToARGB(const uint8* src_y, int src_stride_y, | ||||
|                     const uint8* src_u, int src_stride_u, | ||||
|                     const uint8* src_v, int src_stride_v, | ||||
|                     const uint8* src_a, int src_stride_a, | ||||
|                     uint8* dst_argb, int dst_stride_argb, | ||||
|                     int width, int height, int attenuate) { | ||||
|   return I420AlphaToARGBMatrix(src_y, src_stride_y, | ||||
|                                src_u, src_stride_u, | ||||
|                                src_v, src_stride_v, | ||||
|                                src_a, src_stride_a, | ||||
|                                dst_argb, dst_stride_argb, | ||||
|                                &kYuvI601Constants, | ||||
|                                width, height, attenuate); | ||||
| } | ||||
|  | ||||
| // Convert I420 with Alpha to ABGR. | ||||
| LIBYUV_API | ||||
| int I420AlphaToABGR(const uint8* src_y, int src_stride_y, | ||||
|                     const uint8* src_u, int src_stride_u, | ||||
|                     const uint8* src_v, int src_stride_v, | ||||
|                     const uint8* src_a, int src_stride_a, | ||||
|                     uint8* dst_abgr, int dst_stride_abgr, | ||||
|                     int width, int height, int attenuate) { | ||||
|   return I420AlphaToARGBMatrix(src_y, src_stride_y, | ||||
|                                src_v, src_stride_v,  // Swap U and V | ||||
|                                src_u, src_stride_u, | ||||
|                                src_a, src_stride_a, | ||||
|                                dst_abgr, dst_stride_abgr, | ||||
|                                &kYvuI601Constants,  // Use Yvu matrix | ||||
|                                width, height, attenuate); | ||||
| } | ||||
|  | ||||
| // Convert I400 to ARGB. | ||||
| LIBYUV_API | ||||
| int I400ToARGB(const uint8* src_y, int src_stride_y, | ||||
| @@ -758,7 +322,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) = | ||||
|   void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) = | ||||
|       J400ToARGBRow_C; | ||||
|   if (!src_y || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -885,7 +449,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, | ||||
|                 uint8* dst_argb, int dst_stride_argb, | ||||
|                 int width, int height) { | ||||
|   int y; | ||||
|   void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = | ||||
|   void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = | ||||
|       RGB24ToARGBRow_C; | ||||
|   if (!src_rgb24 || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -935,7 +499,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, | ||||
|               uint8* dst_argb, int dst_stride_argb, | ||||
|               int width, int height) { | ||||
|   int y; | ||||
|   void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = | ||||
|   void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = | ||||
|       RAWToARGBRow_C; | ||||
|   if (!src_raw || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -985,7 +549,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, | ||||
|                  uint8* dst_argb, int dst_stride_argb, | ||||
|                  int width, int height) { | ||||
|   int y; | ||||
|   void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) = | ||||
|   void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) = | ||||
|       RGB565ToARGBRow_C; | ||||
|   if (!src_rgb565 || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -1044,7 +608,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, | ||||
|                    int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, | ||||
|       int width) = ARGB1555ToARGBRow_C; | ||||
|       int pix) = ARGB1555ToARGBRow_C; | ||||
|   if (!src_argb1555 || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -1102,7 +666,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, | ||||
|                    int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, | ||||
|       int width) = ARGB4444ToARGBRow_C; | ||||
|       int pix) = ARGB4444ToARGBRow_C; | ||||
|   if (!src_argb4444 || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -1163,7 +727,6 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, | ||||
|   void (*NV12ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* uv_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = NV12ToARGBRow_C; | ||||
|   if (!src_y || !src_uv || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -1201,7 +764,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width); | ||||
|     NV12ToARGBRow(src_y, src_uv, dst_argb, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
| @@ -1221,7 +784,6 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y, | ||||
|   void (*NV21ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* uv_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = NV21ToARGBRow_C; | ||||
|   if (!src_y || !src_uv || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -1259,7 +821,7 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     NV21ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width); | ||||
|     NV21ToARGBRow(src_y, src_uv, dst_argb, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
| @@ -1278,7 +840,6 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420, | ||||
|   void (*NV12ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* uv_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = NV12ToARGBRow_C; | ||||
|   if (!src_m420 || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -1316,16 +877,14 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height - 1; y += 2) { | ||||
|     NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, | ||||
|                   &kYuvI601Constants, width); | ||||
|     NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width); | ||||
|     NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2, | ||||
|                   dst_argb + dst_stride_argb, &kYuvI601Constants, width); | ||||
|                   dst_argb + dst_stride_argb, width); | ||||
|     dst_argb += dst_stride_argb * 2; | ||||
|     src_m420 += src_stride_m420 * 3; | ||||
|   } | ||||
|   if (height & 1) { | ||||
|     NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, | ||||
|                   &kYuvI601Constants, width); | ||||
|     NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width); | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
| @@ -1336,10 +895,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*YUY2ToARGBRow)(const uint8* src_yuy2, | ||||
|                         uint8* dst_argb, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = | ||||
|   void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) = | ||||
|       YUY2ToARGBRow_C; | ||||
|   if (!src_yuy2 || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -1383,7 +939,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, | ||||
|   } | ||||
| #endif | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width); | ||||
|     YUY2ToARGBRow(src_yuy2, dst_argb, width); | ||||
|     src_yuy2 += src_stride_yuy2; | ||||
|     dst_argb += dst_stride_argb; | ||||
|   } | ||||
| @@ -1396,10 +952,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*UYVYToARGBRow)(const uint8* src_uyvy, | ||||
|                         uint8* dst_argb, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = | ||||
|   void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) = | ||||
|       UYVYToARGBRow_C; | ||||
|   if (!src_uyvy || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -1443,13 +996,159 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, | ||||
|   } | ||||
| #endif | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     UYVYToARGBRow(src_uyvy, dst_argb, &kYuvI601Constants, width); | ||||
|     UYVYToARGBRow(src_uyvy, dst_argb, width); | ||||
|     src_uyvy += src_stride_uyvy; | ||||
|     dst_argb += dst_stride_argb; | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert J420 to ARGB. | ||||
| LIBYUV_API | ||||
| int J420ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*J422ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         int width) = J422ToARGBRow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_argb = dst_argb + (height - 1) * dst_stride_argb; | ||||
|     dst_stride_argb = -dst_stride_argb; | ||||
|   } | ||||
| #if defined(HAS_J422TOARGBROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     J422ToARGBRow = J422ToARGBRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       J422ToARGBRow = J422ToARGBRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_J422TOARGBROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     J422ToARGBRow = J422ToARGBRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       J422ToARGBRow = J422ToARGBRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_J422TOARGBROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     J422ToARGBRow = J422ToARGBRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       J422ToARGBRow = J422ToARGBRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_J422TOARGBROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     J422ToARGBRow(src_y, src_u, src_v, dst_argb, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
|       src_u += src_stride_u; | ||||
|       src_v += src_stride_v; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert J422 to ARGB. | ||||
| LIBYUV_API | ||||
| int J422ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*J422ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         int width) = J422ToARGBRow_C; | ||||
|   if (!src_y || !src_u || !src_v || | ||||
|       !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_argb = dst_argb + (height - 1) * dst_stride_argb; | ||||
|     dst_stride_argb = -dst_stride_argb; | ||||
|   } | ||||
|   // Coalesce rows. | ||||
|   if (src_stride_y == width && | ||||
|       src_stride_u * 2 == width && | ||||
|       src_stride_v * 2 == width && | ||||
|       dst_stride_argb == width * 4) { | ||||
|     width *= height; | ||||
|     height = 1; | ||||
|     src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; | ||||
|   } | ||||
| #if defined(HAS_J422TOARGBROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     J422ToARGBRow = J422ToARGBRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       J422ToARGBRow = J422ToARGBRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_J422TOARGBROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     J422ToARGBRow = J422ToARGBRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       J422ToARGBRow = J422ToARGBRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_J422TOARGBROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     J422ToARGBRow = J422ToARGBRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       J422ToARGBRow = J422ToARGBRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_J422TOARGBROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     J422ToARGBRow(src_y, src_u, src_v, dst_argb, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     src_u += src_stride_u; | ||||
|     src_v += src_stride_v; | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| }  // extern "C" | ||||
| }  // namespace libyuv | ||||
|   | ||||
							
								
								
									
										350
									
								
								third_party/libyuv/source/convert_from.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										350
									
								
								third_party/libyuv/source/convert_from.cc
									
									
									
									
										vendored
									
									
								
							| @@ -445,24 +445,221 @@ int I420ToNV21(const uint8* src_y, int src_stride_y, | ||||
|   return I420ToNV12(src_y, src_stride_y, | ||||
|                     src_v, src_stride_v, | ||||
|                     src_u, src_stride_u, | ||||
|                     dst_y, dst_stride_y, | ||||
|                     dst_y, src_stride_y, | ||||
|                     dst_vu, dst_stride_vu, | ||||
|                     width, height); | ||||
| } | ||||
|  | ||||
| // Convert I422 to RGBA with matrix | ||||
| static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y, | ||||
| // Convert I420 to ARGB. | ||||
| LIBYUV_API | ||||
| int I420ToARGB(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_argb, int dst_stride_argb, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*I422ToARGBRow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         int width) = I422ToARGBRow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_argb || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_argb = dst_argb + (height - 1) * dst_stride_argb; | ||||
|     dst_stride_argb = -dst_stride_argb; | ||||
|   } | ||||
| #if defined(HAS_I422TOARGBROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToARGBRow = I422ToARGBRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); | ||||
|     dst_argb += dst_stride_argb; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
|       src_u += src_stride_u; | ||||
|       src_v += src_stride_v; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 to BGRA. | ||||
| LIBYUV_API | ||||
| int I420ToBGRA(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_bgra, int dst_stride_bgra, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*I422ToBGRARow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         int width) = I422ToBGRARow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_bgra || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; | ||||
|     dst_stride_bgra = -dst_stride_bgra; | ||||
|   } | ||||
| #if defined(HAS_I422TOBGRAROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     I422ToBGRARow = I422ToBGRARow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToBGRARow = I422ToBGRARow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOBGRAROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     I422ToBGRARow = I422ToBGRARow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       I422ToBGRARow = I422ToBGRARow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOBGRAROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     I422ToBGRARow = I422ToBGRARow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToBGRARow = I422ToBGRARow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOBGRAROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) { | ||||
|     I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width); | ||||
|     dst_bgra += dst_stride_bgra; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
|       src_u += src_stride_u; | ||||
|       src_v += src_stride_v; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 to ABGR. | ||||
| LIBYUV_API | ||||
| int I420ToABGR(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_abgr, int dst_stride_abgr, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*I422ToABGRRow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         int width) = I422ToABGRRow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_abgr || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; | ||||
|     dst_stride_abgr = -dst_stride_abgr; | ||||
|   } | ||||
| #if defined(HAS_I422TOABGRROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     I422ToABGRRow = I422ToABGRRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToABGRRow = I422ToABGRRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOABGRROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     I422ToABGRRow = I422ToABGRRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       I422ToABGRRow = I422ToABGRRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOABGRROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     I422ToABGRRow = I422ToABGRRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToABGRRow = I422ToABGRRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); | ||||
|     dst_abgr += dst_stride_abgr; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
|       src_u += src_stride_u; | ||||
|       src_v += src_stride_v; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 to RGBA. | ||||
| LIBYUV_API | ||||
| int I420ToRGBA(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_rgba, int dst_stride_rgba, | ||||
|                             const struct YuvConstants* yuvconstants, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*I422ToRGBARow)(const uint8* y_buf, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = I422ToRGBARow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_rgba || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -498,18 +695,9 @@ static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TORGBAROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) { | ||||
|     I422ToRGBARow = I422ToRGBARow_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); | ||||
|     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); | ||||
|     dst_rgba += dst_stride_rgba; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
| @@ -520,49 +708,18 @@ static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y, | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 to RGBA. | ||||
| // Convert I420 to RGB24. | ||||
| LIBYUV_API | ||||
| int I420ToRGBA(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_rgba, int dst_stride_rgba, | ||||
|                int width, int height) { | ||||
|   return I420ToRGBAMatrix(src_y, src_stride_y, | ||||
|                           src_u, src_stride_u, | ||||
|                           src_v, src_stride_v, | ||||
|                           dst_rgba, dst_stride_rgba, | ||||
|                           &kYuvI601Constants, | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert I420 to BGRA. | ||||
| LIBYUV_API | ||||
| int I420ToBGRA(const uint8* src_y, int src_stride_y, | ||||
|                const uint8* src_u, int src_stride_u, | ||||
|                const uint8* src_v, int src_stride_v, | ||||
|                uint8* dst_bgra, int dst_stride_bgra, | ||||
|                int width, int height) { | ||||
|   return I420ToRGBAMatrix(src_y, src_stride_y, | ||||
|                           src_v, src_stride_v,  // Swap U and V | ||||
|                           src_u, src_stride_u, | ||||
|                           dst_bgra, dst_stride_bgra, | ||||
|                           &kYvuI601Constants,  // Use Yvu matrix | ||||
|                           width, height); | ||||
| } | ||||
|  | ||||
| // Convert I420 to RGB24 with matrix | ||||
| static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y, | ||||
| int I420ToRGB24(const uint8* src_y, int src_stride_y, | ||||
|                 const uint8* src_u, int src_stride_u, | ||||
|                 const uint8* src_v, int src_stride_v, | ||||
|                 uint8* dst_rgb24, int dst_stride_rgb24, | ||||
|                              const struct YuvConstants* yuvconstants, | ||||
|                 int width, int height) { | ||||
|   int y; | ||||
|   void (*I422ToRGB24Row)(const uint8* y_buf, | ||||
|                          const uint8* u_buf, | ||||
|                          const uint8* v_buf, | ||||
|                          uint8* rgb_buf, | ||||
|                          const struct YuvConstants* yuvconstants, | ||||
|                          int width) = I422ToRGB24Row_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_rgb24 || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -600,7 +757,7 @@ static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); | ||||
|     I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width); | ||||
|     dst_rgb24 += dst_stride_rgb24; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
| @@ -611,21 +768,6 @@ static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y, | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 to RGB24. | ||||
| LIBYUV_API | ||||
| int I420ToRGB24(const uint8* src_y, int src_stride_y, | ||||
|                 const uint8* src_u, int src_stride_u, | ||||
|                 const uint8* src_v, int src_stride_v, | ||||
|                 uint8* dst_rgb24, int dst_stride_rgb24, | ||||
|                 int width, int height) { | ||||
|   return I420ToRGB24Matrix(src_y, src_stride_y, | ||||
|                            src_u, src_stride_u, | ||||
|                            src_v, src_stride_v, | ||||
|                            dst_rgb24, dst_stride_rgb24, | ||||
|                            &kYuvI601Constants, | ||||
|                            width, height); | ||||
| } | ||||
|  | ||||
| // Convert I420 to RAW. | ||||
| LIBYUV_API | ||||
| int I420ToRAW(const uint8* src_y, int src_stride_y, | ||||
| @@ -633,12 +775,57 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, | ||||
|                 const uint8* src_v, int src_stride_v, | ||||
|                 uint8* dst_raw, int dst_stride_raw, | ||||
|                 int width, int height) { | ||||
|   return I420ToRGB24Matrix(src_y, src_stride_y, | ||||
|                            src_v, src_stride_v,  // Swap U and V | ||||
|                            src_u, src_stride_u, | ||||
|                            dst_raw, dst_stride_raw, | ||||
|                            &kYvuI601Constants,  // Use Yvu matrix | ||||
|                            width, height); | ||||
|   int y; | ||||
|   void (*I422ToRAWRow)(const uint8* y_buf, | ||||
|                        const uint8* u_buf, | ||||
|                        const uint8* v_buf, | ||||
|                        uint8* rgb_buf, | ||||
|                        int width) = I422ToRAWRow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_raw || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     dst_raw = dst_raw + (height - 1) * dst_stride_raw; | ||||
|     dst_stride_raw = -dst_stride_raw; | ||||
|   } | ||||
| #if defined(HAS_I422TORAWROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     I422ToRAWRow = I422ToRAWRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToRAWRow = I422ToRAWRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TORAWROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     I422ToRAWRow = I422ToRAWRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       I422ToRAWRow = I422ToRAWRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TORAWROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     I422ToRAWRow = I422ToRAWRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|       I422ToRAWRow = I422ToRAWRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToRAWRow(src_y, src_u, src_v, dst_raw, width); | ||||
|     dst_raw += dst_stride_raw; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
|       src_u += src_stride_u; | ||||
|       src_v += src_stride_v; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert I420 to ARGB1555. | ||||
| @@ -653,7 +840,6 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, | ||||
|                             const uint8* u_buf, | ||||
|                             const uint8* v_buf, | ||||
|                             uint8* rgb_buf, | ||||
|                             const struct YuvConstants* yuvconstants, | ||||
|                             int width) = I422ToARGB1555Row_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_argb1555 || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -691,8 +877,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants, | ||||
|                       width); | ||||
|     I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width); | ||||
|     dst_argb1555 += dst_stride_argb1555; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
| @@ -716,7 +901,6 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, | ||||
|                             const uint8* u_buf, | ||||
|                             const uint8* v_buf, | ||||
|                             uint8* rgb_buf, | ||||
|                             const struct YuvConstants* yuvconstants, | ||||
|                             int width) = I422ToARGB4444Row_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_argb4444 || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -754,8 +938,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants, | ||||
|                       width); | ||||
|     I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width); | ||||
|     dst_argb4444 += dst_stride_argb4444; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
| @@ -778,7 +961,6 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, | ||||
|                           const uint8* u_buf, | ||||
|                           const uint8* v_buf, | ||||
|                           uint8* rgb_buf, | ||||
|                           const struct YuvConstants* yuvconstants, | ||||
|                           int width) = I422ToRGB565Row_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_rgb565 || | ||||
|       width <= 0 || height == 0) { | ||||
| @@ -816,7 +998,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width); | ||||
|     I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width); | ||||
|     dst_rgb565 += dst_stride_rgb565; | ||||
|     src_y += src_stride_y; | ||||
|     if (y & 1) { | ||||
| @@ -847,10 +1029,9 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, | ||||
|                         const uint8* u_buf, | ||||
|                         const uint8* v_buf, | ||||
|                         uint8* rgb_buf, | ||||
|                         const struct YuvConstants* yuvconstants, | ||||
|                         int width) = I422ToARGBRow_C; | ||||
|   void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, | ||||
|       const uint32 dither4, int width) = ARGBToRGB565DitherRow_C; | ||||
|       const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C; | ||||
|   if (!src_y || !src_u || !src_v || !dst_rgb565 || | ||||
|       width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -888,12 +1069,12 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && | ||||
| #if defined(HAS_I422TOARGBROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_DSPR2; | ||||
|     I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) | ||||
| @@ -924,7 +1105,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, | ||||
|     // Allocate a row of argb. | ||||
|     align_buffer_64(row_argb, width * 4); | ||||
|     for (y = 0; y < height; ++y) { | ||||
|       I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); | ||||
|       I422ToARGBRow(src_y, src_u, src_v, row_argb, width); | ||||
|       ARGBToRGB565DitherRow(row_argb, dst_rgb565, | ||||
|                             *(uint32*)(dither4x4 + ((y & 3) << 2)), width); | ||||
|       dst_rgb565 += dst_stride_rgb565; | ||||
| @@ -1077,6 +1258,7 @@ int ConvertFromI420(const uint8* y, int y_stride, | ||||
|     // Triplanar formats | ||||
|     // TODO(fbarchard): halfstride instead of halfwidth | ||||
|     case FOURCC_I420: | ||||
|     case FOURCC_YU12: | ||||
|     case FOURCC_YV12: { | ||||
|       int halfwidth = (width + 1) / 2; | ||||
|       int halfheight = (height + 1) / 2; | ||||
|   | ||||
							
								
								
									
										211
									
								
								third_party/libyuv/source/convert_from_argb.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										211
									
								
								third_party/libyuv/source/convert_from_argb.cc
									
									
									
									
										vendored
									
									
								
							| @@ -28,10 +28,10 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, | ||||
|                uint8* dst_v, int dst_stride_v, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | ||||
|       int width) = ARGBToUV444Row_C; | ||||
|       int pix) = ARGBToUV444Row_C; | ||||
|   if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
| @@ -109,16 +109,13 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, | ||||
|                uint8* dst_v, int dst_stride_v, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | ||||
|       int pix) = ARGBToUV422Row_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   if (!src_argb || | ||||
|       !dst_y || !dst_u || !dst_v || | ||||
|       width <= 0 || height == 0) { | ||||
|   if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     src_argb = src_argb + (height - 1) * src_stride_argb; | ||||
| @@ -133,22 +130,34 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, | ||||
|     height = 1; | ||||
|     src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; | ||||
|   } | ||||
| #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) | ||||
| #if defined(HAS_ARGBTOUV422ROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUV422Row = ARGBToUV422Row_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOUV422ROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBToUV422Row = ARGBToUV422Row_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUV422Row = ARGBToUV422Row_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOYROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_SSSE3; | ||||
|     ARGBToYRow = ARGBToYRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_SSSE3; | ||||
|       ARGBToYRow = ARGBToYRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) | ||||
| #if defined(HAS_ARGBTOYROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_AVX2; | ||||
|     ARGBToYRow = ARGBToYRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 32)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_AVX2; | ||||
|       ARGBToYRow = ARGBToYRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| @@ -161,17 +170,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOUVROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); | ||||
|     ARGBToUV422Row(src_argb, dst_u, dst_v, width); | ||||
|     ARGBToYRow(src_argb, dst_y, width); | ||||
|     src_argb += src_stride_argb; | ||||
|     dst_y += dst_stride_y; | ||||
| @@ -190,8 +191,8 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | ||||
|       int width) = ARGBToUV411Row_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|       int pix) = ARGBToUV411Row_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -263,7 +264,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, | ||||
|   int halfwidth = (width + 1) >> 1; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|                       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, | ||||
|                       int width) = MergeUVRow_C; | ||||
| @@ -372,7 +373,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, | ||||
|   int halfwidth = (width + 1) >> 1; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|                       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, | ||||
|                       int width) = MergeUVRow_C; | ||||
| @@ -477,9 +478,9 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, | ||||
|                uint8* dst_yuy2, int dst_stride_yuy2, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | ||||
|       int pix) = ARGBToUV422Row_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, | ||||
|       const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C; | ||||
| @@ -501,22 +502,34 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, | ||||
|     height = 1; | ||||
|     src_stride_argb = dst_stride_yuy2 = 0; | ||||
|   } | ||||
| #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) | ||||
| #if defined(HAS_ARGBTOUV422ROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUV422Row = ARGBToUV422Row_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOUV422ROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBToUV422Row = ARGBToUV422Row_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUV422Row = ARGBToUV422Row_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOYROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_SSSE3; | ||||
|     ARGBToYRow = ARGBToYRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_SSSE3; | ||||
|       ARGBToYRow = ARGBToYRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) | ||||
| #if defined(HAS_ARGBTOYROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_AVX2; | ||||
|     ARGBToYRow = ARGBToYRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 32)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_AVX2; | ||||
|       ARGBToYRow = ARGBToYRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| @@ -529,14 +542,7 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOUVROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| #if defined(HAS_I422TOYUY2ROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; | ||||
| @@ -561,7 +567,7 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, | ||||
|     uint8* row_v = row_u + ((width + 63) & ~63) / 2; | ||||
|  | ||||
|     for (y = 0; y < height; ++y) { | ||||
|       ARGBToUVRow(src_argb, 0, row_u, row_v, width); | ||||
|       ARGBToUV422Row(src_argb, row_u, row_v, width); | ||||
|       ARGBToYRow(src_argb, row_y, width); | ||||
|       I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width); | ||||
|       src_argb += src_stride_argb; | ||||
| @@ -579,9 +585,9 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, | ||||
|                uint8* dst_uyvy, int dst_stride_uyvy, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, | ||||
|       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | ||||
|       int pix) = ARGBToUV422Row_C; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, | ||||
|       const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C; | ||||
| @@ -603,22 +609,34 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, | ||||
|     height = 1; | ||||
|     src_stride_argb = dst_stride_uyvy = 0; | ||||
|   } | ||||
| #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) | ||||
| #if defined(HAS_ARGBTOUV422ROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUV422Row = ARGBToUV422Row_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOUV422ROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBToUV422Row = ARGBToUV422Row_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUV422Row = ARGBToUV422Row_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOYROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_SSSE3; | ||||
|     ARGBToYRow = ARGBToYRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_SSSE3; | ||||
|       ARGBToYRow = ARGBToYRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) | ||||
| #if defined(HAS_ARGBTOYROW_AVX2) | ||||
|   if (TestCpuFlag(kCpuHasAVX2)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_AVX2; | ||||
|     ARGBToYRow = ARGBToYRow_Any_AVX2; | ||||
|     if (IS_ALIGNED(width, 32)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_AVX2; | ||||
|       ARGBToYRow = ARGBToYRow_AVX2; | ||||
|     } | ||||
|   } | ||||
| @@ -631,14 +649,7 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOUVROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBToUVRow = ARGBToUVRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVRow = ARGBToUVRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| #if defined(HAS_I422TOUYVYROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; | ||||
| @@ -663,7 +674,7 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, | ||||
|     uint8* row_v = row_u + ((width + 63) & ~63) / 2; | ||||
|  | ||||
|     for (y = 0; y < height; ++y) { | ||||
|       ARGBToUVRow(src_argb, 0, row_u, row_v, width); | ||||
|       ARGBToUV422Row(src_argb, row_u, row_v, width); | ||||
|       ARGBToYRow(src_argb, row_y, width); | ||||
|       I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width); | ||||
|       src_argb += src_stride_argb; | ||||
| @@ -681,7 +692,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, | ||||
|                uint8* dst_y, int dst_stride_y, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = | ||||
|   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYRow_C; | ||||
|   if (!src_argb || !dst_y || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -753,7 +764,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, | ||||
|                 uint8* dst_rgb24, int dst_stride_rgb24, | ||||
|                 int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) = | ||||
|   void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = | ||||
|       ARGBToRGB24Row_C; | ||||
|   if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -801,7 +812,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, | ||||
|               uint8* dst_raw, int dst_stride_raw, | ||||
|               int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) = | ||||
|   void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) = | ||||
|       ARGBToRAWRow_C; | ||||
|   if (!src_argb || !dst_raw || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -858,7 +869,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, | ||||
|                        const uint8* dither4x4, int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, | ||||
|       const uint32 dither4, int width) = ARGBToRGB565DitherRow_C; | ||||
|       const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C; | ||||
|   if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
| @@ -910,7 +921,7 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, | ||||
|                  uint8* dst_rgb565, int dst_stride_rgb565, | ||||
|                  int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) = | ||||
|   void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = | ||||
|       ARGBToRGB565Row_C; | ||||
|   if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -966,7 +977,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, | ||||
|                    uint8* dst_argb1555, int dst_stride_argb1555, | ||||
|                    int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) = | ||||
|   void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = | ||||
|       ARGBToARGB1555Row_C; | ||||
|   if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -1022,7 +1033,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, | ||||
|                    uint8* dst_argb4444, int dst_stride_argb4444, | ||||
|                    int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) = | ||||
|   void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = | ||||
|       ARGBToARGB4444Row_C; | ||||
|   if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
| @@ -1082,7 +1093,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, | ||||
|   int y; | ||||
|   void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|                        uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; | ||||
|   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = | ||||
|   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) = | ||||
|       ARGBToYJRow_C; | ||||
|   if (!src_argb || | ||||
|       !dst_yj || !dst_u || !dst_v || | ||||
| @@ -1146,24 +1157,21 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Convert ARGB to J422. (JPeg full range I422). | ||||
| // ARGB little endian (bgra in memory) to J422 | ||||
| LIBYUV_API | ||||
| int ARGBToJ422(const uint8* src_argb, int src_stride_argb, | ||||
|                uint8* dst_yj, int dst_stride_yj, | ||||
|                uint8* dst_y, int dst_stride_y, | ||||
|                uint8* dst_u, int dst_stride_u, | ||||
|                uint8* dst_v, int dst_stride_v, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, | ||||
|                        uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; | ||||
|   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = | ||||
|   void (*ARGBToUVJ422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | ||||
|       int pix) = ARGBToUVJ422Row_C; | ||||
|   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_y, int pix) = | ||||
|       ARGBToYJRow_C; | ||||
|   if (!src_argb || | ||||
|       !dst_yj || !dst_u || !dst_v || | ||||
|       width <= 0 || height == 0) { | ||||
|   if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   // Negative height means invert the image. | ||||
|   if (height < 0) { | ||||
|     height = -height; | ||||
|     src_argb = src_argb + (height - 1) * src_stride_argb; | ||||
| @@ -1171,19 +1179,34 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb, | ||||
|   } | ||||
|   // Coalesce rows. | ||||
|   if (src_stride_argb == width * 4 && | ||||
|       dst_stride_yj == width && | ||||
|       dst_stride_y == width && | ||||
|       dst_stride_u * 2 == width && | ||||
|       dst_stride_v * 2 == width) { | ||||
|     width *= height; | ||||
|     height = 1; | ||||
|     src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0; | ||||
|     src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; | ||||
|   } | ||||
| #if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) | ||||
| #if defined(HAS_ARGBTOUVJ422ROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBToUVJ422Row = ARGBToUVJ422Row_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVJ422Row = ARGBToUVJ422Row_SSSE3; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOUVJ422ROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBToUVJ422Row = ARGBToUVJ422Row_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVJ422Row = ARGBToUVJ422Row_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| #if defined(HAS_ARGBTOYJROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; | ||||
|     ARGBToYJRow = ARGBToYJRow_Any_SSSE3; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVJRow = ARGBToUVJRow_SSSE3; | ||||
|       ARGBToYJRow = ARGBToYJRow_SSSE3; | ||||
|     } | ||||
|   } | ||||
| @@ -1204,20 +1227,12 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOUVJROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON)) { | ||||
|     ARGBToUVJRow = ARGBToUVJRow_Any_NEON; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       ARGBToUVJRow = ARGBToUVJRow_NEON; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); | ||||
|     ARGBToYJRow(src_argb, dst_yj, width); | ||||
|     ARGBToUVJ422Row(src_argb, dst_u, dst_v, width); | ||||
|     ARGBToYJRow(src_argb, dst_y, width); | ||||
|     src_argb += src_stride_argb; | ||||
|     dst_yj += dst_stride_yj; | ||||
|     dst_y += dst_stride_y; | ||||
|     dst_u += dst_stride_u; | ||||
|     dst_v += dst_stride_v; | ||||
|   } | ||||
| @@ -1230,7 +1245,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb, | ||||
|                uint8* dst_yj, int dst_stride_yj, | ||||
|                int width, int height) { | ||||
|   int y; | ||||
|   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = | ||||
|   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) = | ||||
|       ARGBToYJRow_C; | ||||
|   if (!src_argb || !dst_yj || width <= 0 || height == 0) { | ||||
|     return -1; | ||||
|   | ||||
							
								
								
									
										1
									
								
								third_party/libyuv/source/convert_jpeg.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								third_party/libyuv/source/convert_jpeg.cc
									
									
									
									
										vendored
									
									
								
							| @@ -9,7 +9,6 @@ | ||||
|  */ | ||||
|  | ||||
| #include "libyuv/convert.h" | ||||
| #include "libyuv/convert_argb.h" | ||||
|  | ||||
| #ifdef HAVE_JPEG | ||||
| #include "libyuv/mjpeg_decoder.h" | ||||
|   | ||||
							
								
								
									
										13
									
								
								third_party/libyuv/source/convert_to_argb.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										13
									
								
								third_party/libyuv/source/convert_to_argb.cc
									
									
									
									
										vendored
									
									
								
							| @@ -23,7 +23,7 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // Convert camera sample to ARGB with cropping, rotation and vertical flip. | ||||
| // Convert camera sample to I420 with cropping, rotation and vertical flip. | ||||
| // src_width is used for source stride computation | ||||
| // src_height is used to compute location of planes, and indicate inversion | ||||
| // sample_size is measured in bytes and is the size of the frame. | ||||
| @@ -51,8 +51,8 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, | ||||
|   // also enable temporary buffer. | ||||
|   LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) || | ||||
|       crop_argb == sample; | ||||
|   uint8* dest_argb = crop_argb; | ||||
|   int dest_argb_stride = argb_stride; | ||||
|   uint8* tmp_argb = crop_argb; | ||||
|   int tmp_argb_stride = argb_stride; | ||||
|   uint8* rotate_buffer = NULL; | ||||
|   int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; | ||||
|  | ||||
| @@ -66,13 +66,13 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, | ||||
|   } | ||||
|  | ||||
|   if (need_buf) { | ||||
|     int argb_size = crop_width * 4 * abs_crop_height; | ||||
|     int argb_size = crop_width * abs_crop_height * 4; | ||||
|     rotate_buffer = (uint8*)malloc(argb_size); | ||||
|     if (!rotate_buffer) { | ||||
|       return 1;  // Out of memory runtime error. | ||||
|     } | ||||
|     crop_argb = rotate_buffer; | ||||
|     argb_stride = crop_width * 4; | ||||
|     argb_stride = crop_width; | ||||
|   } | ||||
|  | ||||
|   switch (format) { | ||||
| @@ -176,6 +176,7 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, | ||||
|       break; | ||||
|     // Triplanar formats | ||||
|     case FOURCC_I420: | ||||
|     case FOURCC_YU12: | ||||
|     case FOURCC_YV12: { | ||||
|       const uint8* src_y = sample + (src_width * crop_y + crop_x); | ||||
|       const uint8* src_u; | ||||
| @@ -290,7 +291,7 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, | ||||
|   if (need_buf) { | ||||
|     if (!r) { | ||||
|       r = ARGBRotate(crop_argb, argb_stride, | ||||
|                      dest_argb, dest_argb_stride, | ||||
|                      tmp_argb, tmp_argb_stride, | ||||
|                      crop_width, abs_crop_height, rotation); | ||||
|     } | ||||
|     free(rotate_buffer); | ||||
|   | ||||
							
								
								
									
										14
									
								
								third_party/libyuv/source/convert_to_i420.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										14
									
								
								third_party/libyuv/source/convert_to_i420.cc
									
									
									
									
										vendored
									
									
								
							| @@ -39,13 +39,12 @@ int ConvertToI420(const uint8* sample, | ||||
|   int aligned_src_width = (src_width + 1) & ~1; | ||||
|   const uint8* src; | ||||
|   const uint8* src_uv; | ||||
|   const int abs_src_height = (src_height < 0) ? -src_height : src_height; | ||||
|   // TODO(nisse): Why allow crop_height < 0? | ||||
|   const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; | ||||
|   int abs_src_height = (src_height < 0) ? -src_height : src_height; | ||||
|   int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height; | ||||
|   int r = 0; | ||||
|   LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 && | ||||
|       format != FOURCC_NV12 && format != FOURCC_NV21 && | ||||
|       format != FOURCC_YV12) || y == sample; | ||||
|       format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample; | ||||
|   uint8* tmp_y = y; | ||||
|   uint8* tmp_u = u; | ||||
|   uint8* tmp_v = v; | ||||
| @@ -53,14 +52,16 @@ int ConvertToI420(const uint8* sample, | ||||
|   int tmp_u_stride = u_stride; | ||||
|   int tmp_v_stride = v_stride; | ||||
|   uint8* rotate_buffer = NULL; | ||||
|   const int inv_crop_height = | ||||
|       (src_height < 0) ? -abs_crop_height : abs_crop_height; | ||||
|   int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; | ||||
|  | ||||
|   if (!y || !u || !v || !sample || | ||||
|       src_width <= 0 || crop_width <= 0  || | ||||
|       src_height == 0 || crop_height == 0) { | ||||
|     return -1; | ||||
|   } | ||||
|   if (src_height < 0) { | ||||
|     inv_crop_height = -inv_crop_height; | ||||
|   } | ||||
|  | ||||
|   // One pass rotation is available for some formats. For the rest, convert | ||||
|   // to I420 (with optional vertical flipping) into a temporary I420 buffer, | ||||
| @@ -213,6 +214,7 @@ int ConvertToI420(const uint8* sample, | ||||
|       break; | ||||
|     // Triplanar formats | ||||
|     case FOURCC_I420: | ||||
|     case FOURCC_YU12: | ||||
|     case FOURCC_YV12: { | ||||
|       const uint8* src_y = sample + (src_width * crop_y + crop_x); | ||||
|       const uint8* src_u; | ||||
|   | ||||
							
								
								
									
										145
									
								
								third_party/libyuv/source/cpu_id.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										145
									
								
								third_party/libyuv/source/cpu_id.cc
									
									
									
									
										vendored
									
									
								
							| @@ -10,12 +10,12 @@ | ||||
|  | ||||
| #include "libyuv/cpu_id.h" | ||||
|  | ||||
| #if defined(_MSC_VER) | ||||
| #if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) | ||||
| #include <intrin.h>  // For __cpuidex() | ||||
| #endif | ||||
| #if !defined(__pnacl__) && !defined(__CLR_VER) && \ | ||||
|     !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \ | ||||
|     defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) | ||||
|     defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219) | ||||
| #include <immintrin.h>  // For _xgetbv() | ||||
| #endif | ||||
|  | ||||
| @@ -36,8 +36,7 @@ extern "C" { | ||||
|  | ||||
| // For functions that use the stack and have runtime checks for overflow, | ||||
| // use SAFEBUFFERS to avoid additional check. | ||||
| #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \ | ||||
|     !defined(__clang__) | ||||
| #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) | ||||
| #define SAFEBUFFERS __declspec(safebuffers) | ||||
| #else | ||||
| #define SAFEBUFFERS | ||||
| @@ -49,9 +48,9 @@ extern "C" { | ||||
|     !defined(__pnacl__) && !defined(__CLR_VER) | ||||
| LIBYUV_API | ||||
| void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { | ||||
| #if defined(_MSC_VER) | ||||
| #if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) | ||||
| // Visual C version uses intrinsic or inline x86 assembly. | ||||
| #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) | ||||
| #if (_MSC_FULL_VER >= 160040219) | ||||
|   __cpuidex((int*)(cpu_info), info_eax, info_ecx); | ||||
| #elif defined(_M_IX86) | ||||
|   __asm { | ||||
| @@ -64,7 +63,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { | ||||
|     mov        [edi + 8], ecx | ||||
|     mov        [edi + 12], edx | ||||
|   } | ||||
| #else  // Visual C but not x86 | ||||
| #else | ||||
|   if (info_ecx == 0) { | ||||
|     __cpuid((int*)(cpu_info), info_eax); | ||||
|   } else { | ||||
| @@ -72,9 +71,9 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { | ||||
|   } | ||||
| #endif | ||||
| // GCC version uses inline x86 assembly. | ||||
| #else  // defined(_MSC_VER) | ||||
| #else  // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) | ||||
|   uint32 info_ebx, info_edx; | ||||
|   asm volatile ( | ||||
|   asm volatile (  // NOLINT | ||||
| #if defined( __i386__) && defined(__PIC__) | ||||
|     // Preserve ebx for fpic 32 bit. | ||||
|     "mov %%ebx, %%edi                          \n" | ||||
| @@ -90,7 +89,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { | ||||
|   cpu_info[1] = info_ebx; | ||||
|   cpu_info[2] = info_ecx; | ||||
|   cpu_info[3] = info_edx; | ||||
| #endif  // defined(_MSC_VER) | ||||
| #endif  // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) | ||||
| } | ||||
| #else  // (defined(_M_IX86) || defined(_M_X64) ... | ||||
| LIBYUV_API | ||||
| @@ -99,37 +98,28 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { | ||||
| } | ||||
| #endif | ||||
|  | ||||
| // For VS2010 and earlier emit can be used: | ||||
| //   _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier. | ||||
| //  __asm { | ||||
| //    xor        ecx, ecx    // xcr 0 | ||||
| //    xgetbv | ||||
| //    mov        xcr0, eax | ||||
| //  } | ||||
| // For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code. | ||||
| // https://code.google.com/p/libyuv/issues/detail?id=529 | ||||
| #if defined(_M_IX86) && (_MSC_VER < 1900) | ||||
| #pragma optimize("g", off) | ||||
| #endif | ||||
| // TODO(fbarchard): Enable xgetbv when validator supports it. | ||||
| #if (defined(_M_IX86) || defined(_M_X64) || \ | ||||
|     defined(__i386__) || defined(__x86_64__)) && \ | ||||
|     !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) | ||||
| #define HAS_XGETBV | ||||
| // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. | ||||
| int GetXCR0() { | ||||
| int TestOsSaveYmm() { | ||||
|   uint32 xcr0 = 0u; | ||||
| #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) | ||||
| #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) | ||||
|   xcr0 = (uint32)(_xgetbv(0));  // VS2010 SP1 required. | ||||
| #elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) | ||||
|   __asm { | ||||
|     xor        ecx, ecx    // xcr 0 | ||||
|     _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier. | ||||
|     mov        xcr0, eax | ||||
|   } | ||||
| #elif defined(__i386__) || defined(__x86_64__) | ||||
|   asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); | ||||
| #endif  // defined(__i386__) || defined(__x86_64__) | ||||
|   return xcr0; | ||||
|   return((xcr0 & 6) == 6);  // Is ymm saved? | ||||
| } | ||||
| #endif  // defined(_M_IX86) || defined(_M_X64) .. | ||||
| // Return optimization to previous setting. | ||||
| #if defined(_M_IX86) && (_MSC_VER < 1900) | ||||
| #pragma optimize("g", on) | ||||
| #endif | ||||
|  | ||||
| // based on libvpx arm_cpudetect.c | ||||
| // For Arm, but public to allow testing on any CPU | ||||
| @@ -161,9 +151,30 @@ int ArmCpuCaps(const char* cpuinfo_name) { | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| #if defined(__mips__) && defined(__linux__) | ||||
| static int MipsCpuCaps(const char* search_string) { | ||||
|   char cpuinfo_line[512]; | ||||
|   const char* file_name = "/proc/cpuinfo"; | ||||
|   FILE* f = fopen(file_name, "r"); | ||||
|   if (!f) { | ||||
|     // Assume DSP if /proc/cpuinfo is unavailable. | ||||
|     // This will occur for Chrome sandbox for Pepper or Render process. | ||||
|     return kCpuHasMIPS_DSP; | ||||
|   } | ||||
|   while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) { | ||||
|     if (strstr(cpuinfo_line, search_string) != NULL) { | ||||
|       fclose(f); | ||||
|       return kCpuHasMIPS_DSP; | ||||
|     } | ||||
|   } | ||||
|   fclose(f); | ||||
|   return 0; | ||||
| } | ||||
| #endif | ||||
|  | ||||
| // CPU detect function for SIMD instruction sets. | ||||
| LIBYUV_API | ||||
| int cpu_info_ = 0;  // cpu_info is not initialized yet. | ||||
| int cpu_info_ = kCpuInit;  // cpu_info is not initialized yet. | ||||
|  | ||||
| // Test environment variable for disabling CPU features. Any non-zero value | ||||
| // to disable. Zero ignored to make it easy to set the variable on/off. | ||||
| @@ -186,9 +197,8 @@ static LIBYUV_BOOL TestEnv(const char*) { | ||||
|  | ||||
| LIBYUV_API SAFEBUFFERS | ||||
| int InitCpuFlags(void) { | ||||
|   // TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized. | ||||
|   int cpu_info = 0; | ||||
| #if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86) | ||||
|  | ||||
|   uint32 cpu_info0[4] = { 0, 0, 0, 0 }; | ||||
|   uint32 cpu_info1[4] = { 0, 0, 0, 0 }; | ||||
|   uint32 cpu_info7[4] = { 0, 0, 0, 0 }; | ||||
| @@ -197,7 +207,7 @@ int InitCpuFlags(void) { | ||||
|   if (cpu_info0[0] >= 7) { | ||||
|     CpuId(7, 0, cpu_info7); | ||||
|   } | ||||
|   cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | | ||||
|   cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | | ||||
|               ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | | ||||
|               ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | | ||||
|               ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | | ||||
| @@ -206,57 +216,57 @@ int InitCpuFlags(void) { | ||||
|               kCpuHasX86; | ||||
|  | ||||
| #ifdef HAS_XGETBV | ||||
|   // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv | ||||
|   if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) &&  // AVX and OSXSave | ||||
|       ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers | ||||
|     cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; | ||||
|  | ||||
|     // Detect AVX512bw | ||||
|     if ((GetXCR0() & 0xe0) == 0xe0) { | ||||
|       cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0; | ||||
|     } | ||||
|   if ((cpu_info1[2] & 0x18000000) == 0x18000000 &&  // AVX and OSSave | ||||
|       TestOsSaveYmm()) {  // Saves YMM. | ||||
|     cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | | ||||
|                  kCpuHasAVX; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   // Environment variable overrides for testing. | ||||
|   if (TestEnv("LIBYUV_DISABLE_X86")) { | ||||
|     cpu_info &= ~kCpuHasX86; | ||||
|     cpu_info_ &= ~kCpuHasX86; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_SSE2")) { | ||||
|     cpu_info &= ~kCpuHasSSE2; | ||||
|     cpu_info_ &= ~kCpuHasSSE2; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_SSSE3")) { | ||||
|     cpu_info &= ~kCpuHasSSSE3; | ||||
|     cpu_info_ &= ~kCpuHasSSSE3; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_SSE41")) { | ||||
|     cpu_info &= ~kCpuHasSSE41; | ||||
|     cpu_info_ &= ~kCpuHasSSE41; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_SSE42")) { | ||||
|     cpu_info &= ~kCpuHasSSE42; | ||||
|     cpu_info_ &= ~kCpuHasSSE42; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_AVX")) { | ||||
|     cpu_info &= ~kCpuHasAVX; | ||||
|     cpu_info_ &= ~kCpuHasAVX; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_AVX2")) { | ||||
|     cpu_info &= ~kCpuHasAVX2; | ||||
|     cpu_info_ &= ~kCpuHasAVX2; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_ERMS")) { | ||||
|     cpu_info &= ~kCpuHasERMS; | ||||
|     cpu_info_ &= ~kCpuHasERMS; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_FMA3")) { | ||||
|     cpu_info &= ~kCpuHasFMA3; | ||||
|   } | ||||
|   if (TestEnv("LIBYUV_DISABLE_AVX3")) { | ||||
|     cpu_info &= ~kCpuHasAVX3; | ||||
|     cpu_info_ &= ~kCpuHasFMA3; | ||||
|   } | ||||
| #endif | ||||
| #if defined(__mips__) && defined(__linux__) | ||||
|   // Linux mips parse text file for dsp detect. | ||||
|   cpu_info_ = MipsCpuCaps("dsp");  // set kCpuHasMIPS_DSP. | ||||
| #if defined(__mips_dspr2) | ||||
|   cpu_info |= kCpuHasDSPR2; | ||||
|   cpu_info_ |= kCpuHasMIPS_DSPR2; | ||||
| #endif | ||||
|   cpu_info |= kCpuHasMIPS; | ||||
|   if (getenv("LIBYUV_DISABLE_DSPR2")) { | ||||
|     cpu_info &= ~kCpuHasDSPR2; | ||||
|   cpu_info_ |= kCpuHasMIPS; | ||||
|  | ||||
|   if (getenv("LIBYUV_DISABLE_MIPS")) { | ||||
|     cpu_info_ &= ~kCpuHasMIPS; | ||||
|   } | ||||
|   if (getenv("LIBYUV_DISABLE_MIPS_DSP")) { | ||||
|     cpu_info_ &= ~kCpuHasMIPS_DSP; | ||||
|   } | ||||
|   if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) { | ||||
|     cpu_info_ &= ~kCpuHasMIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
| #if defined(__arm__) || defined(__aarch64__) | ||||
| @@ -264,31 +274,28 @@ int InitCpuFlags(void) { | ||||
| // __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon. | ||||
| // For Linux, /proc/cpuinfo can be tested but without that assume Neon. | ||||
| #if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__) | ||||
|   cpu_info = kCpuHasNEON; | ||||
|   cpu_info_ = kCpuHasNEON; | ||||
| // For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon | ||||
| // flag in it. | ||||
| // So for aarch64, neon enabling is hard coded here. | ||||
| #endif | ||||
| #if defined(__aarch64__) | ||||
|   cpu_info = kCpuHasNEON; | ||||
|   cpu_info_ = kCpuHasNEON; | ||||
| #else | ||||
|   // Linux arm parse text file for neon detect. | ||||
|   cpu_info = ArmCpuCaps("/proc/cpuinfo"); | ||||
|   cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); | ||||
| #endif | ||||
|   cpu_info |= kCpuHasARM; | ||||
|   cpu_info_ |= kCpuHasARM; | ||||
|   if (TestEnv("LIBYUV_DISABLE_NEON")) { | ||||
|     cpu_info &= ~kCpuHasNEON; | ||||
|     cpu_info_ &= ~kCpuHasNEON; | ||||
|   } | ||||
| #endif  // __arm__ | ||||
|   if (TestEnv("LIBYUV_DISABLE_ASM")) { | ||||
|     cpu_info = 0; | ||||
|     cpu_info_ = 0; | ||||
|   } | ||||
|   cpu_info  |= kCpuInitialized; | ||||
|   cpu_info_ = cpu_info; | ||||
|   return cpu_info; | ||||
|   return cpu_info_; | ||||
| } | ||||
|  | ||||
| // Note that use of this function is not thread safe. | ||||
| LIBYUV_API | ||||
| void MaskCpuFlags(int enable_flags) { | ||||
|   cpu_info_ = InitCpuFlags() & enable_flags; | ||||
|   | ||||
							
								
								
									
										6
									
								
								third_party/libyuv/source/mjpeg_decoder.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								third_party/libyuv/source/mjpeg_decoder.cc
									
									
									
									
										vendored
									
									
								
							| @@ -59,7 +59,8 @@ const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK; | ||||
| // Methods that are passed to jpeglib. | ||||
| boolean fill_input_buffer(jpeg_decompress_struct* cinfo); | ||||
| void init_source(jpeg_decompress_struct* cinfo); | ||||
| void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes);  // NOLINT | ||||
| void skip_input_data(jpeg_decompress_struct* cinfo, | ||||
|                      long num_bytes);  // NOLINT | ||||
| void term_source(jpeg_decompress_struct* cinfo); | ||||
| void ErrorHandler(jpeg_common_struct* cinfo); | ||||
|  | ||||
| @@ -428,7 +429,8 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) { | ||||
|   return TRUE; | ||||
| } | ||||
|  | ||||
| void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {  // NOLINT | ||||
| void skip_input_data(j_decompress_ptr cinfo, | ||||
|                      long num_bytes) {  // NOLINT | ||||
|   cinfo->src->next_input_byte += num_bytes; | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										54
									
								
								third_party/libyuv/source/mjpeg_validate.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										54
									
								
								third_party/libyuv/source/mjpeg_validate.cc
									
									
									
									
										vendored
									
									
								
							| @@ -17,14 +17,44 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // Helper function to scan for EOI marker (0xff 0xd9). | ||||
| // Enable this to try scasb implementation. | ||||
| // #define ENABLE_SCASB 1 | ||||
|  | ||||
| #ifdef ENABLE_SCASB | ||||
|  | ||||
| // Multiple of 1. | ||||
| __declspec(naked) | ||||
| const uint8* ScanRow_ERMS(const uint8* src, uint32 val, int count) { | ||||
|   __asm { | ||||
|     mov        edx, edi | ||||
|     mov        edi, [esp + 4]   // src | ||||
|     mov        eax, [esp + 8]   // val | ||||
|     mov        ecx, [esp + 12]  // count | ||||
|     repne scasb | ||||
|     jne        sr99 | ||||
|     mov        eax, edi | ||||
|     sub        eax, 1 | ||||
|     mov        edi, edx | ||||
|     ret | ||||
|  | ||||
|   sr99: | ||||
|     mov        eax, 0 | ||||
|     mov        edi, edx | ||||
|     ret | ||||
|   } | ||||
| } | ||||
| #endif | ||||
|  | ||||
| // Helper function to scan for EOI marker. | ||||
| static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) { | ||||
|   if (sample_size >= 2) { | ||||
|   const uint8* end = sample + sample_size - 1; | ||||
|   const uint8* it = sample; | ||||
|     while (it < end) { | ||||
|       // TODO(fbarchard): scan for 0xd9 instead. | ||||
|   for (;;) { | ||||
| #ifdef ENABLE_SCASB | ||||
|     it = ScanRow_ERMS(it, 0xff, end - it); | ||||
| #else | ||||
|     it = static_cast<const uint8*>(memchr(it, 0xff, end - it)); | ||||
| #endif | ||||
|     if (it == NULL) { | ||||
|       break; | ||||
|     } | ||||
| @@ -33,26 +63,26 @@ static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) { | ||||
|     } | ||||
|     ++it;  // Skip over current 0xff. | ||||
|   } | ||||
|   } | ||||
|   // ERROR: Invalid jpeg end code not found. Size sample_size | ||||
|   return LIBYUV_FALSE; | ||||
| } | ||||
|  | ||||
| // Helper function to validate the jpeg appears intact. | ||||
| LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) { | ||||
|   // Maximum size that ValidateJpeg will consider valid. | ||||
|   const size_t kMaxJpegSize = 0x7fffffffull; | ||||
|   const size_t kBackSearchSize = 1024; | ||||
|   if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) { | ||||
|   if (sample_size < 64) { | ||||
|     // ERROR: Invalid jpeg size: sample_size | ||||
|     return LIBYUV_FALSE; | ||||
|   } | ||||
|   if (sample[0] != 0xff || sample[1] != 0xd8) {  // SOI marker | ||||
|   if (sample[0] != 0xff || sample[1] != 0xd8) {  // Start Of Image | ||||
|     // ERROR: Invalid jpeg initial start code | ||||
|     return LIBYUV_FALSE; | ||||
|   } | ||||
|   // Step over SOI marker. | ||||
|   sample += 2; | ||||
|   sample_size -= 2; | ||||
|  | ||||
|   // Look for the End Of Image (EOI) marker near the end of the buffer. | ||||
|   // Look for the End Of Image (EOI) marker in the end kilobyte of the buffer. | ||||
|   if (sample_size > kBackSearchSize) { | ||||
|     if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) { | ||||
|       return LIBYUV_TRUE;  // Success: Valid jpeg. | ||||
| @@ -60,8 +90,8 @@ LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) { | ||||
|     // Reduce search size for forward search. | ||||
|     sample_size = sample_size - kBackSearchSize + 1; | ||||
|   } | ||||
|   // Step over SOI marker and scan for EOI. | ||||
|   return ScanEOI(sample + 2, sample_size - 2); | ||||
|   return ScanEOI(sample, sample_size); | ||||
|  | ||||
| } | ||||
|  | ||||
| #ifdef __cplusplus | ||||
|   | ||||
							
								
								
									
										724
									
								
								third_party/libyuv/source/planar_functions.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										724
									
								
								third_party/libyuv/source/planar_functions.cc
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										49
									
								
								third_party/libyuv/source/rotate.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										49
									
								
								third_party/libyuv/source/rotate.cc
									
									
									
									
										vendored
									
									
								
							| @@ -49,13 +49,13 @@ void TransposePlane(const uint8* src, int src_stride, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_TRANSPOSEWX8_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2)) { | ||||
| #if defined(HAS_TRANSPOSEWX8_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2)) { | ||||
|     if (IS_ALIGNED(width, 4) && | ||||
|         IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { | ||||
|       TransposeWx8 = TransposeWx8_Fast_DSPR2; | ||||
|       TransposeWx8 = TransposeWx8_Fast_MIPS_DSPR2; | ||||
|     } else { | ||||
|       TransposeWx8 = TransposeWx8_DSPR2; | ||||
|       TransposeWx8 = TransposeWx8_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -117,6 +117,14 @@ void RotatePlane180(const uint8* src, int src_stride, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_MIRRORROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     MirrorRow = MirrorRow_Any_SSE2; | ||||
|     if (IS_ALIGNED(width, 16)) { | ||||
|       MirrorRow = MirrorRow_SSE2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_MIRRORROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     MirrorRow = MirrorRow_Any_SSSE3; | ||||
| @@ -134,11 +142,11 @@ void RotatePlane180(const uint8* src, int src_stride, | ||||
|   } | ||||
| #endif | ||||
| // TODO(fbarchard): Mirror on mips handle unaligned memory. | ||||
| #if defined(HAS_MIRRORROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && | ||||
| #if defined(HAS_MIRRORROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && | ||||
|       IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     MirrorRow = MirrorRow_DSPR2; | ||||
|     MirrorRow = MirrorRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_COPYROW_SSE2) | ||||
| @@ -196,17 +204,14 @@ void TransposeUV(const uint8* src, int src_stride, | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_TRANSPOSEUVWX8_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     TransposeUVWx8 = TransposeUVWx8_Any_SSE2; | ||||
|     if (IS_ALIGNED(width, 8)) { | ||||
|   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) { | ||||
|     TransposeUVWx8 = TransposeUVWx8_SSE2; | ||||
|   } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_TRANSPOSEUVWX8_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && | ||||
| #if defined(HAS_TRANSPOSEUVWx8_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) && | ||||
|       IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { | ||||
|     TransposeUVWx8 = TransposeUVWx8_DSPR2; | ||||
|     TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| @@ -267,22 +272,22 @@ void RotateUV180(const uint8* src, int src_stride, | ||||
|                  uint8* dst_b, int dst_stride_b, | ||||
|                  int width, int height) { | ||||
|   int i; | ||||
|   void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = | ||||
|   void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = | ||||
|       MirrorUVRow_C; | ||||
| #if defined(HAS_MIRRORUVROW_NEON) | ||||
|   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { | ||||
|     MirrorUVRow = MirrorUVRow_NEON; | ||||
|     MirrorRowUV = MirrorUVRow_NEON; | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_MIRRORUVROW_SSSE3) | ||||
| #if defined(HAS_MIRRORROW_UV_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) { | ||||
|     MirrorUVRow = MirrorUVRow_SSSE3; | ||||
|     MirrorRowUV = MirrorUVRow_SSSE3; | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_MIRRORUVROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && | ||||
| #if defined(HAS_MIRRORUVROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && | ||||
|       IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { | ||||
|     MirrorUVRow = MirrorUVRow_DSPR2; | ||||
|     MirrorRowUV = MirrorUVRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| @@ -290,7 +295,7 @@ void RotateUV180(const uint8* src, int src_stride, | ||||
|   dst_b += dst_stride_b * (height - 1); | ||||
|  | ||||
|   for (i = 0; i < height; ++i) { | ||||
|     MirrorUVRow(src, dst_a, dst_b, width); | ||||
|     MirrorRowUV(src, dst_a, dst_b, width); | ||||
|     src += src_stride; | ||||
|     dst_a -= dst_stride_a; | ||||
|     dst_b -= dst_stride_b; | ||||
|   | ||||
							
								
								
									
										41
									
								
								third_party/libyuv/source/rotate_any.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										41
									
								
								third_party/libyuv/source/rotate_any.cc
									
									
									
									
										vendored
									
									
								
							| @@ -18,7 +18,7 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #define TANY(NAMEANY, TPOS_SIMD, MASK)                                         \ | ||||
| #define TANY(NAMEANY, TPOS_SIMD, TPOS_C, MASK)                                 \ | ||||
|     void NAMEANY(const uint8* src, int src_stride,                             \ | ||||
|                  uint8* dst, int dst_stride, int width) {                      \ | ||||
|       int r = width & MASK;                                                    \ | ||||
| @@ -26,49 +26,24 @@ extern "C" { | ||||
|       if (n > 0) {                                                             \ | ||||
|         TPOS_SIMD(src, src_stride, dst, dst_stride, n);                        \ | ||||
|       }                                                                        \ | ||||
|       TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);\ | ||||
|       TPOS_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);        \ | ||||
|     } | ||||
|  | ||||
| #ifdef HAS_TRANSPOSEWX8_NEON | ||||
| TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7) | ||||
| TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, TransposeWx8_C, 7) | ||||
| #endif | ||||
| #ifdef HAS_TRANSPOSEWX8_SSSE3 | ||||
| TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7) | ||||
| TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, TransposeWx8_C, 7) | ||||
| #endif | ||||
| #ifdef HAS_TRANSPOSEWX8_FAST_SSSE3 | ||||
| TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15) | ||||
| TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, TransposeWx8_C, 15) | ||||
| #endif | ||||
| #ifdef HAS_TRANSPOSEWX8_DSPR2 | ||||
| TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7) | ||||
| #ifdef HAS_TRANSPOSEWX8_MIPS_DSPR2 | ||||
| TANY(TransposeWx8_Any_MIPS_DSPR2, TransposeWx8_MIPS_DSPR2, TransposeWx8_C, 7) | ||||
| #endif | ||||
|  | ||||
| #undef TANY | ||||
|  | ||||
| #define TUVANY(NAMEANY, TPOS_SIMD, MASK)                                       \ | ||||
|     void NAMEANY(const uint8* src, int src_stride,                             \ | ||||
|                 uint8* dst_a, int dst_stride_a,                                \ | ||||
|                 uint8* dst_b, int dst_stride_b, int width) {                   \ | ||||
|       int r = width & MASK;                                                    \ | ||||
|       int n = width - r;                                                       \ | ||||
|       if (n > 0) {                                                             \ | ||||
|         TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,   \ | ||||
|                   n);                                                          \ | ||||
|       }                                                                        \ | ||||
|       TransposeUVWx8_C(src + n * 2, src_stride,                                \ | ||||
|                        dst_a + n * dst_stride_a, dst_stride_a,                 \ | ||||
|                        dst_b + n * dst_stride_b, dst_stride_b, r);             \ | ||||
|     } | ||||
|  | ||||
| #ifdef HAS_TRANSPOSEUVWX8_NEON | ||||
| TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7) | ||||
| #endif | ||||
| #ifdef HAS_TRANSPOSEUVWX8_SSE2 | ||||
| TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7) | ||||
| #endif | ||||
| #ifdef HAS_TRANSPOSEUVWX8_DSPR2 | ||||
| TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7) | ||||
| #endif | ||||
| #undef TUVANY | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| }  // extern "C" | ||||
| }  // namespace libyuv | ||||
|   | ||||
							
								
								
									
										153
									
								
								third_party/libyuv/source/rotate_gcc.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										153
									
								
								third_party/libyuv/source/rotate_gcc.cc
									
									
									
									
										vendored
									
									
								
							| @@ -17,17 +17,16 @@ extern "C" { | ||||
| #endif | ||||
|  | ||||
| // This module is for GCC x86 and x64. | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) | ||||
| #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) | ||||
|  | ||||
| // Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit. | ||||
| #if defined(HAS_TRANSPOSEWX8_SSSE3) | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__))) | ||||
| void TransposeWx8_SSSE3(const uint8* src, int src_stride, | ||||
|                         uint8* dst, int dst_stride, int width) { | ||||
|   asm volatile ( | ||||
|     // Read in the data from the source pointer. | ||||
|     // First round of bit swap. | ||||
|     LABELALIGN | ||||
|     ".p2align  2                                 \n" | ||||
|   "1:                                            \n" | ||||
|     "movq       (%0),%%xmm0                      \n" | ||||
|     "movq       (%0,%3),%%xmm1                   \n" | ||||
| @@ -106,16 +105,143 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride, | ||||
|       "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | ||||
|   ); | ||||
| } | ||||
| #endif  // defined(HAS_TRANSPOSEWX8_SSSE3) | ||||
|  | ||||
| // Transpose 16x8. 64 bit | ||||
| #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3) | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)  && !defined(__clang__) | ||||
| void TransposeUVWx8_SSE2(const uint8* src, int src_stride, | ||||
|                          uint8* dst_a, int dst_stride_a, | ||||
|                          uint8* dst_b, int dst_stride_b, int width); | ||||
|   asm ( | ||||
|     DECLARE_FUNCTION(TransposeUVWx8_SSE2) | ||||
|     "push   %ebx                               \n" | ||||
|     "push   %esi                               \n" | ||||
|     "push   %edi                               \n" | ||||
|     "push   %ebp                               \n" | ||||
|     "mov    0x14(%esp),%eax                    \n" | ||||
|     "mov    0x18(%esp),%edi                    \n" | ||||
|     "mov    0x1c(%esp),%edx                    \n" | ||||
|     "mov    0x20(%esp),%esi                    \n" | ||||
|     "mov    0x24(%esp),%ebx                    \n" | ||||
|     "mov    0x28(%esp),%ebp                    \n" | ||||
|     "mov    %esp,%ecx                          \n" | ||||
|     "sub    $0x14,%esp                         \n" | ||||
|     "and    $0xfffffff0,%esp                   \n" | ||||
|     "mov    %ecx,0x10(%esp)                    \n" | ||||
|     "mov    0x2c(%ecx),%ecx                    \n" | ||||
|  | ||||
| "1:                                            \n" | ||||
|     "movdqu (%eax),%xmm0                       \n" | ||||
|     "movdqu (%eax,%edi,1),%xmm1                \n" | ||||
|     "lea    (%eax,%edi,2),%eax                 \n" | ||||
|     "movdqa %xmm0,%xmm7                        \n" | ||||
|     "punpcklbw %xmm1,%xmm0                     \n" | ||||
|     "punpckhbw %xmm1,%xmm7                     \n" | ||||
|     "movdqa %xmm7,%xmm1                        \n" | ||||
|     "movdqu (%eax),%xmm2                       \n" | ||||
|     "movdqu (%eax,%edi,1),%xmm3                \n" | ||||
|     "lea    (%eax,%edi,2),%eax                 \n" | ||||
|     "movdqa %xmm2,%xmm7                        \n" | ||||
|     "punpcklbw %xmm3,%xmm2                     \n" | ||||
|     "punpckhbw %xmm3,%xmm7                     \n" | ||||
|     "movdqa %xmm7,%xmm3                        \n" | ||||
|     "movdqu (%eax),%xmm4                       \n" | ||||
|     "movdqu (%eax,%edi,1),%xmm5                \n" | ||||
|     "lea    (%eax,%edi,2),%eax                 \n" | ||||
|     "movdqa %xmm4,%xmm7                        \n" | ||||
|     "punpcklbw %xmm5,%xmm4                     \n" | ||||
|     "punpckhbw %xmm5,%xmm7                     \n" | ||||
|     "movdqa %xmm7,%xmm5                        \n" | ||||
|     "movdqu (%eax),%xmm6                       \n" | ||||
|     "movdqu (%eax,%edi,1),%xmm7                \n" | ||||
|     "lea    (%eax,%edi,2),%eax                 \n" | ||||
|     "movdqu %xmm5,(%esp)                       \n" | ||||
|     "neg    %edi                               \n" | ||||
|     "movdqa %xmm6,%xmm5                        \n" | ||||
|     "punpcklbw %xmm7,%xmm6                     \n" | ||||
|     "punpckhbw %xmm7,%xmm5                     \n" | ||||
|     "movdqa %xmm5,%xmm7                        \n" | ||||
|     "lea    0x10(%eax,%edi,8),%eax             \n" | ||||
|     "neg    %edi                               \n" | ||||
|     "movdqa %xmm0,%xmm5                        \n" | ||||
|     "punpcklwd %xmm2,%xmm0                     \n" | ||||
|     "punpckhwd %xmm2,%xmm5                     \n" | ||||
|     "movdqa %xmm5,%xmm2                        \n" | ||||
|     "movdqa %xmm1,%xmm5                        \n" | ||||
|     "punpcklwd %xmm3,%xmm1                     \n" | ||||
|     "punpckhwd %xmm3,%xmm5                     \n" | ||||
|     "movdqa %xmm5,%xmm3                        \n" | ||||
|     "movdqa %xmm4,%xmm5                        \n" | ||||
|     "punpcklwd %xmm6,%xmm4                     \n" | ||||
|     "punpckhwd %xmm6,%xmm5                     \n" | ||||
|     "movdqa %xmm5,%xmm6                        \n" | ||||
|     "movdqu (%esp),%xmm5                       \n" | ||||
|     "movdqu %xmm6,(%esp)                       \n" | ||||
|     "movdqa %xmm5,%xmm6                        \n" | ||||
|     "punpcklwd %xmm7,%xmm5                     \n" | ||||
|     "punpckhwd %xmm7,%xmm6                     \n" | ||||
|     "movdqa %xmm6,%xmm7                        \n" | ||||
|     "movdqa %xmm0,%xmm6                        \n" | ||||
|     "punpckldq %xmm4,%xmm0                     \n" | ||||
|     "punpckhdq %xmm4,%xmm6                     \n" | ||||
|     "movdqa %xmm6,%xmm4                        \n" | ||||
|     "movdqu (%esp),%xmm6                       \n" | ||||
|     "movlpd %xmm0,(%edx)                       \n" | ||||
|     "movhpd %xmm0,(%ebx)                       \n" | ||||
|     "movlpd %xmm4,(%edx,%esi,1)                \n" | ||||
|     "lea    (%edx,%esi,2),%edx                 \n" | ||||
|     "movhpd %xmm4,(%ebx,%ebp,1)                \n" | ||||
|     "lea    (%ebx,%ebp,2),%ebx                 \n" | ||||
|     "movdqa %xmm2,%xmm0                        \n" | ||||
|     "punpckldq %xmm6,%xmm2                     \n" | ||||
|     "movlpd %xmm2,(%edx)                       \n" | ||||
|     "movhpd %xmm2,(%ebx)                       \n" | ||||
|     "punpckhdq %xmm6,%xmm0                     \n" | ||||
|     "movlpd %xmm0,(%edx,%esi,1)                \n" | ||||
|     "lea    (%edx,%esi,2),%edx                 \n" | ||||
|     "movhpd %xmm0,(%ebx,%ebp,1)                \n" | ||||
|     "lea    (%ebx,%ebp,2),%ebx                 \n" | ||||
|     "movdqa %xmm1,%xmm0                        \n" | ||||
|     "punpckldq %xmm5,%xmm1                     \n" | ||||
|     "movlpd %xmm1,(%edx)                       \n" | ||||
|     "movhpd %xmm1,(%ebx)                       \n" | ||||
|     "punpckhdq %xmm5,%xmm0                     \n" | ||||
|     "movlpd %xmm0,(%edx,%esi,1)                \n" | ||||
|     "lea    (%edx,%esi,2),%edx                 \n" | ||||
|     "movhpd %xmm0,(%ebx,%ebp,1)                \n" | ||||
|     "lea    (%ebx,%ebp,2),%ebx                 \n" | ||||
|     "movdqa %xmm3,%xmm0                        \n" | ||||
|     "punpckldq %xmm7,%xmm3                     \n" | ||||
|     "movlpd %xmm3,(%edx)                       \n" | ||||
|     "movhpd %xmm3,(%ebx)                       \n" | ||||
|     "punpckhdq %xmm7,%xmm0                     \n" | ||||
|     "sub    $0x8,%ecx                          \n" | ||||
|     "movlpd %xmm0,(%edx,%esi,1)                \n" | ||||
|     "lea    (%edx,%esi,2),%edx                 \n" | ||||
|     "movhpd %xmm0,(%ebx,%ebp,1)                \n" | ||||
|     "lea    (%ebx,%ebp,2),%ebx                 \n" | ||||
|     "jg     1b                                 \n" | ||||
|     "mov    0x10(%esp),%esp                    \n" | ||||
|     "pop    %ebp                               \n" | ||||
|     "pop    %edi                               \n" | ||||
|     "pop    %esi                               \n" | ||||
|     "pop    %ebx                               \n" | ||||
| #if defined(__native_client__) | ||||
|     "pop    %ecx                               \n" | ||||
|     "and    $0xffffffe0,%ecx                   \n" | ||||
|     "jmp    *%ecx                              \n" | ||||
| #else | ||||
|     "ret                                       \n" | ||||
| #endif | ||||
| ); | ||||
| #endif | ||||
| #if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ | ||||
|     defined(__x86_64__) | ||||
| // 64 bit version has enough registers to do 16x8 to 8x16 at a time. | ||||
| void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride, | ||||
|                              uint8* dst, int dst_stride, int width) { | ||||
|   asm volatile ( | ||||
|   // Read in the data from the source pointer. | ||||
|   // First round of bit swap. | ||||
|     LABELALIGN | ||||
|   ".p2align  2                                 \n" | ||||
| "1:                                            \n" | ||||
|   "movdqu     (%0),%%xmm0                      \n" | ||||
|   "movdqu     (%0,%3),%%xmm1                   \n" | ||||
| @@ -247,17 +373,14 @@ void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride, | ||||
|     "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13",  "xmm14",  "xmm15" | ||||
| ); | ||||
| } | ||||
| #endif  // defined(HAS_TRANSPOSEWX8_FAST_SSSE3) | ||||
|  | ||||
| // Transpose UV 8x8.  64 bit. | ||||
| #if defined(HAS_TRANSPOSEUVWX8_SSE2) | ||||
| void TransposeUVWx8_SSE2(const uint8* src, int src_stride, | ||||
|                          uint8* dst_a, int dst_stride_a, | ||||
|                          uint8* dst_b, int dst_stride_b, int width) { | ||||
|   asm volatile ( | ||||
|   // Read in the data from the source pointer. | ||||
|   // First round of bit swap. | ||||
|     LABELALIGN | ||||
|   ".p2align  2                                 \n" | ||||
| "1:                                            \n" | ||||
|   "movdqu     (%0),%%xmm0                      \n" | ||||
|   "movdqu     (%0,%4),%%xmm1                   \n" | ||||
| @@ -359,7 +482,9 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, | ||||
|     "xmm8", "xmm9" | ||||
| ); | ||||
| } | ||||
| #endif  // defined(HAS_TRANSPOSEUVWX8_SSE2) | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| #endif  // defined(__x86_64__) || defined(__i386__) | ||||
|  | ||||
| #ifdef __cplusplus | ||||
|   | ||||
							
								
								
									
										6
									
								
								third_party/libyuv/source/rotate_mips.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								third_party/libyuv/source/rotate_mips.cc
									
									
									
									
										vendored
									
									
								
							| @@ -22,7 +22,7 @@ extern "C" { | ||||
|     defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ | ||||
|     (_MIPS_SIM == _MIPS_SIM_ABI32) | ||||
|  | ||||
| void TransposeWx8_DSPR2(const uint8* src, int src_stride, | ||||
| void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride, | ||||
|                              uint8* dst, int dst_stride, int width) { | ||||
|    __asm__ __volatile__ ( | ||||
|       ".set push                                         \n" | ||||
| @@ -106,7 +106,7 @@ void TransposeWx8_DSPR2(const uint8* src, int src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride, | ||||
| void TransposeWx8_Fast_MIPS_DSPR2(const uint8* src, int src_stride, | ||||
|                                   uint8* dst, int dst_stride, int width) { | ||||
|   __asm__ __volatile__ ( | ||||
|       ".set noat                                         \n" | ||||
| @@ -308,7 +308,7 @@ void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void TransposeUVWx8_DSPR2(const uint8* src, int src_stride, | ||||
| void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride, | ||||
|                                uint8* dst_a, int dst_stride_a, | ||||
|                                uint8* dst_b, int dst_stride_b, | ||||
|                                int width) { | ||||
|   | ||||
							
								
								
									
										10
									
								
								third_party/libyuv/source/rotate_neon.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								third_party/libyuv/source/rotate_neon.cc
									
									
									
									
										vendored
									
									
								
							| @@ -27,7 +27,7 @@ static uvec8 kVTbl4x4Transpose = | ||||
| void TransposeWx8_NEON(const uint8* src, int src_stride, | ||||
|                        uint8* dst, int dst_stride, | ||||
|                        int width) { | ||||
|   const uint8* src_temp; | ||||
|   const uint8* src_temp = NULL; | ||||
|   asm volatile ( | ||||
|     // loops are on blocks of 8. loop will stop when | ||||
|     // counter gets to or below 0. starting the counter | ||||
| @@ -35,6 +35,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride, | ||||
|     "sub         %5, #8                        \n" | ||||
|  | ||||
|     // handle 8x8 blocks. this should be the majority of the plane | ||||
|     ".p2align  2                               \n" | ||||
|     "1:                                        \n" | ||||
|       "mov         %0, %1                      \n" | ||||
|  | ||||
| @@ -229,7 +230,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride, | ||||
|  | ||||
|     "4:                                        \n" | ||||
|  | ||||
|     : "=&r"(src_temp),         // %0 | ||||
|     : "+r"(src_temp),          // %0 | ||||
|       "+r"(src),               // %1 | ||||
|       "+r"(src_stride),        // %2 | ||||
|       "+r"(dst),               // %3 | ||||
| @@ -247,7 +248,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, | ||||
|                          uint8* dst_a, int dst_stride_a, | ||||
|                          uint8* dst_b, int dst_stride_b, | ||||
|                          int width) { | ||||
|   const uint8* src_temp; | ||||
|   const uint8* src_temp = NULL; | ||||
|   asm volatile ( | ||||
|     // loops are on blocks of 8. loop will stop when | ||||
|     // counter gets to or below 0. starting the counter | ||||
| @@ -255,6 +256,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, | ||||
|     "sub         %7, #8                        \n" | ||||
|  | ||||
|     // handle 8x8 blocks. this should be the majority of the plane | ||||
|     ".p2align  2                               \n" | ||||
|     "1:                                        \n" | ||||
|       "mov         %0, %1                      \n" | ||||
|  | ||||
| @@ -512,7 +514,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, | ||||
|  | ||||
|     "4:                                        \n" | ||||
|  | ||||
|     : "=&r"(src_temp),           // %0 | ||||
|     : "+r"(src_temp),            // %0 | ||||
|       "+r"(src),                 // %1 | ||||
|       "+r"(src_stride),          // %2 | ||||
|       "+r"(dst_a),               // %3 | ||||
|   | ||||
							
								
								
									
										8
									
								
								third_party/libyuv/source/rotate_neon64.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								third_party/libyuv/source/rotate_neon64.cc
									
									
									
									
										vendored
									
									
								
							| @@ -26,7 +26,7 @@ static uvec8 kVTbl4x4Transpose = | ||||
|  | ||||
| void TransposeWx8_NEON(const uint8* src, int src_stride, | ||||
|                        uint8* dst, int dst_stride, int width) { | ||||
|   const uint8* src_temp; | ||||
|   const uint8* src_temp = NULL; | ||||
|   int64 width64 = (int64) width;  // Work around clang 3.4 warning. | ||||
|   asm volatile ( | ||||
|     // loops are on blocks of 8. loop will stop when | ||||
| @@ -235,7 +235,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride, | ||||
|  | ||||
|     "4:                                          \n" | ||||
|  | ||||
|     : "=&r"(src_temp),                            // %0 | ||||
|     : "+r"(src_temp),                             // %0 | ||||
|       "+r"(src),                                  // %1 | ||||
|       "+r"(dst),                                  // %2 | ||||
|       "+r"(width64)                               // %3 | ||||
| @@ -255,7 +255,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, | ||||
|                          uint8* dst_a, int dst_stride_a, | ||||
|                          uint8* dst_b, int dst_stride_b, | ||||
|                          int width) { | ||||
|   const uint8* src_temp; | ||||
|   const uint8* src_temp = NULL; | ||||
|   int64 width64 = (int64) width;  // Work around clang 3.4 warning. | ||||
|   asm volatile ( | ||||
|     // loops are on blocks of 8. loop will stop when | ||||
| @@ -520,7 +520,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, | ||||
|  | ||||
|     "4:                                        \n" | ||||
|  | ||||
|     : "=&r"(src_temp),                            // %0 | ||||
|     : "+r"(src_temp),                             // %0 | ||||
|       "+r"(src),                                  // %1 | ||||
|       "+r"(dst_a),                                // %2 | ||||
|       "+r"(dst_b),                                // %3 | ||||
|   | ||||
							
								
								
									
										5
									
								
								third_party/libyuv/source/rotate_win.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								third_party/libyuv/source/rotate_win.cc
									
									
									
									
										vendored
									
									
								
							| @@ -16,8 +16,9 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // This module is for 32 bit Visual C x86 and clangcl | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) | ||||
| // This module is for Visual C x86. | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ | ||||
|     defined(_MSC_VER) && !defined(__clang__) | ||||
|  | ||||
| __declspec(naked) | ||||
| void TransposeWx8_SSSE3(const uint8* src, int src_stride, | ||||
|   | ||||
							
								
								
									
										394
									
								
								third_party/libyuv/source/row_any.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										394
									
								
								third_party/libyuv/source/row_any.cc
									
									
									
									
										vendored
									
									
								
							| @@ -22,39 +22,6 @@ extern "C" { | ||||
| // Subsampled source needs to be increased by 1 if not even. | ||||
| #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift)) | ||||
|  | ||||
| // Any 4 planes to 1 with yuvconstants | ||||
| #define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)                \ | ||||
|     void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf,   \ | ||||
|                  const uint8* a_buf, uint8* dst_ptr,                           \ | ||||
|                  const struct YuvConstants* yuvconstants,  int width) {        \ | ||||
|       SIMD_ALIGNED(uint8 temp[64 * 5]);                                        \ | ||||
|       memset(temp, 0, 64 * 4);  /* for msan */                                 \ | ||||
|       int r = width & MASK;                                                    \ | ||||
|       int n = width & ~MASK;                                                   \ | ||||
|       if (n > 0) {                                                             \ | ||||
|         ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);        \ | ||||
|       }                                                                        \ | ||||
|       memcpy(temp, y_buf + n, r);                                              \ | ||||
|       memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \ | ||||
|       memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));              \ | ||||
|       memcpy(temp + 192, a_buf + n, r);                                        \ | ||||
|       ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256,            \ | ||||
|                yuvconstants, MASK + 1);                                        \ | ||||
|       memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256,                      \ | ||||
|              SS(r, DUVSHIFT) * BPP);                                           \ | ||||
|     } | ||||
|  | ||||
| #ifdef HAS_I422ALPHATOARGBROW_SSSE3 | ||||
| ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_I422ALPHATOARGBROW_AVX2 | ||||
| ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422ALPHATOARGBROW_NEON | ||||
| ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7) | ||||
| #endif | ||||
| #undef ANY41C | ||||
|  | ||||
| // Any 3 planes to 1. | ||||
| #define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)                 \ | ||||
|     void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf,   \ | ||||
| @@ -73,9 +40,75 @@ ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7) | ||||
|       memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192,                      \ | ||||
|              SS(r, DUVSHIFT) * BPP);                                           \ | ||||
|     } | ||||
| #ifdef HAS_I422TOYUY2ROW_SSE2 | ||||
|  | ||||
| #ifdef HAS_I422TOARGBROW_SSSE3 | ||||
| ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_I444TOARGBROW_SSSE3 | ||||
| ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) | ||||
| ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) | ||||
| ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) | ||||
| ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) | ||||
| ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) | ||||
| ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) | ||||
| ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) | ||||
| ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) | ||||
| ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) | ||||
| ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) | ||||
| ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) | ||||
| ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) | ||||
| #endif  // HAS_I444TOARGBROW_SSSE3 | ||||
| #ifdef HAS_I422TORGB24ROW_AVX2 | ||||
| ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TORAWROW_AVX2 | ||||
| ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) | ||||
| #endif | ||||
| #ifdef HAS_J422TOARGBROW_SSSE3 | ||||
| ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_J422TOARGBROW_AVX2 | ||||
| ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TOARGBROW_AVX2 | ||||
| ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TOBGRAROW_AVX2 | ||||
| ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TORGBAROW_AVX2 | ||||
| ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TOABGRROW_AVX2 | ||||
| ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I444TOARGBROW_AVX2 | ||||
| ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I411TOARGBROW_AVX2 | ||||
| ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TOARGB4444ROW_AVX2 | ||||
| ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_I422TOARGB1555ROW_AVX2 | ||||
| ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_I422TORGB565ROW_AVX2 | ||||
| ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_I422TOARGBROW_NEON | ||||
| ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) | ||||
| ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) | ||||
| ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) | ||||
| ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) | ||||
| ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) | ||||
| ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) | ||||
| ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) | ||||
| ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) | ||||
| ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) | ||||
| ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) | ||||
| ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_I422TOYUY2ROW_NEON | ||||
| ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) | ||||
| @@ -83,91 +116,8 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) | ||||
| #ifdef HAS_I422TOUYVYROW_NEON | ||||
| ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_BLENDPLANEROW_AVX2 | ||||
| ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31) | ||||
| #endif | ||||
| #ifdef HAS_BLENDPLANEROW_SSSE3 | ||||
| ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) | ||||
| #endif | ||||
| #undef ANY31 | ||||
|  | ||||
| // Note that odd width replication includes 444 due to implementation | ||||
| // on arm that subsamples 444 to 422 internally. | ||||
| // Any 3 planes to 1 with yuvconstants | ||||
| #define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)                \ | ||||
|     void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf,   \ | ||||
|                  uint8* dst_ptr, const struct YuvConstants* yuvconstants,      \ | ||||
|                  int width) {                                                  \ | ||||
|       SIMD_ALIGNED(uint8 temp[64 * 4]);                                        \ | ||||
|       memset(temp, 0, 64 * 3);  /* for YUY2 and msan */                        \ | ||||
|       int r = width & MASK;                                                    \ | ||||
|       int n = width & ~MASK;                                                   \ | ||||
|       if (n > 0) {                                                             \ | ||||
|         ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);               \ | ||||
|       }                                                                        \ | ||||
|       memcpy(temp, y_buf + n, r);                                              \ | ||||
|       memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \ | ||||
|       memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));              \ | ||||
|       if (width & 1) {                                                         \ | ||||
|         temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1];             \ | ||||
|         temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1];           \ | ||||
|       }                                                                        \ | ||||
|       ANY_SIMD(temp, temp + 64, temp + 128, temp + 192,                        \ | ||||
|                yuvconstants, MASK + 1);                                        \ | ||||
|       memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192,                      \ | ||||
|              SS(r, DUVSHIFT) * BPP);                                           \ | ||||
|     } | ||||
|  | ||||
| #ifdef HAS_I422TOARGBROW_SSSE3 | ||||
| ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_I411TOARGBROW_SSSE3 | ||||
| ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_I444TOARGBROW_SSSE3 | ||||
| ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) | ||||
| ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) | ||||
| ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) | ||||
| ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) | ||||
| ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) | ||||
| ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) | ||||
| #endif  // HAS_I444TOARGBROW_SSSE3 | ||||
| #ifdef HAS_I422TORGB24ROW_AVX2 | ||||
| ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TOARGBROW_AVX2 | ||||
| ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TORGBAROW_AVX2 | ||||
| ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I444TOARGBROW_AVX2 | ||||
| ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I411TOARGBROW_AVX2 | ||||
| ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_I422TOARGB4444ROW_AVX2 | ||||
| ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_I422TOARGB1555ROW_AVX2 | ||||
| ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_I422TORGB565ROW_AVX2 | ||||
| ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_I422TOARGBROW_NEON | ||||
| ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) | ||||
| ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) | ||||
| ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) | ||||
| ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) | ||||
| ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) | ||||
| ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) | ||||
| ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) | ||||
| ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) | ||||
| #endif | ||||
| #undef ANY31C | ||||
|  | ||||
| // Any 2 planes to 1. | ||||
| #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)              \ | ||||
|     void NAMEANY(const uint8* y_buf, const uint8* uv_buf,                      \ | ||||
| @@ -186,6 +136,32 @@ ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) | ||||
|       memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \ | ||||
|     } | ||||
|  | ||||
| // Biplanar to RGB. | ||||
| #ifdef HAS_NV12TOARGBROW_SSSE3 | ||||
| ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) | ||||
| ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_NV12TOARGBROW_AVX2 | ||||
| ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) | ||||
| ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_NV12TOARGBROW_NEON | ||||
| ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) | ||||
| ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_NV12TORGB565ROW_SSSE3 | ||||
| ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) | ||||
| ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_NV12TORGB565ROW_AVX2 | ||||
| ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) | ||||
| ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15) | ||||
| #endif | ||||
| #ifdef HAS_NV12TORGB565ROW_NEON | ||||
| ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) | ||||
| ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7) | ||||
| #endif | ||||
|  | ||||
| // Merge functions. | ||||
| #ifdef HAS_MERGEUVROW_SSE2 | ||||
| ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) | ||||
| @@ -245,55 +221,6 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) | ||||
| #endif | ||||
| #undef ANY21 | ||||
|  | ||||
| // Any 2 planes to 1 with yuvconstants | ||||
| #define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)             \ | ||||
|     void NAMEANY(const uint8* y_buf, const uint8* uv_buf,                      \ | ||||
|                  uint8* dst_ptr, const struct YuvConstants* yuvconstants,      \ | ||||
|                  int width) {                                                  \ | ||||
|       SIMD_ALIGNED(uint8 temp[64 * 3]);                                        \ | ||||
|       memset(temp, 0, 64 * 2);  /* for msan */                                 \ | ||||
|       int r = width & MASK;                                                    \ | ||||
|       int n = width & ~MASK;                                                   \ | ||||
|       if (n > 0) {                                                             \ | ||||
|         ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                     \ | ||||
|       }                                                                        \ | ||||
|       memcpy(temp, y_buf + n * SBPP, r * SBPP);                                \ | ||||
|       memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2,                       \ | ||||
|              SS(r, UVSHIFT) * SBPP2);                                          \ | ||||
|       ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1);           \ | ||||
|       memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \ | ||||
|     } | ||||
|  | ||||
| // Biplanar to RGB. | ||||
| #ifdef HAS_NV12TOARGBROW_SSSE3 | ||||
| ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_NV12TOARGBROW_AVX2 | ||||
| ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_NV12TOARGBROW_NEON | ||||
| ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_NV21TOARGBROW_SSSE3 | ||||
| ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_NV21TOARGBROW_AVX2 | ||||
| ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_NV21TOARGBROW_NEON | ||||
| ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_NV12TORGB565ROW_SSSE3 | ||||
| ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) | ||||
| #endif | ||||
| #ifdef HAS_NV12TORGB565ROW_AVX2 | ||||
| ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) | ||||
| #endif | ||||
| #ifdef HAS_NV12TORGB565ROW_NEON | ||||
| ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) | ||||
| #endif | ||||
| #undef ANY21C | ||||
|  | ||||
| // Any 1 to 1. | ||||
| #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                     \ | ||||
|     void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {            \ | ||||
| @@ -325,10 +252,8 @@ ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3) | ||||
| ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3) | ||||
| ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) | ||||
| #endif | ||||
| #if defined(HAS_ARGBTORGB565ROW_AVX2) | ||||
| ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) | ||||
| #endif | ||||
| #if defined(HAS_ARGBTOARGB4444ROW_AVX2) | ||||
| ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) | ||||
| ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) | ||||
| ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) | ||||
| #endif | ||||
| @@ -344,16 +269,15 @@ ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7) | ||||
| #if defined(HAS_I400TOARGBROW_AVX2) | ||||
| ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15) | ||||
| #endif | ||||
| #if defined(HAS_RGB24TOARGBROW_SSSE3) | ||||
| #if defined(HAS_YUY2TOARGBROW_SSSE3) | ||||
| ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) | ||||
| ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) | ||||
| ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) | ||||
| ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) | ||||
| ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) | ||||
| ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) | ||||
| ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) | ||||
| #endif | ||||
| #if defined(HAS_RAWTORGB24ROW_SSSE3) | ||||
| ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7) | ||||
| #endif | ||||
| #if defined(HAS_RGB565TOARGBROW_AVX2) | ||||
| ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15) | ||||
| #endif | ||||
| @@ -363,6 +287,10 @@ ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) | ||||
| #if defined(HAS_ARGB4444TOARGBROW_AVX2) | ||||
| ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) | ||||
| #endif | ||||
| #if defined(HAS_YUY2TOARGBROW_AVX2) | ||||
| ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) | ||||
| ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) | ||||
| #endif | ||||
| #if defined(HAS_ARGBTORGB24ROW_NEON) | ||||
| ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) | ||||
| ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) | ||||
| @@ -371,9 +299,8 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) | ||||
| ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) | ||||
| ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) | ||||
| ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) | ||||
| #endif | ||||
| #if defined(HAS_RAWTORGB24ROW_NEON) | ||||
| ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) | ||||
| ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) | ||||
| ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_ARGBTOYROW_AVX2 | ||||
| ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) | ||||
| @@ -454,6 +381,9 @@ ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) | ||||
| #ifdef HAS_ARGBATTENUATEROW_SSSE3 | ||||
| ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3) | ||||
| #endif | ||||
| #ifdef HAS_ARGBATTENUATEROW_SSE2 | ||||
| ANY11(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, 0, 4, 4, 3) | ||||
| #endif | ||||
| #ifdef HAS_ARGBUNATTENUATEROW_SSE2 | ||||
| ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3) | ||||
| #endif | ||||
| @@ -466,44 +396,8 @@ ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) | ||||
| #ifdef HAS_ARGBATTENUATEROW_NEON | ||||
| ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 | ||||
| ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) | ||||
| #endif | ||||
| #ifdef HAS_ARGBEXTRACTALPHAROW_NEON | ||||
| ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) | ||||
| #endif | ||||
| #undef ANY11 | ||||
|  | ||||
| // Any 1 to 1 blended.  Destination is read, modify, write. | ||||
| #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                    \ | ||||
|     void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {            \ | ||||
|       SIMD_ALIGNED(uint8 temp[128 * 2]);                                       \ | ||||
|       memset(temp, 0, 128 * 2);  /* for YUY2 and msan */                       \ | ||||
|       int r = width & MASK;                                                    \ | ||||
|       int n = width & ~MASK;                                                   \ | ||||
|       if (n > 0) {                                                             \ | ||||
|         ANY_SIMD(src_ptr, dst_ptr, n);                                         \ | ||||
|       }                                                                        \ | ||||
|       memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP);    \ | ||||
|       memcpy(temp + 128, dst_ptr + n * BPP, r * BPP);                          \ | ||||
|       ANY_SIMD(temp, temp + 128, MASK + 1);                                    \ | ||||
|       memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \ | ||||
|     } | ||||
|  | ||||
| #ifdef HAS_ARGBCOPYALPHAROW_AVX2 | ||||
| ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_ARGBCOPYALPHAROW_SSE2 | ||||
| ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7) | ||||
| #endif | ||||
| #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 | ||||
| ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15) | ||||
| #endif | ||||
| #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 | ||||
| ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) | ||||
| #endif | ||||
| #undef ANY11B | ||||
|  | ||||
| // Any 1 to 1 with parameter. | ||||
| #define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                          \ | ||||
|     void NAMEANY(const uint8* src_ptr, uint8* dst_ptr,                         \ | ||||
| @@ -546,35 +440,6 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3) | ||||
| #endif | ||||
| #undef ANY11P | ||||
|  | ||||
| // Any 1 to 1 with yuvconstants | ||||
| #define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                    \ | ||||
|     void NAMEANY(const uint8* src_ptr, uint8* dst_ptr,                         \ | ||||
|                  const struct YuvConstants* yuvconstants, int width) {         \ | ||||
|       SIMD_ALIGNED(uint8 temp[128 * 2]);                                       \ | ||||
|       memset(temp, 0, 128);  /* for YUY2 and msan */                           \ | ||||
|       int r = width & MASK;                                                    \ | ||||
|       int n = width & ~MASK;                                                   \ | ||||
|       if (n > 0) {                                                             \ | ||||
|         ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n);                           \ | ||||
|       }                                                                        \ | ||||
|       memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP);    \ | ||||
|       ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1);                      \ | ||||
|       memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \ | ||||
|     } | ||||
| #if defined(HAS_YUY2TOARGBROW_SSSE3) | ||||
| ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) | ||||
| ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) | ||||
| #endif | ||||
| #if defined(HAS_YUY2TOARGBROW_AVX2) | ||||
| ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) | ||||
| ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) | ||||
| #endif | ||||
| #if defined(HAS_YUY2TOARGBROW_NEON) | ||||
| ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) | ||||
| ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) | ||||
| #endif | ||||
| #undef ANY11C | ||||
|  | ||||
| // Any 1 to 1 interpolate.  Takes 2 rows of source via stride. | ||||
| #define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                             \ | ||||
|     void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,                         \ | ||||
| @@ -599,11 +464,14 @@ ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31) | ||||
| #ifdef HAS_INTERPOLATEROW_SSSE3 | ||||
| ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_INTERPOLATEROW_SSE2 | ||||
| ANY11T(InterpolateRow_Any_SSE2, InterpolateRow_SSE2, 1, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_INTERPOLATEROW_NEON | ||||
| ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_INTERPOLATEROW_DSPR2 | ||||
| ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3) | ||||
| #ifdef HAS_INTERPOLATEROW_MIPS_DSPR2 | ||||
| ANY11T(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, 1, 1, 3) | ||||
| #endif | ||||
| #undef ANY11T | ||||
|  | ||||
| @@ -628,6 +496,9 @@ ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31) | ||||
| #ifdef HAS_MIRRORROW_SSSE3 | ||||
| ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_MIRRORROW_SSE2 | ||||
| ANY11M(MirrorRow_Any_SSE2, MirrorRow_SSE2, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_MIRRORROW_NEON | ||||
| ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15) | ||||
| #endif | ||||
| @@ -677,25 +548,9 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3) | ||||
|         ANY_SIMD(src_ptr, dst_u, dst_v, n);                                    \ | ||||
|       }                                                                        \ | ||||
|       memcpy(temp, src_ptr  + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);     \ | ||||
|       /* repeat last 4 bytes for 422 subsampler */                             \ | ||||
|       if ((width & 1) && BPP == 4 && DUVSHIFT == 1) {                          \ | ||||
|       if ((width & 1) && BPP == 4) {  /* repeat last 4 bytes for subsampler */ \ | ||||
|         memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \ | ||||
|                temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \ | ||||
|       }                                                                        \ | ||||
|       /* repeat last 4 - 12 bytes for 411 subsampler */                        \ | ||||
|       if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) {                   \ | ||||
|         memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \ | ||||
|                temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \ | ||||
|         memcpy(temp + SS(r, UVSHIFT) * BPP + BPP,                              \ | ||||
|                temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2);                    \ | ||||
|       }                                                                        \ | ||||
|       if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) {                   \ | ||||
|         memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \ | ||||
|                temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2);                \ | ||||
|       }                                                                        \ | ||||
|       if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) {                   \ | ||||
|         memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \ | ||||
|                temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \ | ||||
|                temp + SS(r, UVSHIFT) * BPP - BPP, 4);                          \ | ||||
|       }                                                                        \ | ||||
|       ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1);                        \ | ||||
|       memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT));            \ | ||||
| @@ -711,8 +566,8 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) | ||||
| #ifdef HAS_SPLITUVROW_NEON | ||||
| ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) | ||||
| #endif | ||||
| #ifdef HAS_SPLITUVROW_DSPR2 | ||||
| ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15) | ||||
| #ifdef HAS_SPLITUVROW_MIPS_DSPR2 | ||||
| ANY12(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_MIPS_DSPR2, 0, 2, 0, 15) | ||||
| #endif | ||||
| #ifdef HAS_ARGBTOUV444ROW_SSSE3 | ||||
| ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) | ||||
| @@ -721,12 +576,16 @@ ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) | ||||
| ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31) | ||||
| ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31) | ||||
| #endif | ||||
| #ifdef HAS_ARGBTOUV422ROW_SSSE3 | ||||
| ANY12(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, 0, 4, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_YUY2TOUV422ROW_SSE2 | ||||
| ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15) | ||||
| ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_YUY2TOUV422ROW_NEON | ||||
| ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) | ||||
| ANY12(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, 0, 4, 1, 15) | ||||
| ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31) | ||||
| ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) | ||||
| ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) | ||||
| @@ -748,11 +607,11 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) | ||||
|       memcpy(temp, src_ptr  + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);     \ | ||||
|       memcpy(temp + 128, src_ptr  + src_stride_ptr + (n >> UVSHIFT) * BPP,     \ | ||||
|              SS(r, UVSHIFT) * BPP);                                            \ | ||||
|       if ((width & 1) && UVSHIFT == 0) {  /* repeat last pixel for subsample */\ | ||||
|       if ((width & 1) && BPP == 4) {  /* repeat last 4 bytes for subsampler */ \ | ||||
|         memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \ | ||||
|                temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \ | ||||
|                temp + SS(r, UVSHIFT) * BPP - BPP, 4);                          \ | ||||
|         memcpy(temp + 128 + SS(r, UVSHIFT) * BPP,                              \ | ||||
|                temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP);                  \ | ||||
|                temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4);                    \ | ||||
|       }                                                                        \ | ||||
|       ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1);                   \ | ||||
|       memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1));                          \ | ||||
| @@ -762,9 +621,6 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) | ||||
| #ifdef HAS_ARGBTOUVROW_AVX2 | ||||
| ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31) | ||||
| #endif | ||||
| #ifdef HAS_ARGBTOUVJROW_AVX2 | ||||
| ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31) | ||||
| #endif | ||||
| #ifdef HAS_ARGBTOUVROW_SSSE3 | ||||
| ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15) | ||||
| ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15) | ||||
|   | ||||
							
								
								
									
										1235
									
								
								third_party/libyuv/source/row_common.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1235
									
								
								third_party/libyuv/source/row_common.cc
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2059
									
								
								third_party/libyuv/source/row_gcc.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2059
									
								
								third_party/libyuv/source/row_gcc.cc
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										155
									
								
								third_party/libyuv/source/row_mips.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										155
									
								
								third_party/libyuv/source/row_mips.cc
									
									
									
									
										vendored
									
									
								
							| @@ -375,12 +375,12 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) { | ||||
| } | ||||
| #endif  // HAS_COPYROW_MIPS | ||||
|  | ||||
| // DSPR2 functions | ||||
| // MIPS DSPR2 functions | ||||
| #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \ | ||||
|     (__mips_dsp_rev >= 2) && \ | ||||
|     (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6) | ||||
|  | ||||
| void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
| void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
|                            int width) { | ||||
|   __asm__ __volatile__ ( | ||||
|     ".set push                                     \n" | ||||
| @@ -389,6 +389,7 @@ void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
|     "blez            $t4, 2f                       \n" | ||||
|     " andi           %[width], %[width], 0xf       \n"  // residual | ||||
|  | ||||
|     ".p2align        2                             \n" | ||||
|   "1:                                              \n" | ||||
|     "addiu           $t4, $t4, -1                  \n" | ||||
|     "lw              $t0, 0(%[src_uv])             \n"  // V1 | U1 | V0 | U0 | ||||
| @@ -446,7 +447,7 @@ void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) { | ||||
| void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) { | ||||
|   __asm__ __volatile__ ( | ||||
|     ".set push                             \n" | ||||
|     ".set noreorder                        \n" | ||||
| @@ -456,6 +457,7 @@ void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) { | ||||
|     "blez      $t4, 2f                     \n" | ||||
|     " addu     %[src], %[src], %[width]    \n"  // src += width | ||||
|  | ||||
|     ".p2align  2                           \n" | ||||
|    "1:                                     \n" | ||||
|     "lw        $t0, -16(%[src])            \n"  // |3|2|1|0| | ||||
|     "lw        $t1, -12(%[src])            \n"  // |7|6|5|4| | ||||
| @@ -496,10 +498,10 @@ void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) { | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
| void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
|                             int width) { | ||||
|   int x; | ||||
|   int y; | ||||
|   int x = 0; | ||||
|   int y = 0; | ||||
|   __asm__ __volatile__ ( | ||||
|     ".set push                                    \n" | ||||
|     ".set noreorder                               \n" | ||||
| @@ -510,6 +512,7 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
|     "blez            %[x], 2f                     \n" | ||||
|     " addu           %[src_uv], %[src_uv], $t4    \n" | ||||
|  | ||||
|     ".p2align        2                            \n" | ||||
|    "1:                                            \n" | ||||
|     "lw              $t0, -32(%[src_uv])          \n"  // |3|2|1|0| | ||||
|     "lw              $t1, -28(%[src_uv])          \n"  // |7|6|5|4| | ||||
| @@ -579,7 +582,7 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
|         [dst_u] "+r" (dst_u), | ||||
|         [dst_v] "+r" (dst_v), | ||||
|         [x] "=&r" (x), | ||||
|         [y] "=&r" (y) | ||||
|         [y] "+r" (y) | ||||
|       : [width] "r" (width) | ||||
|       : "t0", "t1", "t2", "t3", "t4", | ||||
|       "t5", "t7", "t8", "t9" | ||||
| @@ -593,7 +596,7 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
| // t8 = | 0 | G1 | 0 | g1 | | ||||
| // t2 = | 0 | R0 | 0 | r0 | | ||||
| // t1 = | 0 | R1 | 0 | r1 | | ||||
| #define YUVTORGB                                                               \ | ||||
| #define I422ToTransientMipsRGB                                                 \ | ||||
|       "lw                $t0, 0(%[y_buf])       \n"                            \ | ||||
|       "lhu               $t1, 0(%[u_buf])       \n"                            \ | ||||
|       "lhu               $t2, 0(%[v_buf])       \n"                            \ | ||||
| @@ -652,12 +655,10 @@ void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | ||||
|       "addu.ph           $t2, $t2, $s5          \n"                            \ | ||||
|       "addu.ph           $t1, $t1, $s5          \n" | ||||
|  | ||||
| // TODO(fbarchard): accept yuv conversion constants. | ||||
| void I422ToARGBRow_DSPR2(const uint8* y_buf, | ||||
| void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, | ||||
|                               const uint8* u_buf, | ||||
|                               const uint8* v_buf, | ||||
|                               uint8* rgb_buf, | ||||
|                          const struct YuvConstants* yuvconstants, | ||||
|                               int width) { | ||||
|   __asm__ __volatile__ ( | ||||
|     ".set push                                \n" | ||||
| @@ -672,8 +673,9 @@ void I422ToARGBRow_DSPR2(const uint8* y_buf, | ||||
|     "lui               $s6, 0xff00            \n" | ||||
|     "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|ff| | ||||
|  | ||||
|     ".p2align          2                      \n" | ||||
|    "1:                                        \n" | ||||
|       YUVTORGB | ||||
|       I422ToTransientMipsRGB | ||||
| // Arranging into argb format | ||||
|     "precr.qb.ph       $t4, $t8, $t4          \n"  // |G1|g1|B1|b1| | ||||
|     "precr.qb.ph       $t5, $t9, $t5          \n"  // |G0|g0|B0|b0| | ||||
| @@ -715,8 +717,134 @@ void I422ToARGBRow_DSPR2(const uint8* y_buf, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, | ||||
|                               const uint8* u_buf, | ||||
|                               const uint8* v_buf, | ||||
|                               uint8* rgb_buf, | ||||
|                               int width) { | ||||
|   __asm__ __volatile__ ( | ||||
|     ".set push                                \n" | ||||
|     ".set noreorder                           \n" | ||||
|     "beqz              %[width], 2f           \n" | ||||
|     " repl.ph          $s0, 74                \n"  // |YG|YG| = |74|74| | ||||
|     "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25| | ||||
|     "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52| | ||||
|     "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102| | ||||
|     "repl.ph           $s4, 16                \n"  // |0|16|0|16| | ||||
|     "repl.ph           $s5, 128               \n"  // |128|128| | ||||
|     "lui               $s6, 0xff00            \n" | ||||
|     "ori               $s6, 0xff00            \n"  // |ff|00|ff|00| | ||||
|  | ||||
|     ".p2align          2                       \n" | ||||
|    "1:                                         \n" | ||||
|       I422ToTransientMipsRGB | ||||
| // Arranging into abgr format | ||||
|     "precr.qb.ph      $t0, $t8, $t1           \n"  // |G1|g1|R1|r1| | ||||
|     "precr.qb.ph      $t3, $t9, $t2           \n"  // |G0|g0|R0|r0| | ||||
|     "precrq.qb.ph     $t8, $t0, $t3           \n"  // |G1|R1|G0|R0| | ||||
|     "precr.qb.ph      $t9, $t0, $t3           \n"  // |g1|r1|g0|r0| | ||||
|  | ||||
|     "precr.qb.ph       $t2, $t4, $t5          \n"  // |B1|b1|B0|b0| | ||||
|     "addiu             %[width], -4           \n" | ||||
|     "addiu             %[y_buf], 4            \n" | ||||
|     "preceu.ph.qbla    $t1, $t2               \n"  // |0 |B1|0 |B0| | ||||
|     "preceu.ph.qbra    $t2, $t2               \n"  // |0 |b1|0 |b0| | ||||
|     "or                $t1, $t1, $s6          \n"  // |ff|B1|ff|B0| | ||||
|     "or                $t2, $t2, $s6          \n"  // |ff|b1|ff|b0| | ||||
|     "precrq.ph.w       $t0, $t2, $t9          \n"  // |ff|b1|g1|r1| | ||||
|     "precrq.ph.w       $t3, $t1, $t8          \n"  // |ff|B1|G1|R1| | ||||
|     "sll               $t9, $t9, 16           \n" | ||||
|     "sll               $t8, $t8, 16           \n" | ||||
|     "packrl.ph         $t2, $t2, $t9          \n"  // |ff|b0|g0|r0| | ||||
|     "packrl.ph         $t1, $t1, $t8          \n"  // |ff|B0|G0|R0| | ||||
| // Store results. | ||||
|     "sw                $t2, 0(%[rgb_buf])     \n" | ||||
|     "sw                $t0, 4(%[rgb_buf])     \n" | ||||
|     "sw                $t1, 8(%[rgb_buf])     \n" | ||||
|     "sw                $t3, 12(%[rgb_buf])    \n" | ||||
|     "bnez              %[width], 1b           \n" | ||||
|     " addiu            %[rgb_buf], 16         \n" | ||||
|    "2:                                        \n" | ||||
|     ".set pop                                 \n" | ||||
|       :[y_buf] "+r" (y_buf), | ||||
|        [u_buf] "+r" (u_buf), | ||||
|        [v_buf] "+r" (v_buf), | ||||
|        [width] "+r" (width), | ||||
|        [rgb_buf] "+r" (rgb_buf) | ||||
|       : | ||||
|       : "t0", "t1",  "t2", "t3",  "t4", "t5", | ||||
|       "t6", "t7", "t8", "t9", | ||||
|       "s0", "s1", "s2", "s3", | ||||
|       "s4", "s5", "s6" | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, | ||||
|                               const uint8* u_buf, | ||||
|                               const uint8* v_buf, | ||||
|                               uint8* rgb_buf, | ||||
|                               int width) { | ||||
|   __asm__ __volatile__ ( | ||||
|     ".set push                                \n" | ||||
|     ".set noreorder                           \n" | ||||
|     "beqz              %[width], 2f           \n" | ||||
|     " repl.ph          $s0, 74                \n"  // |YG|YG| = |74 |74 | | ||||
|     "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25| | ||||
|     "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52| | ||||
|     "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102| | ||||
|     "repl.ph           $s4, 16                \n"  // |0|16|0|16| | ||||
|     "repl.ph           $s5, 128               \n"  // |128|128| | ||||
|     "lui               $s6, 0xff              \n" | ||||
|     "ori               $s6, 0xff              \n"  // |00|ff|00|ff| | ||||
|  | ||||
|     ".p2align          2                      \n" | ||||
|    "1:                                        \n" | ||||
|       I422ToTransientMipsRGB | ||||
|       // Arranging into bgra format | ||||
|     "precr.qb.ph       $t4, $t4, $t8          \n"  // |B1|b1|G1|g1| | ||||
|     "precr.qb.ph       $t5, $t5, $t9          \n"  // |B0|b0|G0|g0| | ||||
|     "precrq.qb.ph      $t8, $t4, $t5          \n"  // |B1|G1|B0|G0| | ||||
|     "precr.qb.ph       $t9, $t4, $t5          \n"  // |b1|g1|b0|g0| | ||||
|  | ||||
|     "precr.qb.ph       $t2, $t1, $t2          \n"  // |R1|r1|R0|r0| | ||||
|     "addiu             %[width], -4           \n" | ||||
|     "addiu             %[y_buf], 4            \n" | ||||
|     "preceu.ph.qbla    $t1, $t2               \n"  // |0 |R1|0 |R0| | ||||
|     "preceu.ph.qbra    $t2, $t2               \n"  // |0 |r1|0 |r0| | ||||
|     "sll               $t1, $t1, 8            \n"  // |R1|0 |R0|0 | | ||||
|     "sll               $t2, $t2, 8            \n"  // |r1|0 |r0|0 | | ||||
|     "or                $t1, $t1, $s6          \n"  // |R1|ff|R0|ff| | ||||
|     "or                $t2, $t2, $s6          \n"  // |r1|ff|r0|ff| | ||||
|     "precrq.ph.w       $t0, $t9, $t2          \n"  // |b1|g1|r1|ff| | ||||
|     "precrq.ph.w       $t3, $t8, $t1          \n"  // |B1|G1|R1|ff| | ||||
|     "sll               $t1, $t1, 16           \n" | ||||
|     "sll               $t2, $t2, 16           \n" | ||||
|     "packrl.ph         $t2, $t9, $t2          \n"  // |b0|g0|r0|ff| | ||||
|     "packrl.ph         $t1, $t8, $t1          \n"  // |B0|G0|R0|ff| | ||||
| // Store results. | ||||
|     "sw                $t2, 0(%[rgb_buf])     \n" | ||||
|     "sw                $t0, 4(%[rgb_buf])     \n" | ||||
|     "sw                $t1, 8(%[rgb_buf])     \n" | ||||
|     "sw                $t3, 12(%[rgb_buf])    \n" | ||||
|     "bnez              %[width], 1b           \n" | ||||
|     " addiu            %[rgb_buf], 16         \n" | ||||
|    "2:                                        \n" | ||||
|     ".set pop                                 \n" | ||||
|       :[y_buf] "+r" (y_buf), | ||||
|        [u_buf] "+r" (u_buf), | ||||
|        [v_buf] "+r" (v_buf), | ||||
|        [width] "+r" (width), | ||||
|        [rgb_buf] "+r" (rgb_buf) | ||||
|       : | ||||
|       : "t0", "t1",  "t2", "t3",  "t4", "t5", | ||||
|       "t6", "t7", "t8", "t9", | ||||
|       "s0", "s1", "s2", "s3", | ||||
|       "s4", "s5", "s6" | ||||
|   ); | ||||
| } | ||||
|  | ||||
| // Bilinear filter 8x2 -> 8x1 | ||||
| void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr, | ||||
| void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, | ||||
|                                ptrdiff_t src_stride, int dst_width, | ||||
|                                int source_y_fraction) { | ||||
|     int y0_fraction = 256 - source_y_fraction; | ||||
| @@ -729,6 +857,7 @@ void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr, | ||||
|      "replv.ph          $t0, %[y0_fraction]               \n" | ||||
|      "replv.ph          $t1, %[source_y_fraction]         \n" | ||||
|  | ||||
|     ".p2align           2                                 \n" | ||||
|    "1:                                                    \n" | ||||
|      "lw                $t2, 0(%[src_ptr])                \n" | ||||
|      "lw                $t3, 0(%[src_ptr1])               \n" | ||||
|   | ||||
							
								
								
									
										795
									
								
								third_party/libyuv/source/row_neon.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										795
									
								
								third_party/libyuv/source/row_neon.cc
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										854
									
								
								third_party/libyuv/source/row_neon64.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										854
									
								
								third_party/libyuv/source/row_neon64.cc
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2212
									
								
								third_party/libyuv/source/row_win.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2212
									
								
								third_party/libyuv/source/row_win.cc
									
									
									
									
										vendored
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										146
									
								
								third_party/libyuv/source/row_x86.asm
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										146
									
								
								third_party/libyuv/source/row_x86.asm
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,146 @@ | ||||
| ; | ||||
| ; Copyright 2012 The LibYuv Project Authors. All rights reserved. | ||||
| ; | ||||
| ; Use of this source code is governed by a BSD-style license | ||||
| ; that can be found in the LICENSE file in the root of the source | ||||
| ; tree. An additional intellectual property rights grant can be found | ||||
| ; in the file PATENTS. All contributing project authors may | ||||
| ; be found in the AUTHORS file in the root of the source tree. | ||||
| ; | ||||
|  | ||||
| %ifdef __YASM_VERSION_ID__ | ||||
| %if __YASM_VERSION_ID__ < 01020000h | ||||
| %error AVX2 is supported only by yasm 1.2.0 or later. | ||||
| %endif | ||||
| %endif | ||||
| %include "x86inc.asm" | ||||
|  | ||||
| SECTION .text | ||||
|  | ||||
| ; cglobal numeric constants are parameters, gpr regs, mm regs | ||||
|  | ||||
| ; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) | ||||
| ; | ||||
| ; YUY2TOYROW format, move-suffix [, name-suffix] | ||||
| ;   Emits one row function that extracts the Y bytes of a packed row. | ||||
| ;   arg 1: source format, YUY2 (Y in even bytes) or UYVY (Y in odd bytes). | ||||
| ;   arg 2: suffix pasted onto "mov" for every load/store (a or u below). | ||||
| ;          Presumably mova needs aligned pointers and movu does not; those | ||||
| ;          aliases come from x86inc.asm, which is not shown here. | ||||
| ;   arg 3: optional text appended to the function name. | ||||
| ; Each loop iteration reads 2 * mmsize source bytes, writes mmsize Y bytes and | ||||
| ; subtracts mmsize from pix; it repeats while the result is still > 0. The | ||||
| ; body therefore always runs at least once and works in whole mmsize steps, | ||||
| ; so callers presumably pass a pix that is a positive multiple of mmsize. | ||||
|  | ||||
| %macro YUY2TOYROW 2-3 | ||||
| cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix | ||||
| %ifidn %1,YUY2 | ||||
|     pcmpeqb    m2, m2, m2        ; generate mask 0x00ff00ff | ||||
|     psrlw      m2, m2, 8 | ||||
| %endif | ||||
|  | ||||
|     ALIGN      4 | ||||
| .convertloop: | ||||
|     mov%2      m0, [src_yuy2q] | ||||
|     mov%2      m1, [src_yuy2q + mmsize] | ||||
|     lea        src_yuy2q, [src_yuy2q + mmsize * 2] | ||||
| %ifidn %1,YUY2 | ||||
|     pand       m0, m0, m2   ; YUY2 even bytes are Y | ||||
|     pand       m1, m1, m2 | ||||
| %else | ||||
|     psrlw      m0, m0, 8    ; UYVY odd bytes are Y | ||||
|     psrlw      m1, m1, 8 | ||||
| %endif | ||||
|     ; Both registers now hold Y as 16-bit words; narrow them back to bytes. | ||||
|     packuswb   m0, m0, m1 | ||||
| %if cpuflag(AVX2) | ||||
|     ; 256-bit packuswb packs within each 128-bit lane, leaving the qwords in | ||||
|     ; order 0,2,1,3; selector 0xd8 puts them back in linear order. | ||||
|     vpermq     m0, m0, 0xd8 | ||||
| %endif | ||||
|     ; The flags set by this sub are the ones jg tests below: the mov and lea | ||||
|     ; in between do not modify flags. | ||||
|     sub        pixd, mmsize | ||||
|     mov%2      [dst_yq], m0 | ||||
|     lea        dst_yq, [dst_yq + mmsize] | ||||
|     jg         .convertloop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| ; Instantiations. A trailing comma passes an empty third argument, so the | ||||
| ; aligned variants get no name suffix. AVX2 only gets aligned variants. | ||||
| ; TODO(fbarchard): Remove MMX.  Add SSSE3 pshufb version. | ||||
| INIT_MMX MMX | ||||
| YUY2TOYROW YUY2,a, | ||||
| YUY2TOYROW YUY2,u,_Unaligned | ||||
| YUY2TOYROW UYVY,a, | ||||
| YUY2TOYROW UYVY,u,_Unaligned | ||||
| INIT_XMM SSE2 | ||||
| YUY2TOYROW YUY2,a, | ||||
| YUY2TOYROW YUY2,u,_Unaligned | ||||
| YUY2TOYROW UYVY,a, | ||||
| YUY2TOYROW UYVY,u,_Unaligned | ||||
| INIT_YMM AVX2 | ||||
| YUY2TOYROW YUY2,a, | ||||
| YUY2TOYROW UYVY,a, | ||||
|  | ||||
| ; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) | ||||
| ; | ||||
| ; SplitUVRow move-suffix [, name-suffix] | ||||
| ;   Emits one row function that de-interleaves src_uv: even source bytes go | ||||
| ;   to dst_u and odd source bytes go to dst_v. | ||||
| ;   arg 1: suffix pasted onto "mov" for every load/store (a or u below). | ||||
| ;   arg 2: optional text appended to the function name. | ||||
| ; Each loop iteration reads 2 * mmsize source bytes, writes mmsize bytes to | ||||
| ; each destination and subtracts mmsize from pix; it repeats while the result | ||||
| ; is still > 0, so the body always runs at least once in whole mmsize steps. | ||||
|  | ||||
| %macro SplitUVRow 1-2 | ||||
| cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix | ||||
|     pcmpeqb    m4, m4, m4        ; generate mask 0x00ff00ff | ||||
|     psrlw      m4, m4, 8 | ||||
|     ; Turn dst_v into an offset from dst_u so that only dst_u has to be | ||||
|     ; advanced in the loop; the V store then addresses dst_u + offset. | ||||
|     sub        dst_vq, dst_uq | ||||
|  | ||||
|     ALIGN      4 | ||||
| .convertloop: | ||||
|     mov%1      m0, [src_uvq] | ||||
|     mov%1      m1, [src_uvq + mmsize] | ||||
|     lea        src_uvq, [src_uvq + mmsize * 2] | ||||
|     psrlw      m2, m0, 8         ; odd bytes | ||||
|     psrlw      m3, m1, 8 | ||||
|     pand       m0, m0, m4        ; even bytes | ||||
|     pand       m1, m1, m4 | ||||
|     packuswb   m0, m0, m1 | ||||
|     packuswb   m2, m2, m3 | ||||
| %if cpuflag(AVX2) | ||||
|     ; 256-bit packuswb packs within each 128-bit lane, leaving the qwords in | ||||
|     ; order 0,2,1,3; selector 0xd8 puts them back in linear order. | ||||
|     vpermq     m0, m0, 0xd8 | ||||
|     vpermq     m2, m2, 0xd8 | ||||
| %endif | ||||
|     mov%1      [dst_uq], m0 | ||||
|     mov%1      [dst_uq + dst_vq], m2 | ||||
|     lea        dst_uq, [dst_uq + mmsize] | ||||
|     sub        pixd, mmsize | ||||
|     jg         .convertloop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| ; Instantiations. A trailing comma passes an empty second argument, so the | ||||
| ; aligned variants get no name suffix. AVX2 only gets an aligned variant. | ||||
| INIT_MMX MMX | ||||
| SplitUVRow a, | ||||
| SplitUVRow u,_Unaligned | ||||
| INIT_XMM SSE2 | ||||
| SplitUVRow a, | ||||
| SplitUVRow u,_Unaligned | ||||
| INIT_YMM AVX2 | ||||
| SplitUVRow a, | ||||
|  | ||||
| ; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, | ||||
| ;                      int width); | ||||
| ; | ||||
| ; MergeUVRow_ move-suffix [, name-suffix] | ||||
| ;   Emits one row function that interleaves src_u and src_v into dst_uv as | ||||
| ;   U,V,U,V,... byte pairs. | ||||
| ;   arg 1: suffix pasted onto "mov" for every load/store (a or u below). | ||||
| ;   arg 2: optional text appended to the function name. Note the base name | ||||
| ;          already ends in an underscore, so the unaligned variants come out | ||||
| ;          as MergeUVRow__Unaligned (kept as-is so existing references to the | ||||
| ;          symbol do not change). | ||||
| ; Each loop iteration reads mmsize bytes from each source, writes 2 * mmsize | ||||
| ; bytes and subtracts mmsize from the width; it repeats while the result is | ||||
| ; still > 0, so the body always runs at least once in whole mmsize steps. | ||||
|  | ||||
| %macro MergeUVRow_ 1-2 | ||||
| cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix | ||||
|     ; Turn src_v into an offset from src_u so that only src_u has to be | ||||
|     ; advanced in the loop. | ||||
|     sub        src_vq, src_uq | ||||
|  | ||||
|     ALIGN      4 | ||||
| .convertloop: | ||||
|     mov%1      m0, [src_uq] | ||||
|     ; src_vq is an offset after the sub above, so V must be addressed | ||||
|     ; relative to src_uq (and before src_uq is advanced). | ||||
|     mov%1      m1, [src_uq + src_vq] | ||||
|     lea        src_uq, [src_uq + mmsize] | ||||
|     punpcklbw  m2, m0, m1       ; UV pairs from the low half (of each lane) | ||||
|     punpckhbw  m0, m0, m1       ; UV pairs from the high half (of each lane) | ||||
| %if cpuflag(AVX2) | ||||
|     ; The 256-bit unpacks work per 128-bit lane, so stitch the lanes back | ||||
|     ; into linear order before storing. | ||||
|     vperm2i128 m1, m2, m0, 0x20  ; low 128 of ymm2 and low 128 of ymm0 | ||||
|     vperm2i128 m2, m2, m0, 0x31  ; high 128 of ymm2 and high 128 of ymm0 | ||||
|     mov%1      [dst_uvq], m1 | ||||
|     mov%1      [dst_uvq + mmsize], m2 | ||||
| %else | ||||
|     mov%1      [dst_uvq], m2 | ||||
|     mov%1      [dst_uvq + mmsize], m0 | ||||
| %endif | ||||
|     lea        dst_uvq, [dst_uvq + mmsize * 2] | ||||
|     sub        pixd, mmsize | ||||
|     jg         .convertloop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| ; Instantiations. A trailing comma passes an empty second argument, so the | ||||
| ; aligned variants get no extra name suffix. AVX2 only gets an aligned variant. | ||||
| INIT_MMX MMX | ||||
| MergeUVRow_ a, | ||||
| MergeUVRow_ u,_Unaligned | ||||
| INIT_XMM SSE2 | ||||
| MergeUVRow_ a, | ||||
| MergeUVRow_ u,_Unaligned | ||||
| INIT_YMM AVX2 | ||||
| MergeUVRow_ a, | ||||
|  | ||||
							
								
								
									
										153
									
								
								third_party/libyuv/source/scale.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										153
									
								
								third_party/libyuv/source/scale.cc
									
									
									
									
										vendored
									
									
								
							| @@ -61,15 +61,15 @@ static void ScalePlaneDown2(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN2_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSSE3 : | ||||
|         (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 : | ||||
|         ScaleRowDown2Box_Any_SSSE3); | ||||
| #if defined(HAS_SCALEROWDOWN2_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 : | ||||
|         (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 : | ||||
|         ScaleRowDown2Box_Any_SSE2); | ||||
|     if (IS_ALIGNED(dst_width, 16)) { | ||||
|       ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSSE3 : | ||||
|           (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 : | ||||
|           ScaleRowDown2Box_SSSE3); | ||||
|       ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 : | ||||
|           (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 : | ||||
|           ScaleRowDown2Box_SSE2); | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -85,12 +85,12 @@ static void ScalePlaneDown2(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN2_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && | ||||
| #if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && | ||||
|       IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && | ||||
|       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     ScaleRowDown2 = filtering ? | ||||
|         ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2; | ||||
|         ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| @@ -135,12 +135,12 @@ static void ScalePlaneDown2_16(int src_width, int src_height, | ||||
|         ScaleRowDown2Box_16_SSE2); | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN2_16_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && | ||||
| #if defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && | ||||
|       IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && | ||||
|       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     ScaleRowDown2 = filtering ? | ||||
|         ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2; | ||||
|         ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| @@ -182,12 +182,12 @@ static void ScalePlaneDown4(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN4_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
| #if defined(HAS_SCALEROWDOWN4_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     ScaleRowDown4 = filtering ? | ||||
|         ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3; | ||||
|         ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2; | ||||
|     if (IS_ALIGNED(dst_width, 8)) { | ||||
|       ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3; | ||||
|       ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -200,12 +200,12 @@ static void ScalePlaneDown4(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN4_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && | ||||
| #if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && | ||||
|       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     ScaleRowDown4 = filtering ? | ||||
|         ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2; | ||||
|         ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| @@ -245,12 +245,12 @@ static void ScalePlaneDown4_16(int src_width, int src_height, | ||||
|         ScaleRowDown4_16_SSE2; | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN4_16_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && | ||||
| #if defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && | ||||
|       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     ScaleRowDown4 = filtering ? | ||||
|         ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2; | ||||
|         ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| @@ -325,16 +325,16 @@ static void ScalePlaneDown34(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN34_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && | ||||
| #if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) && | ||||
|       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     if (!filtering) { | ||||
|       ScaleRowDown34_0 = ScaleRowDown34_DSPR2; | ||||
|       ScaleRowDown34_1 = ScaleRowDown34_DSPR2; | ||||
|       ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2; | ||||
|       ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2; | ||||
|     } else { | ||||
|       ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2; | ||||
|       ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2; | ||||
|       ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2; | ||||
|       ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -404,16 +404,16 @@ static void ScalePlaneDown34_16(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN34_16_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && | ||||
| #if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) && | ||||
|       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     if (!filtering) { | ||||
|       ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2; | ||||
|       ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2; | ||||
|       ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2; | ||||
|       ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2; | ||||
|     } else { | ||||
|       ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2; | ||||
|       ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2; | ||||
|       ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2; | ||||
|       ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -517,16 +517,16 @@ static void ScalePlaneDown38(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN38_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && | ||||
| #if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && | ||||
|       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     if (!filtering) { | ||||
|       ScaleRowDown38_3 = ScaleRowDown38_DSPR2; | ||||
|       ScaleRowDown38_2 = ScaleRowDown38_DSPR2; | ||||
|       ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2; | ||||
|       ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2; | ||||
|     } else { | ||||
|       ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2; | ||||
|       ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2; | ||||
|       ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2; | ||||
|       ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -595,16 +595,16 @@ static void ScalePlaneDown38_16(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_SCALEROWDOWN38_16_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && | ||||
| #if defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && | ||||
|       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     if (!filtering) { | ||||
|       ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2; | ||||
|       ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2; | ||||
|       ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2; | ||||
|       ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2; | ||||
|     } else { | ||||
|       ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2; | ||||
|       ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2; | ||||
|       ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2; | ||||
|       ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -659,6 +659,7 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, | ||||
|   int i; | ||||
|   int scaletbl[2]; | ||||
|   int minboxwidth = dx >> 16; | ||||
|   int* scaleptr = scaletbl - minboxwidth; | ||||
|   int boxwidth; | ||||
|   scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight); | ||||
|   scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight); | ||||
| @@ -666,8 +667,7 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, | ||||
|     int ix = x >> 16; | ||||
|     x += dx; | ||||
|     boxwidth = MIN1((x >> 16) - ix); | ||||
|     *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * | ||||
|         scaletbl[boxwidth - minboxwidth] >> 16; | ||||
|     *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; | ||||
|   } | ||||
| } | ||||
|  | ||||
| @@ -676,6 +676,7 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, | ||||
|   int i; | ||||
|   int scaletbl[2]; | ||||
|   int minboxwidth = dx >> 16; | ||||
|   int* scaleptr = scaletbl - minboxwidth; | ||||
|   int boxwidth; | ||||
|   scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight); | ||||
|   scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight); | ||||
| @@ -683,8 +684,8 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, | ||||
|     int ix = x >> 16; | ||||
|     x += dx; | ||||
|     boxwidth = MIN1((x >> 16) - ix); | ||||
|     *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * | ||||
|         scaletbl[boxwidth - minboxwidth]  >> 16; | ||||
|     *dst_ptr++ = | ||||
|         SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; | ||||
|   } | ||||
| } | ||||
|  | ||||
| @@ -874,6 +875,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height, | ||||
|              &x, &y, &dx, &dy); | ||||
|   src_width = Abs(src_width); | ||||
|  | ||||
| #if defined(HAS_INTERPOLATEROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSE2; | ||||
|     if (IS_ALIGNED(src_width, 16)) { | ||||
|       InterpolateRow = InterpolateRow_SSE2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSSE3; | ||||
| @@ -898,11 +907,11 @@ void ScalePlaneBilinearDown(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_DSPR2; | ||||
| #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; | ||||
|     if (IS_ALIGNED(src_width, 4)) { | ||||
|       InterpolateRow = InterpolateRow_DSPR2; | ||||
|       InterpolateRow = InterpolateRow_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -1002,11 +1011,11 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_16_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_16_DSPR2; | ||||
| #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; | ||||
|     if (IS_ALIGNED(src_width, 4)) { | ||||
|       InterpolateRow = InterpolateRow_16_DSPR2; | ||||
|       InterpolateRow = InterpolateRow_16_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -1063,6 +1072,14 @@ void ScalePlaneBilinearUp(int src_width, int src_height, | ||||
|              &x, &y, &dx, &dy); | ||||
|   src_width = Abs(src_width); | ||||
|  | ||||
| #if defined(HAS_INTERPOLATEROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSE2; | ||||
|     if (IS_ALIGNED(dst_width, 16)) { | ||||
|       InterpolateRow = InterpolateRow_SSE2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSSE3; | ||||
| @@ -1087,11 +1104,11 @@ void ScalePlaneBilinearUp(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_DSPR2; | ||||
| #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; | ||||
|     if (IS_ALIGNED(dst_width, 4)) { | ||||
|       InterpolateRow = InterpolateRow_DSPR2; | ||||
|       InterpolateRow = InterpolateRow_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -1226,11 +1243,11 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_16_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_16_DSPR2; | ||||
| #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; | ||||
|     if (IS_ALIGNED(dst_width, 4)) { | ||||
|       InterpolateRow = InterpolateRow_16_DSPR2; | ||||
|       InterpolateRow = InterpolateRow_16_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										35
									
								
								third_party/libyuv/source/scale_any.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										35
									
								
								third_party/libyuv/source/scale_any.cc
									
									
									
									
										vendored
									
									
								
							| @@ -55,29 +55,12 @@ CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON, | ||||
|                      dst_ptr + n * BPP, r);                                    \ | ||||
|     } | ||||
|  | ||||
| // Fixed scale down for odd source width.  Used by I420Blend subsampling. | ||||
| // Since dst_width is (width + 1) / 2, this function scales one less pixel | ||||
| // and copies the last pixel. | ||||
| #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK)   \ | ||||
|     void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride,                   \ | ||||
|                  uint8* dst_ptr, int dst_width) {                              \ | ||||
|       int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1));               \ | ||||
|       int n = dst_width - r;                                                   \ | ||||
|       if (n > 0) {                                                             \ | ||||
|         SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n);                    \ | ||||
|       }                                                                        \ | ||||
|       SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride,                 \ | ||||
|                      dst_ptr + n * BPP, r);                                    \ | ||||
|     } | ||||
|  | ||||
| #ifdef HAS_SCALEROWDOWN2_SSSE3 | ||||
| SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) | ||||
| SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3, | ||||
| #ifdef HAS_SCALEROWDOWN2_SSE2 | ||||
| SDANY(ScaleRowDown2_Any_SSE2, ScaleRowDown2_SSE2, ScaleRowDown2_C, 2, 1, 15) | ||||
| SDANY(ScaleRowDown2Linear_Any_SSE2, ScaleRowDown2Linear_SSE2, | ||||
|       ScaleRowDown2Linear_C, 2, 1, 15) | ||||
| SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C, | ||||
| SDANY(ScaleRowDown2Box_Any_SSE2, ScaleRowDown2Box_SSE2, ScaleRowDown2Box_C, | ||||
|       2, 1, 15) | ||||
| SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3, | ||||
|       ScaleRowDown2Box_Odd_C, 2, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_SCALEROWDOWN2_AVX2 | ||||
| SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) | ||||
| @@ -85,8 +68,6 @@ SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2, | ||||
|       ScaleRowDown2Linear_C, 2, 1, 31) | ||||
| SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C, | ||||
|       2, 1, 31) | ||||
| SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C, | ||||
|       2, 1, 31) | ||||
| #endif | ||||
| #ifdef HAS_SCALEROWDOWN2_NEON | ||||
| SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) | ||||
| @@ -94,12 +75,10 @@ SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON, | ||||
|       ScaleRowDown2Linear_C, 2, 1, 15) | ||||
| SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON, | ||||
|       ScaleRowDown2Box_C, 2, 1, 15) | ||||
| SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON, | ||||
|       ScaleRowDown2Box_Odd_C, 2, 1, 15) | ||||
| #endif | ||||
| #ifdef HAS_SCALEROWDOWN4_SSSE3 | ||||
| SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) | ||||
| SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C, | ||||
| #ifdef HAS_SCALEROWDOWN4_SSE2 | ||||
| SDANY(ScaleRowDown4_Any_SSE2, ScaleRowDown4_SSE2, ScaleRowDown4_C, 4, 1, 7) | ||||
| SDANY(ScaleRowDown4Box_Any_SSE2, ScaleRowDown4Box_SSE2, ScaleRowDown4Box_C, | ||||
|       4, 1, 7) | ||||
| #endif | ||||
| #ifdef HAS_SCALEROWDOWN4_AVX2 | ||||
|   | ||||
							
								
								
									
										80
									
								
								third_party/libyuv/source/scale_argb.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										80
									
								
								third_party/libyuv/source/scale_argb.cc
									
									
									
									
										vendored
									
									
								
							| @@ -210,6 +210,14 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, | ||||
|   clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4. | ||||
|   src_argb += xl * 4; | ||||
|   x -= (int)(xl << 16); | ||||
| #if defined(HAS_INTERPOLATEROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSE2; | ||||
|     if (IS_ALIGNED(clip_src_width, 16)) { | ||||
|       InterpolateRow = InterpolateRow_SSE2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSSE3; | ||||
| @@ -234,12 +242,12 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && | ||||
| #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && | ||||
|       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) { | ||||
|     InterpolateRow = InterpolateRow_Any_DSPR2; | ||||
|     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; | ||||
|     if (IS_ALIGNED(clip_src_width, 4)) { | ||||
|       InterpolateRow = InterpolateRow_DSPR2; | ||||
|       InterpolateRow = InterpolateRow_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -300,6 +308,14 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, | ||||
|       int dst_width, int x, int dx) = | ||||
|       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; | ||||
|   const int max_y = (src_height - 1) << 16; | ||||
| #if defined(HAS_INTERPOLATEROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSE2; | ||||
|     if (IS_ALIGNED(dst_width, 4)) { | ||||
|       InterpolateRow = InterpolateRow_SSE2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSSE3; | ||||
| @@ -324,10 +340,10 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && | ||||
| #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     InterpolateRow = InterpolateRow_DSPR2; | ||||
|     InterpolateRow = InterpolateRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|   if (src_width >= 32768) { | ||||
| @@ -465,19 +481,27 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_I422TOARGBROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) && | ||||
| #if defined(HAS_I422TOARGBROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) && | ||||
|       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && | ||||
|       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && | ||||
|       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     I422ToARGBRow = I422ToARGBRow_DSPR2; | ||||
|     I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, | ||||
|       ptrdiff_t src_stride, int dst_width, int source_y_fraction) = | ||||
|       InterpolateRow_C; | ||||
| #if defined(HAS_INTERPOLATEROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSE2; | ||||
|     if (IS_ALIGNED(dst_width, 4)) { | ||||
|       InterpolateRow = InterpolateRow_SSE2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSSE3; | ||||
| @@ -502,10 +526,10 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && | ||||
| #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { | ||||
|     InterpolateRow = InterpolateRow_DSPR2; | ||||
|     InterpolateRow = InterpolateRow_MIPS_DSPR2; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| @@ -823,36 +847,6 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb, | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Scale with YUV conversion to ARGB and clipping. | ||||
| LIBYUV_API | ||||
| int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, | ||||
|                        const uint8* src_u, int src_stride_u, | ||||
|                        const uint8* src_v, int src_stride_v, | ||||
|                        uint32 src_fourcc, | ||||
|                        int src_width, int src_height, | ||||
|                        uint8* dst_argb, int dst_stride_argb, | ||||
|                        uint32 dst_fourcc, | ||||
|                        int dst_width, int dst_height, | ||||
|                        int clip_x, int clip_y, int clip_width, int clip_height, | ||||
|                        enum FilterMode filtering) { | ||||
|   uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4); | ||||
|   int r; | ||||
|   I420ToARGB(src_y, src_stride_y, | ||||
|              src_u, src_stride_u, | ||||
|              src_v, src_stride_v, | ||||
|              argb_buffer, src_width * 4, | ||||
|              src_width, src_height); | ||||
|  | ||||
|   r = ARGBScaleClip(argb_buffer, src_width * 4, | ||||
|                     src_width, src_height, | ||||
|                     dst_argb, dst_stride_argb, | ||||
|                     dst_width, dst_height, | ||||
|                     clip_x, clip_y, clip_width, clip_height, | ||||
|                     filtering); | ||||
|   free(argb_buffer); | ||||
|   return r; | ||||
| } | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| }  // extern "C" | ||||
| }  // namespace libyuv | ||||
|   | ||||
							
								
								
									
										58
									
								
								third_party/libyuv/source/scale_common.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										58
									
								
								third_party/libyuv/source/scale_common.cc
									
									
									
									
										vendored
									
									
								
							| @@ -103,28 +103,6 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   } | ||||
| } | ||||
|  | ||||
| void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                             uint8* dst, int dst_width) { | ||||
|   const uint8* s = src_ptr; | ||||
|   const uint8* t = src_ptr + src_stride; | ||||
|   int x; | ||||
|   dst_width -= 1; | ||||
|   for (x = 0; x < dst_width - 1; x += 2) { | ||||
|     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; | ||||
|     dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2; | ||||
|     dst += 2; | ||||
|     s += 4; | ||||
|     t += 4; | ||||
|   } | ||||
|   if (dst_width & 1) { | ||||
|     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; | ||||
|     dst += 1; | ||||
|     s += 2; | ||||
|     t += 2; | ||||
|   } | ||||
|   dst[0] = (s[0] + t[0] + 1) >> 1; | ||||
| } | ||||
|  | ||||
| void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint16* dst, int dst_width) { | ||||
|   const uint16* s = src_ptr; | ||||
| @@ -417,14 +395,8 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, | ||||
| } | ||||
|  | ||||
| // (1-f)a + fb can be replaced with a + f(b-a) | ||||
| #if defined(__arm__) || defined(__aarch64__) | ||||
| #define BLENDER(a, b, f) (uint8)((int)(a) + \ | ||||
|     ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) | ||||
| #else | ||||
| // inteluses 7 bit math with rounding. | ||||
| #define BLENDER(a, b, f) (uint8)((int)(a) + \ | ||||
|     (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7)) | ||||
| #endif | ||||
|     ((int)(f) * ((int)(b) - (int)(a)) >> 16)) | ||||
|  | ||||
| void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, | ||||
|                        int dst_width, int x, int dx) { | ||||
| @@ -476,9 +448,8 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, | ||||
| } | ||||
| #undef BLENDER | ||||
|  | ||||
| // Same as 8 bit arm blender but return is cast to uint16 | ||||
| #define BLENDER(a, b, f) (uint16)((int)(a) + \ | ||||
|     ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) | ||||
|     ((int)(f) * ((int)(b) - (int)(a)) >> 16)) | ||||
|  | ||||
| void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, | ||||
|                        int dst_width, int x, int dx) { | ||||
| @@ -816,7 +787,6 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, | ||||
|   } | ||||
| } | ||||
|  | ||||
| // TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607. | ||||
| // Mimics SSSE3 blender | ||||
| #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 | ||||
| #define BLENDERC(a, b, f, s) (uint32)( \ | ||||
| @@ -906,6 +876,14 @@ void ScalePlaneVertical(int src_height, | ||||
|   assert(dst_width > 0); | ||||
|   assert(dst_height > 0); | ||||
|   src_argb += (x >> 16) * bpp; | ||||
| #if defined(HAS_INTERPOLATEROW_SSE2) | ||||
|   if (TestCpuFlag(kCpuHasSSE2)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSE2; | ||||
|     if (IS_ALIGNED(dst_width_bytes, 16)) { | ||||
|       InterpolateRow = InterpolateRow_SSE2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_SSSE3) | ||||
|   if (TestCpuFlag(kCpuHasSSSE3)) { | ||||
|     InterpolateRow = InterpolateRow_Any_SSSE3; | ||||
| @@ -930,13 +908,13 @@ void ScalePlaneVertical(int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && | ||||
| #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && | ||||
|       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     InterpolateRow = InterpolateRow_Any_DSPR2; | ||||
|     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; | ||||
|     if (IS_ALIGNED(dst_width_bytes, 4)) { | ||||
|       InterpolateRow = InterpolateRow_DSPR2; | ||||
|       InterpolateRow = InterpolateRow_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| @@ -1004,13 +982,13 @@ void ScalePlaneVertical_16(int src_height, | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| #if defined(HAS_INTERPOLATEROW_16_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasDSPR2) && | ||||
| #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) | ||||
|   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && | ||||
|       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && | ||||
|       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { | ||||
|     InterpolateRow = InterpolateRow_Any_16_DSPR2; | ||||
|     InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; | ||||
|     if (IS_ALIGNED(dst_width_bytes, 4)) { | ||||
|       InterpolateRow = InterpolateRow_16_DSPR2; | ||||
|       InterpolateRow = InterpolateRow_16_MIPS_DSPR2; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										455
									
								
								third_party/libyuv/source/scale_gcc.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										455
									
								
								third_party/libyuv/source/scale_gcc.cc
									
									
									
									
										vendored
									
									
								
							| @@ -9,7 +9,6 @@ | ||||
|  */ | ||||
|  | ||||
| #include "libyuv/row.h" | ||||
| #include "libyuv/scale_row.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| namespace libyuv { | ||||
| @@ -17,8 +16,7 @@ extern "C" { | ||||
| #endif | ||||
|  | ||||
| // This module is for GCC x86 and x64. | ||||
| #if !defined(LIBYUV_DISABLE_X86) && \ | ||||
|     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) | ||||
| #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) | ||||
|  | ||||
| // Offsets for source bytes 0 to 9 | ||||
| static uvec8 kShuf0 = | ||||
| @@ -98,7 +96,7 @@ static uvec16 kScaleAb2 = | ||||
| // Generated using gcc disassembly on Visual C object file: | ||||
| // objdump -D yuvscaler.obj >yuvscaler.txt | ||||
|  | ||||
| void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     LABELALIGN | ||||
| @@ -120,23 +118,25 @@ void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     "pcmpeqb    %%xmm4,%%xmm4                  \n" | ||||
|     "psrlw      $0xf,%%xmm4                    \n" | ||||
|     "packuswb   %%xmm4,%%xmm4                  \n" | ||||
|     "pxor       %%xmm5,%%xmm5                  \n" | ||||
|     "pcmpeqb   %%xmm5,%%xmm5                   \n" | ||||
|     "psrlw     $0x8,%%xmm5                     \n" | ||||
|  | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
|     "movdqu    " MEMACCESS(0) ",%%xmm0         \n" | ||||
|     "movdqu    " MEMACCESS2(0x10, 0) ",%%xmm1  \n" | ||||
|     "lea       " MEMLEA(0x20,0) ",%0           \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm0                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm1                  \n" | ||||
|     "pavgw      %%xmm5,%%xmm0                  \n" | ||||
|     "pavgw      %%xmm5,%%xmm1                  \n" | ||||
|     "movdqa    %%xmm0,%%xmm2                   \n" | ||||
|     "psrlw     $0x8,%%xmm0                     \n" | ||||
|     "movdqa    %%xmm1,%%xmm3                   \n" | ||||
|     "psrlw     $0x8,%%xmm1                     \n" | ||||
|     "pand      %%xmm5,%%xmm2                   \n" | ||||
|     "pand      %%xmm5,%%xmm3                   \n" | ||||
|     "pavgw     %%xmm2,%%xmm0                   \n" | ||||
|     "pavgw     %%xmm3,%%xmm1                   \n" | ||||
|     "packuswb  %%xmm1,%%xmm0                   \n" | ||||
|     "movdqu    %%xmm0," MEMACCESS(1) "         \n" | ||||
|     "lea       " MEMLEA(0x10,1) ",%1           \n" | ||||
| @@ -145,17 +145,15 @@ void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   : "+r"(src_ptr),    // %0 | ||||
|     "+r"(dst_ptr),    // %1 | ||||
|     "+r"(dst_width)   // %2 | ||||
|   :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" | ||||
|   :: "memory", "cc", "xmm0", "xmm1", "xmm5" | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     "pcmpeqb    %%xmm4,%%xmm4                  \n" | ||||
|     "psrlw      $0xf,%%xmm4                    \n" | ||||
|     "packuswb   %%xmm4,%%xmm4                  \n" | ||||
|     "pxor       %%xmm5,%%xmm5                  \n" | ||||
|     "pcmpeqb   %%xmm5,%%xmm5                   \n" | ||||
|     "psrlw     $0x8,%%xmm5                     \n" | ||||
|  | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
| @@ -164,16 +162,16 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     MEMOPREG(movdqu,0x00,0,3,1,xmm2)           //  movdqu  (%0,%3,1),%%xmm2 | ||||
|     MEMOPREG(movdqu,0x10,0,3,1,xmm3)           //  movdqu  0x10(%0,%3,1),%%xmm3 | ||||
|     "lea       " MEMLEA(0x20,0) ",%0           \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm0                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm1                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm2                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm3                  \n" | ||||
|     "paddw      %%xmm2,%%xmm0                  \n" | ||||
|     "paddw      %%xmm3,%%xmm1                  \n" | ||||
|     "psrlw      $0x1,%%xmm0                    \n" | ||||
|     "psrlw      $0x1,%%xmm1                    \n" | ||||
|     "pavgw      %%xmm5,%%xmm0                  \n" | ||||
|     "pavgw      %%xmm5,%%xmm1                  \n" | ||||
|     "pavgb     %%xmm2,%%xmm0                   \n" | ||||
|     "pavgb     %%xmm3,%%xmm1                   \n" | ||||
|     "movdqa    %%xmm0,%%xmm2                   \n" | ||||
|     "psrlw     $0x8,%%xmm0                     \n" | ||||
|     "movdqa    %%xmm1,%%xmm3                   \n" | ||||
|     "psrlw     $0x8,%%xmm1                     \n" | ||||
|     "pand      %%xmm5,%%xmm2                   \n" | ||||
|     "pand      %%xmm5,%%xmm3                   \n" | ||||
|     "pavgw     %%xmm2,%%xmm0                   \n" | ||||
|     "pavgw     %%xmm3,%%xmm1                   \n" | ||||
|     "packuswb  %%xmm1,%%xmm0                   \n" | ||||
|     "movdqu    %%xmm0," MEMACCESS(1) "         \n" | ||||
|     "lea       " MEMLEA(0x10,1) ",%1           \n" | ||||
| @@ -188,105 +186,7 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| #ifdef HAS_SCALEROWDOWN2_AVX2 | ||||
| void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
|     "vmovdqu    " MEMACCESS(0) ",%%ymm0        \n" | ||||
|     "vmovdqu    " MEMACCESS2(0x20,0) ",%%ymm1  \n" | ||||
|     "lea        " MEMLEA(0x40,0) ",%0          \n" | ||||
|     "vpsrlw     $0x8,%%ymm0,%%ymm0             \n" | ||||
|     "vpsrlw     $0x8,%%ymm1,%%ymm1             \n" | ||||
|     "vpackuswb  %%ymm1,%%ymm0,%%ymm0           \n" | ||||
|     "vpermq     $0xd8,%%ymm0,%%ymm0            \n" | ||||
|     "vmovdqu    %%ymm0," MEMACCESS(1) "        \n" | ||||
|     "lea        " MEMLEA(0x20,1) ",%1          \n" | ||||
|     "sub        $0x20,%2                       \n" | ||||
|     "jg         1b                             \n" | ||||
|     "vzeroupper                                \n" | ||||
|   : "+r"(src_ptr),    // %0 | ||||
|     "+r"(dst_ptr),    // %1 | ||||
|     "+r"(dst_width)   // %2 | ||||
|   :: "memory", "cc", "xmm0", "xmm1" | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     "vpcmpeqb   %%ymm4,%%ymm4,%%ymm4           \n" | ||||
|     "vpsrlw     $0xf,%%ymm4,%%ymm4             \n" | ||||
|     "vpackuswb  %%ymm4,%%ymm4,%%ymm4           \n" | ||||
|     "vpxor      %%ymm5,%%ymm5,%%ymm5           \n" | ||||
|  | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
|     "vmovdqu    " MEMACCESS(0) ",%%ymm0        \n" | ||||
|     "vmovdqu    " MEMACCESS2(0x20, 0) ",%%ymm1 \n" | ||||
|     "lea        " MEMLEA(0x40,0) ",%0          \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm0,%%ymm0           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm1,%%ymm1           \n" | ||||
|     "vpavgw     %%ymm5,%%ymm0,%%ymm0           \n" | ||||
|     "vpavgw     %%ymm5,%%ymm1,%%ymm1           \n" | ||||
|     "vpackuswb  %%ymm1,%%ymm0,%%ymm0           \n" | ||||
|     "vpermq     $0xd8,%%ymm0,%%ymm0            \n" | ||||
|     "vmovdqu    %%ymm0," MEMACCESS(1) "        \n" | ||||
|     "lea        " MEMLEA(0x20,1) ",%1          \n" | ||||
|     "sub        $0x20,%2                       \n" | ||||
|     "jg         1b                             \n" | ||||
|     "vzeroupper                                \n" | ||||
|   : "+r"(src_ptr),    // %0 | ||||
|     "+r"(dst_ptr),    // %1 | ||||
|     "+r"(dst_width)   // %2 | ||||
|   :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     "vpcmpeqb   %%ymm4,%%ymm4,%%ymm4           \n" | ||||
|     "vpsrlw     $0xf,%%ymm4,%%ymm4             \n" | ||||
|     "vpackuswb  %%ymm4,%%ymm4,%%ymm4           \n" | ||||
|     "vpxor      %%ymm5,%%ymm5,%%ymm5           \n" | ||||
|  | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
|     "vmovdqu    " MEMACCESS(0) ",%%ymm0        \n" | ||||
|     "vmovdqu    " MEMACCESS2(0x20,0) ",%%ymm1  \n" | ||||
|     MEMOPREG(vmovdqu,0x00,0,3,1,ymm2)          //  vmovdqu  (%0,%3,1),%%ymm2 | ||||
|     MEMOPREG(vmovdqu,0x20,0,3,1,ymm3)          //  vmovdqu  0x20(%0,%3,1),%%ymm3 | ||||
|     "lea        " MEMLEA(0x40,0) ",%0          \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm0,%%ymm0           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm1,%%ymm1           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm2,%%ymm2           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm3,%%ymm3           \n" | ||||
|     "vpaddw     %%ymm2,%%ymm0,%%ymm0           \n" | ||||
|     "vpaddw     %%ymm3,%%ymm1,%%ymm1           \n" | ||||
|     "vpsrlw     $0x1,%%ymm0,%%ymm0             \n" | ||||
|     "vpsrlw     $0x1,%%ymm1,%%ymm1             \n" | ||||
|     "vpavgw     %%ymm5,%%ymm0,%%ymm0           \n" | ||||
|     "vpavgw     %%ymm5,%%ymm1,%%ymm1           \n" | ||||
|     "vpackuswb  %%ymm1,%%ymm0,%%ymm0           \n" | ||||
|     "vpermq     $0xd8,%%ymm0,%%ymm0            \n" | ||||
|     "vmovdqu    %%ymm0," MEMACCESS(1) "        \n" | ||||
|     "lea        " MEMLEA(0x20,1) ",%1          \n" | ||||
|     "sub        $0x20,%2                       \n" | ||||
|     "jg         1b                             \n" | ||||
|     "vzeroupper                                \n" | ||||
|   : "+r"(src_ptr),    // %0 | ||||
|     "+r"(dst_ptr),    // %1 | ||||
|     "+r"(dst_width)   // %2 | ||||
|   : "r"((intptr_t)(src_stride))   // %3 | ||||
|   : "memory", "cc", NACL_R14 | ||||
|     "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | ||||
|   ); | ||||
| } | ||||
| #endif  // HAS_SCALEROWDOWN2_AVX2 | ||||
|  | ||||
| void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     "pcmpeqb   %%xmm5,%%xmm5                   \n" | ||||
| @@ -314,15 +214,12 @@ void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width) { | ||||
|   intptr_t stridex3; | ||||
|   intptr_t stridex3 = 0; | ||||
|   asm volatile ( | ||||
|     "pcmpeqb    %%xmm4,%%xmm4                  \n" | ||||
|     "psrlw      $0xf,%%xmm4                    \n" | ||||
|     "movdqa     %%xmm4,%%xmm5                  \n" | ||||
|     "packuswb   %%xmm4,%%xmm4                  \n" | ||||
|     "psllw      $0x3,%%xmm5                    \n" | ||||
|     "pcmpeqb   %%xmm7,%%xmm7                   \n" | ||||
|     "psrlw     $0x8,%%xmm7                     \n" | ||||
|     "lea       " MEMLEA4(0x00,4,4,2) ",%3      \n" | ||||
|  | ||||
|     LABELALIGN | ||||
| @@ -331,28 +228,30 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n" | ||||
|     MEMOPREG(movdqu,0x00,0,4,1,xmm2)           //  movdqu  (%0,%4,1),%%xmm2 | ||||
|     MEMOPREG(movdqu,0x10,0,4,1,xmm3)           //  movdqu  0x10(%0,%4,1),%%xmm3 | ||||
|     "pmaddubsw  %%xmm4,%%xmm0                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm1                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm2                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm3                  \n" | ||||
|     "paddw      %%xmm2,%%xmm0                  \n" | ||||
|     "paddw      %%xmm3,%%xmm1                  \n" | ||||
|     "pavgb     %%xmm2,%%xmm0                   \n" | ||||
|     "pavgb     %%xmm3,%%xmm1                   \n" | ||||
|     MEMOPREG(movdqu,0x00,0,4,2,xmm2)           //  movdqu  (%0,%4,2),%%xmm2 | ||||
|     MEMOPREG(movdqu,0x10,0,4,2,xmm3)           //  movdqu  0x10(%0,%4,2),%%xmm3 | ||||
|     "pmaddubsw  %%xmm4,%%xmm2                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm3                  \n" | ||||
|     "paddw      %%xmm2,%%xmm0                  \n" | ||||
|     "paddw      %%xmm3,%%xmm1                  \n" | ||||
|     MEMOPREG(movdqu,0x00,0,3,1,xmm2)           //  movdqu  (%0,%3,1),%%xmm2 | ||||
|     MEMOPREG(movdqu,0x10,0,3,1,xmm3)           //  movdqu  0x10(%0,%3,1),%%xmm3 | ||||
|     MEMOPREG(movdqu,0x00,0,3,1,xmm4)           //  movdqu  (%0,%3,1),%%xmm4 | ||||
|     MEMOPREG(movdqu,0x10,0,3,1,xmm5)           //  movdqu  0x10(%0,%3,1),%%xmm5 | ||||
|     "lea       " MEMLEA(0x20,0) ",%0           \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm2                  \n" | ||||
|     "pmaddubsw  %%xmm4,%%xmm3                  \n" | ||||
|     "paddw      %%xmm2,%%xmm0                  \n" | ||||
|     "paddw      %%xmm3,%%xmm1                  \n" | ||||
|     "phaddw     %%xmm1,%%xmm0                  \n" | ||||
|     "paddw      %%xmm5,%%xmm0                  \n" | ||||
|     "psrlw      $0x4,%%xmm0                    \n" | ||||
|     "pavgb     %%xmm4,%%xmm2                   \n" | ||||
|     "pavgb     %%xmm2,%%xmm0                   \n" | ||||
|     "pavgb     %%xmm5,%%xmm3                   \n" | ||||
|     "pavgb     %%xmm3,%%xmm1                   \n" | ||||
|     "movdqa    %%xmm0,%%xmm2                   \n" | ||||
|     "psrlw     $0x8,%%xmm0                     \n" | ||||
|     "movdqa    %%xmm1,%%xmm3                   \n" | ||||
|     "psrlw     $0x8,%%xmm1                     \n" | ||||
|     "pand      %%xmm7,%%xmm2                   \n" | ||||
|     "pand      %%xmm7,%%xmm3                   \n" | ||||
|     "pavgw     %%xmm2,%%xmm0                   \n" | ||||
|     "pavgw     %%xmm3,%%xmm1                   \n" | ||||
|     "packuswb  %%xmm1,%%xmm0                   \n" | ||||
|     "movdqa    %%xmm0,%%xmm2                   \n" | ||||
|     "psrlw     $0x8,%%xmm0                     \n" | ||||
|     "pand      %%xmm7,%%xmm2                   \n" | ||||
|     "pavgw     %%xmm2,%%xmm0                   \n" | ||||
|     "packuswb  %%xmm0,%%xmm0                   \n" | ||||
|     "movq      %%xmm0," MEMACCESS(1) "         \n" | ||||
|     "lea       " MEMLEA(0x8,1) ",%1            \n" | ||||
| @@ -361,100 +260,13 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   : "+r"(src_ptr),     // %0 | ||||
|     "+r"(dst_ptr),     // %1 | ||||
|     "+r"(dst_width),   // %2 | ||||
|     "=&r"(stridex3)    // %3 | ||||
|     "+r"(stridex3)     // %3 | ||||
|   : "r"((intptr_t)(src_stride))    // %4 | ||||
|   : "memory", "cc", NACL_R14 | ||||
|     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | ||||
|     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7" | ||||
|   ); | ||||
| } | ||||
|  | ||||
|  | ||||
| #ifdef HAS_SCALEROWDOWN4_AVX2 | ||||
| void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     "vpcmpeqb   %%ymm5,%%ymm5,%%ymm5           \n" | ||||
|     "vpsrld     $0x18,%%ymm5,%%ymm5            \n" | ||||
|     "vpslld     $0x10,%%ymm5,%%ymm5            \n" | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
|     "vmovdqu    " MEMACCESS(0) ",%%ymm0        \n" | ||||
|     "vmovdqu    " MEMACCESS2(0x20,0) ",%%ymm1  \n" | ||||
|     "lea        " MEMLEA(0x40,0) ",%0          \n" | ||||
|     "vpand      %%ymm5,%%ymm0,%%ymm0           \n" | ||||
|     "vpand      %%ymm5,%%ymm1,%%ymm1           \n" | ||||
|     "vpackuswb  %%ymm1,%%ymm0,%%ymm0           \n" | ||||
|     "vpermq     $0xd8,%%ymm0,%%ymm0            \n" | ||||
|     "vpsrlw     $0x8,%%ymm0,%%ymm0             \n" | ||||
|     "vpackuswb  %%ymm0,%%ymm0,%%ymm0           \n" | ||||
|     "vpermq     $0xd8,%%ymm0,%%ymm0            \n" | ||||
|     "vmovdqu    %%xmm0," MEMACCESS(1) "        \n" | ||||
|     "lea        " MEMLEA(0x10,1) ",%1          \n" | ||||
|     "sub        $0x10,%2                       \n" | ||||
|     "jg         1b                             \n" | ||||
|     "vzeroupper                                \n" | ||||
|   : "+r"(src_ptr),    // %0 | ||||
|     "+r"(dst_ptr),    // %1 | ||||
|     "+r"(dst_width)   // %2 | ||||
|   :: "memory", "cc", "xmm0", "xmm1", "xmm5" | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     "vpcmpeqb   %%ymm4,%%ymm4,%%ymm4           \n" | ||||
|     "vpsrlw     $0xf,%%ymm4,%%ymm4             \n" | ||||
|     "vpsllw     $0x3,%%ymm4,%%ymm5             \n" | ||||
|     "vpackuswb  %%ymm4,%%ymm4,%%ymm4           \n" | ||||
|  | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
|     "vmovdqu    " MEMACCESS(0) ",%%ymm0        \n" | ||||
|     "vmovdqu    " MEMACCESS2(0x20,0) ",%%ymm1  \n" | ||||
|     MEMOPREG(vmovdqu,0x00,0,3,1,ymm2)          //  vmovdqu  (%0,%3,1),%%ymm2 | ||||
|     MEMOPREG(vmovdqu,0x20,0,3,1,ymm3)          //  vmovdqu  0x20(%0,%3,1),%%ymm3 | ||||
|     "vpmaddubsw %%ymm4,%%ymm0,%%ymm0           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm1,%%ymm1           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm2,%%ymm2           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm3,%%ymm3           \n" | ||||
|     "vpaddw     %%ymm2,%%ymm0,%%ymm0           \n" | ||||
|     "vpaddw     %%ymm3,%%ymm1,%%ymm1           \n" | ||||
|     MEMOPREG(vmovdqu,0x00,0,3,2,ymm2)          //  vmovdqu  (%0,%3,2),%%ymm2 | ||||
|     MEMOPREG(vmovdqu,0x20,0,3,2,ymm3)          //  vmovdqu  0x20(%0,%3,2),%%ymm3 | ||||
|     "vpmaddubsw %%ymm4,%%ymm2,%%ymm2           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm3,%%ymm3           \n" | ||||
|     "vpaddw     %%ymm2,%%ymm0,%%ymm0           \n" | ||||
|     "vpaddw     %%ymm3,%%ymm1,%%ymm1           \n" | ||||
|     MEMOPREG(vmovdqu,0x00,0,4,1,ymm2)          //  vmovdqu  (%0,%4,1),%%ymm2 | ||||
|     MEMOPREG(vmovdqu,0x20,0,4,1,ymm3)          //  vmovdqu  0x20(%0,%4,1),%%ymm3 | ||||
|     "lea        " MEMLEA(0x40,0) ",%0          \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm2,%%ymm2           \n" | ||||
|     "vpmaddubsw %%ymm4,%%ymm3,%%ymm3           \n" | ||||
|     "vpaddw     %%ymm2,%%ymm0,%%ymm0           \n" | ||||
|     "vpaddw     %%ymm3,%%ymm1,%%ymm1           \n" | ||||
|     "vphaddw    %%ymm1,%%ymm0,%%ymm0           \n" | ||||
|     "vpermq     $0xd8,%%ymm0,%%ymm0            \n" | ||||
|     "vpaddw     %%ymm5,%%ymm0,%%ymm0           \n" | ||||
|     "vpsrlw     $0x4,%%ymm0,%%ymm0             \n" | ||||
|     "vpackuswb  %%ymm0,%%ymm0,%%ymm0           \n" | ||||
|     "vpermq     $0xd8,%%ymm0,%%ymm0            \n" | ||||
|     "vmovdqu    %%xmm0," MEMACCESS(1) "        \n" | ||||
|     "lea        " MEMLEA(0x10,1) ",%1          \n" | ||||
|     "sub        $0x10,%2                       \n" | ||||
|     "jg         1b                             \n" | ||||
|     "vzeroupper                                \n" | ||||
|   : "+r"(src_ptr),    // %0 | ||||
|     "+r"(dst_ptr),    // %1 | ||||
|     "+r"(dst_width)   // %2 | ||||
|   : "r"((intptr_t)(src_stride)),  // %3 | ||||
|     "r"((intptr_t)(src_stride * 3))   // %4 | ||||
|   : "memory", "cc", NACL_R14 | ||||
|     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | ||||
|   ); | ||||
| } | ||||
| #endif  // HAS_SCALEROWDOWN4_AVX2 | ||||
|  | ||||
| void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                           uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
| @@ -762,89 +574,61 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, | ||||
| } | ||||
|  | ||||
| // Reads 16xN bytes and produces 16 shorts at a time. | ||||
| void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { | ||||
| void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                        uint16* dst_ptr, int src_width, int src_height) { | ||||
|   int tmp_height = 0; | ||||
|   intptr_t tmp_src = 0; | ||||
|   asm volatile ( | ||||
|     "pxor      %%xmm5,%%xmm5                   \n" | ||||
|     "mov       %0,%3                           \n"  // row pointer | ||||
|     "mov       %5,%2                           \n"  // height | ||||
|     "pxor      %%xmm0,%%xmm0                   \n"  // clear accumulators | ||||
|     "pxor      %%xmm1,%%xmm1                   \n" | ||||
|     "pxor      %%xmm4,%%xmm4                   \n" | ||||
|  | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
|     "movdqu    " MEMACCESS(0) ",%%xmm3         \n" | ||||
|     "lea       " MEMLEA(0x10,0) ",%0           \n"  // src_ptr += 16 | ||||
|     "movdqu    " MEMACCESS(1) ",%%xmm0         \n" | ||||
|     "movdqu    " MEMACCESS2(0x10,1) ",%%xmm1   \n" | ||||
|     "movdqa    %%xmm3,%%xmm2                   \n" | ||||
|     "punpcklbw %%xmm5,%%xmm2                   \n" | ||||
|     "punpckhbw %%xmm5,%%xmm3                   \n" | ||||
|     "movdqu    " MEMACCESS(3) ",%%xmm2         \n" | ||||
|     "add       %6,%3                           \n" | ||||
|     "movdqa    %%xmm2,%%xmm3                   \n" | ||||
|     "punpcklbw %%xmm4,%%xmm2                   \n" | ||||
|     "punpckhbw %%xmm4,%%xmm3                   \n" | ||||
|     "paddusw   %%xmm2,%%xmm0                   \n" | ||||
|     "paddusw   %%xmm3,%%xmm1                   \n" | ||||
|     "sub       $0x1,%2                         \n" | ||||
|     "jg        1b                              \n" | ||||
|  | ||||
|     "movdqu    %%xmm0," MEMACCESS(1) "         \n" | ||||
|     "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n" | ||||
|     "lea       " MEMLEA(0x20,1) ",%1           \n" | ||||
|     "sub       $0x10,%2                        \n" | ||||
|     "lea       " MEMLEA(0x10,0) ",%0           \n"  // src_ptr += 16 | ||||
|     "mov       %0,%3                           \n"  // row pointer | ||||
|     "mov       %5,%2                           \n"  // height | ||||
|     "pxor      %%xmm0,%%xmm0                   \n"  // clear accumulators | ||||
|     "pxor      %%xmm1,%%xmm1                   \n" | ||||
|     "sub       $0x10,%4                        \n" | ||||
|     "jg        1b                              \n" | ||||
|   : "+r"(src_ptr),     // %0 | ||||
|     "+r"(dst_ptr),     // %1 | ||||
|     "+r"(src_width)    // %2 | ||||
|   : | ||||
|   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | ||||
|     "+r"(tmp_height),  // %2 | ||||
|     "+r"(tmp_src),     // %3 | ||||
|     "+r"(src_width),   // %4 | ||||
|     "+rm"(src_height)  // %5 | ||||
|   : "rm"((intptr_t)(src_stride))  // %6 | ||||
|   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" | ||||
|   ); | ||||
| } | ||||
|  | ||||
|  | ||||
| #ifdef HAS_SCALEADDROW_AVX2 | ||||
| // Reads 32 bytes and accumulates to 32 shorts at a time. | ||||
| void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { | ||||
|   asm volatile ( | ||||
|     "vpxor      %%ymm5,%%ymm5,%%ymm5           \n" | ||||
|  | ||||
|     LABELALIGN | ||||
|   "1:                                          \n" | ||||
|     "vmovdqu    " MEMACCESS(0) ",%%ymm3        \n" | ||||
|     "lea        " MEMLEA(0x20,0) ",%0          \n"  // src_ptr += 32 | ||||
|     "vpermq     $0xd8,%%ymm3,%%ymm3            \n" | ||||
|     "vpunpcklbw %%ymm5,%%ymm3,%%ymm2           \n" | ||||
|     "vpunpckhbw %%ymm5,%%ymm3,%%ymm3           \n" | ||||
|     "vpaddusw   " MEMACCESS(1) ",%%ymm2,%%ymm0 \n" | ||||
|     "vpaddusw   " MEMACCESS2(0x20,1) ",%%ymm3,%%ymm1 \n" | ||||
|     "vmovdqu    %%ymm0," MEMACCESS(1) "        \n" | ||||
|     "vmovdqu    %%ymm1," MEMACCESS2(0x20,1) "  \n" | ||||
|     "lea       " MEMLEA(0x40,1) ",%1           \n" | ||||
|     "sub       $0x20,%2                        \n" | ||||
|     "jg        1b                              \n" | ||||
|     "vzeroupper                                \n" | ||||
|   : "+r"(src_ptr),     // %0 | ||||
|     "+r"(dst_ptr),     // %1 | ||||
|     "+r"(src_width)    // %2 | ||||
|   : | ||||
|   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | ||||
|   ); | ||||
| } | ||||
| #endif  // HAS_SCALEADDROW_AVX2 | ||||
|  | ||||
| // Constant for making pixels signed to avoid pmaddubsw | ||||
| // saturation. | ||||
| static uvec8 kFsub80 = | ||||
|   { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | ||||
|     0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | ||||
|  | ||||
| // Constant for making pixels unsigned and adding .5 for rounding. | ||||
| static uvec16 kFadd40 = | ||||
|   { 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 }; | ||||
|  | ||||
| // Bilinear column filtering. SSSE3 version. | ||||
| void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | ||||
|                            int dst_width, int x, int dx) { | ||||
|   intptr_t x0, x1, temp_pixel; | ||||
|   intptr_t x0 = 0, x1 = 0, temp_pixel = 0; | ||||
|   asm volatile ( | ||||
|     "movd      %6,%%xmm2                       \n" | ||||
|     "movd      %7,%%xmm3                       \n" | ||||
|     "movl      $0x04040000,%k2                 \n" | ||||
|     "movd      %k2,%%xmm5                      \n" | ||||
|     "pcmpeqb   %%xmm6,%%xmm6                   \n" | ||||
|     "psrlw     $0x9,%%xmm6                     \n"  // 0x007f007f | ||||
|     "pcmpeqb   %%xmm7,%%xmm7                   \n" | ||||
|     "psrlw     $15,%%xmm7                      \n"  // 0x00010001 | ||||
|  | ||||
|     "psrlw     $0x9,%%xmm6                     \n" | ||||
|     "pextrw    $0x1,%%xmm2,%k3                 \n" | ||||
|     "subl      $0x2,%5                         \n" | ||||
|     "jl        29f                             \n" | ||||
| @@ -866,19 +650,16 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | ||||
|     "movd      %k2,%%xmm4                      \n" | ||||
|     "pshufb    %%xmm5,%%xmm1                   \n" | ||||
|     "punpcklwd %%xmm4,%%xmm0                   \n" | ||||
|     "psubb     %8,%%xmm0                       \n"  // make pixels signed. | ||||
|     "pxor      %%xmm6,%%xmm1                   \n"  // 128 -f = (f ^ 127 ) + 1 | ||||
|     "paddusb   %%xmm7,%%xmm1                   \n" | ||||
|     "pmaddubsw %%xmm0,%%xmm1                   \n" | ||||
|     "pxor      %%xmm6,%%xmm1                   \n" | ||||
|     "pmaddubsw %%xmm1,%%xmm0                   \n" | ||||
|     "pextrw    $0x1,%%xmm2,%k3                 \n" | ||||
|     "pextrw    $0x3,%%xmm2,%k4                 \n" | ||||
|     "paddw     %9,%%xmm1                       \n"  // make pixels unsigned. | ||||
|     "psrlw     $0x7,%%xmm1                     \n" | ||||
|     "packuswb  %%xmm1,%%xmm1                   \n" | ||||
|     "movd      %%xmm1,%k2                      \n" | ||||
|     "psrlw     $0x7,%%xmm0                     \n" | ||||
|     "packuswb  %%xmm0,%%xmm0                   \n" | ||||
|     "movd      %%xmm0,%k2                      \n" | ||||
|     "mov       %w2," MEMACCESS(0) "            \n" | ||||
|     "lea       " MEMLEA(0x2,0) ",%0            \n" | ||||
|     "subl      $0x2,%5                         \n" | ||||
|     "sub       $0x2,%5                         \n" | ||||
|     "jge       2b                              \n" | ||||
|  | ||||
|     LABELALIGN | ||||
| @@ -889,37 +670,23 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | ||||
|     "movd      %k2,%%xmm0                      \n" | ||||
|     "psrlw     $0x9,%%xmm2                     \n" | ||||
|     "pshufb    %%xmm5,%%xmm2                   \n" | ||||
|     "psubb     %8,%%xmm0                       \n"  // make pixels signed. | ||||
|     "pxor      %%xmm6,%%xmm2                   \n" | ||||
|     "paddusb   %%xmm7,%%xmm2                   \n" | ||||
|     "pmaddubsw %%xmm0,%%xmm2                   \n" | ||||
|     "paddw     %9,%%xmm2                       \n"  // make pixels unsigned. | ||||
|     "psrlw     $0x7,%%xmm2                     \n" | ||||
|     "packuswb  %%xmm2,%%xmm2                   \n" | ||||
|     "movd      %%xmm2,%k2                      \n" | ||||
|     "pmaddubsw %%xmm2,%%xmm0                   \n" | ||||
|     "psrlw     $0x7,%%xmm0                     \n" | ||||
|     "packuswb  %%xmm0,%%xmm0                   \n" | ||||
|     "movd      %%xmm0,%k2                      \n" | ||||
|     "mov       %b2," MEMACCESS(0) "            \n" | ||||
|   "99:                                         \n" | ||||
|   : "+r"(dst_ptr),     // %0 | ||||
|     "+r"(src_ptr),     // %1 | ||||
|     "=&a"(temp_pixel),  // %2 | ||||
|     "=&r"(x0),          // %3 | ||||
|     "=&r"(x1),          // %4 | ||||
| #if defined(__x86_64__) | ||||
|     "+a"(temp_pixel),  // %2 | ||||
|     "+r"(x0),          // %3 | ||||
|     "+r"(x1),          // %4 | ||||
|     "+rm"(dst_width)   // %5 | ||||
| #else | ||||
|     "+m"(dst_width)    // %5 | ||||
| #endif | ||||
|   : "rm"(x),           // %6 | ||||
|     "rm"(dx),           // %7 | ||||
| #if defined(__x86_64__) | ||||
|     "x"(kFsub80),       // %8 | ||||
|     "x"(kFadd40)        // %9 | ||||
| #else | ||||
|     "m"(kFsub80),       // %8 | ||||
|     "m"(kFadd40)        // %9 | ||||
| #endif | ||||
|     "rm"(dx)           // %7 | ||||
|   : "memory", "cc", NACL_R14 | ||||
|     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | ||||
|     "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | ||||
|   ); | ||||
| } | ||||
|  | ||||
| @@ -1028,7 +795,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, | ||||
| void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, | ||||
|                                int src_stepx, uint8* dst_argb, int dst_width) { | ||||
|   intptr_t src_stepx_x4 = (intptr_t)(src_stepx); | ||||
|   intptr_t src_stepx_x12; | ||||
|   intptr_t src_stepx_x12 = 0; | ||||
|   asm volatile ( | ||||
|     "lea       " MEMLEA3(0x00,1,4) ",%1        \n" | ||||
|     "lea       " MEMLEA4(0x00,1,1,2) ",%4      \n" | ||||
| @@ -1050,7 +817,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, | ||||
|     "+r"(src_stepx_x4),  // %1 | ||||
|     "+r"(dst_argb),      // %2 | ||||
|     "+r"(dst_width),     // %3 | ||||
|     "=&r"(src_stepx_x12)  // %4 | ||||
|     "+r"(src_stepx_x12)  // %4 | ||||
|   :: "memory", "cc", NACL_R14 | ||||
|     "xmm0", "xmm1", "xmm2", "xmm3" | ||||
|   ); | ||||
| @@ -1062,7 +829,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, | ||||
|                                   ptrdiff_t src_stride, int src_stepx, | ||||
|                                   uint8* dst_argb, int dst_width) { | ||||
|   intptr_t src_stepx_x4 = (intptr_t)(src_stepx); | ||||
|   intptr_t src_stepx_x12; | ||||
|   intptr_t src_stepx_x12 = 0; | ||||
|   intptr_t row1 = (intptr_t)(src_stride); | ||||
|   asm volatile ( | ||||
|     "lea       " MEMLEA3(0x00,1,4) ",%1        \n" | ||||
| @@ -1095,7 +862,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, | ||||
|     "+r"(src_stepx_x4),   // %1 | ||||
|     "+r"(dst_argb),       // %2 | ||||
|     "+rm"(dst_width),     // %3 | ||||
|     "=&r"(src_stepx_x12),  // %4 | ||||
|     "+r"(src_stepx_x12),  // %4 | ||||
|     "+r"(row1)            // %5 | ||||
|   :: "memory", "cc", NACL_R14 | ||||
|     "xmm0", "xmm1", "xmm2", "xmm3" | ||||
| @@ -1104,7 +871,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, | ||||
|  | ||||
| void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, | ||||
|                         int dst_width, int x, int dx) { | ||||
|   intptr_t x0, x1; | ||||
|   intptr_t x0 = 0, x1 = 0; | ||||
|   asm volatile ( | ||||
|     "movd      %5,%%xmm2                       \n" | ||||
|     "movd      %6,%%xmm3                       \n" | ||||
| @@ -1157,8 +924,8 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, | ||||
|     MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0 | ||||
|     "movd      %%xmm0," MEMACCESS(2) "         \n" | ||||
|   "99:                                         \n" | ||||
|   : "=&a"(x0),         // %0 | ||||
|     "=&d"(x1),         // %1 | ||||
|   : "+a"(x0),          // %0 | ||||
|     "+d"(x1),          // %1 | ||||
|     "+r"(dst_argb),    // %2 | ||||
|     "+r"(src_argb),    // %3 | ||||
|     "+r"(dst_width)    // %4 | ||||
| @@ -1209,7 +976,7 @@ static uvec8 kShuffleFractions = { | ||||
| // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version | ||||
| void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, | ||||
|                                int dst_width, int x, int dx) { | ||||
|   intptr_t x0, x1; | ||||
|   intptr_t x0 = 0, x1 = 0; | ||||
|   asm volatile ( | ||||
|     "movdqa    %0,%%xmm4                       \n" | ||||
|     "movdqa    %1,%%xmm5                       \n" | ||||
| @@ -1272,8 +1039,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, | ||||
|   : "+r"(dst_argb),    // %0 | ||||
|     "+r"(src_argb),    // %1 | ||||
|     "+rm"(dst_width),  // %2 | ||||
|     "=&r"(x0),         // %3 | ||||
|     "=&r"(x1)          // %4 | ||||
|     "+r"(x0),          // %3 | ||||
|     "+r"(x1)           // %4 | ||||
|   : "rm"(x),           // %5 | ||||
|     "rm"(dx)           // %6 | ||||
|   : "memory", "cc", NACL_R14 | ||||
|   | ||||
							
								
								
									
										30
									
								
								third_party/libyuv/source/scale_mips.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										30
									
								
								third_party/libyuv/source/scale_mips.cc
									
									
									
									
										vendored
									
									
								
							| @@ -21,7 +21,7 @@ extern "C" { | ||||
|     defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ | ||||
|     (_MIPS_SIM == _MIPS_SIM_ABI32) | ||||
|  | ||||
| void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst, int dst_width) { | ||||
|   __asm__ __volatile__( | ||||
|     ".set push                                     \n" | ||||
| @@ -31,6 +31,7 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     "beqz           $t9, 2f                        \n" | ||||
|     " nop                                          \n" | ||||
|  | ||||
|     ".p2align       2                              \n" | ||||
|   "1:                                              \n" | ||||
|     "lw             $t0, 0(%[src_ptr])             \n"  // |3|2|1|0| | ||||
|     "lw             $t1, 4(%[src_ptr])             \n"  // |7|6|5|4| | ||||
| @@ -77,7 +78,7 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                  uint8* dst, int dst_width) { | ||||
|   const uint8* t = src_ptr + src_stride; | ||||
|  | ||||
| @@ -89,6 +90,7 @@ void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     "bltz           $t9, 2f                       \n" | ||||
|     " nop                                         \n" | ||||
|  | ||||
|     ".p2align       2                             \n" | ||||
|   "1:                                             \n" | ||||
|     "lw             $t0, 0(%[src_ptr])            \n"  // |3|2|1|0| | ||||
|     "lw             $t1, 4(%[src_ptr])            \n"  // |7|6|5|4| | ||||
| @@ -176,7 +178,7 @@ void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst, int dst_width) { | ||||
|   __asm__ __volatile__ ( | ||||
|       ".set push                                    \n" | ||||
| @@ -186,6 +188,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|       "beqz           $t9, 2f                       \n" | ||||
|       " nop                                         \n" | ||||
|  | ||||
|       ".p2align       2                             \n" | ||||
|      "1:                                            \n" | ||||
|       "lw             $t1, 0(%[src_ptr])            \n"  // |3|2|1|0| | ||||
|       "lw             $t2, 4(%[src_ptr])            \n"  // |7|6|5|4| | ||||
| @@ -231,7 +234,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                  uint8* dst, int dst_width) { | ||||
|   intptr_t stride = src_stride; | ||||
|   const uint8* s1 = src_ptr + stride; | ||||
| @@ -245,6 +248,7 @@ void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|       "srl           $t9, %[dst_width], 1         \n" | ||||
|       "andi          $t8, %[dst_width], 1         \n" | ||||
|  | ||||
|       ".p2align      2                            \n" | ||||
|      "1:                                          \n" | ||||
|       "lw            $t0, 0(%[src_ptr])           \n"  // |3|2|1|0| | ||||
|       "lw            $t1, 0(%[s1])                \n"  // |7|6|5|4| | ||||
| @@ -310,11 +314,12 @@ void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                uint8* dst, int dst_width) { | ||||
|   __asm__ __volatile__ ( | ||||
|       ".set push                                          \n" | ||||
|       ".set noreorder                                     \n" | ||||
|       ".p2align        2                                  \n" | ||||
|     "1:                                                   \n" | ||||
|       "lw              $t1, 0(%[src_ptr])                 \n"  // |3|2|1|0| | ||||
|       "lw              $t2, 4(%[src_ptr])                 \n"  // |7|6|5|4| | ||||
| @@ -356,13 +361,14 @@ void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                      uint8* d, int dst_width) { | ||||
|   __asm__ __volatile__ ( | ||||
|       ".set push                                         \n" | ||||
|       ".set noreorder                                    \n" | ||||
|       "repl.ph           $t3, 3                          \n"  // 0x00030003 | ||||
|  | ||||
|      ".p2align           2                               \n" | ||||
|     "1:                                                  \n" | ||||
|       "lw                $t0, 0(%[src_ptr])              \n"  // |S3|S2|S1|S0| | ||||
|       "lwx               $t1, %[src_stride](%[src_ptr])  \n"  // |T3|T2|T1|T0| | ||||
| @@ -412,13 +418,14 @@ void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                      uint8* d, int dst_width) { | ||||
|   __asm__ __volatile__ ( | ||||
|       ".set push                                           \n" | ||||
|       ".set noreorder                                      \n" | ||||
|       "repl.ph           $t2, 3                            \n"  // 0x00030003 | ||||
|  | ||||
|       ".p2align          2                                 \n" | ||||
|     "1:                                                    \n" | ||||
|       "lw                $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0| | ||||
|       "lwx               $t1, %[src_stride](%[src_ptr])    \n"  // |T3|T2|T1|T0| | ||||
| @@ -464,12 +471,13 @@ void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                uint8* dst, int dst_width) { | ||||
|   __asm__ __volatile__ ( | ||||
|       ".set push                                     \n" | ||||
|       ".set noreorder                                \n" | ||||
|  | ||||
|       ".p2align   2                                  \n" | ||||
|     "1:                                              \n" | ||||
|       "lw         $t0, 0(%[src_ptr])                 \n"  // |3|2|1|0| | ||||
|       "lw         $t1, 4(%[src_ptr])                 \n"  // |7|6|5|4| | ||||
| @@ -510,7 +518,7 @@ void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                      uint8* dst_ptr, int dst_width) { | ||||
|   intptr_t stride = src_stride; | ||||
|   const uint8* t = src_ptr + stride; | ||||
| @@ -520,6 +528,7 @@ void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|       ".set push                                         \n" | ||||
|       ".set noreorder                                    \n" | ||||
|  | ||||
|       ".p2align        2                                 \n" | ||||
|     "1:                                                  \n" | ||||
|       "lw              $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0| | ||||
|       "lw              $t1, 4(%[src_ptr])                \n"  // |S7|S6|S5|S4| | ||||
| @@ -563,7 +572,7 @@ void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   ); | ||||
| } | ||||
|  | ||||
| void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, | ||||
| void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, | ||||
|                                      ptrdiff_t src_stride, | ||||
|                                      uint8* dst_ptr, int dst_width) { | ||||
|   intptr_t stride = src_stride; | ||||
| @@ -577,6 +586,7 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, | ||||
|       ".set push                                         \n" | ||||
|       ".set noreorder                                    \n" | ||||
|  | ||||
|       ".p2align        2                                 \n" | ||||
|     "1:                                                  \n" | ||||
|       "lw              $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0| | ||||
|       "lw              $t1, 4(%[src_ptr])                \n"  // |S7|S6|S5|S4| | ||||
|   | ||||
							
								
								
									
										36
									
								
								third_party/libyuv/source/scale_neon.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										36
									
								
								third_party/libyuv/source/scale_neon.cc
									
									
									
									
										vendored
									
									
								
							| @@ -26,6 +26,7 @@ extern "C" { | ||||
| void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst, int dst_width) { | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     // load even pixels into q0, odd into q1 | ||||
|     MEMACCESS(0) | ||||
| @@ -46,6 +47,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst, int dst_width) { | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld1.8     {q0, q1}, [%0]!                \n"  // load pixels and post inc | ||||
| @@ -71,6 +73,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   asm volatile ( | ||||
|     // change the stride to row 2 pointer | ||||
|     "add        %1, %0                         \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld1.8     {q0, q1}, [%0]!                \n"  // load row 1 and post inc | ||||
| @@ -98,6 +101,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld4.8     {d0, d1, d2, d3}, [%0]!        \n" // src line 0 | ||||
| @@ -119,6 +123,7 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   const uint8* src_ptr2 = src_ptr + src_stride * 2; | ||||
|   const uint8* src_ptr3 = src_ptr + src_stride * 3; | ||||
| asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld1.8     {q0}, [%0]!                    \n"   // load up 16x4 | ||||
| @@ -157,6 +162,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr, | ||||
|                          ptrdiff_t src_stride, | ||||
|                          uint8* dst_ptr, int dst_width) { | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld4.8     {d0, d1, d2, d3}, [%0]!      \n" // src line 0 | ||||
| @@ -179,6 +185,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, | ||||
|   asm volatile ( | ||||
|     "vmov.u8    d24, #3                        \n" | ||||
|     "add        %3, %0                         \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld4.8       {d0, d1, d2, d3}, [%0]!      \n" // src line 0 | ||||
| @@ -238,6 +245,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, | ||||
|   asm volatile ( | ||||
|     "vmov.u8    d24, #3                        \n" | ||||
|     "add        %3, %0                         \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld4.8       {d0, d1, d2, d3}, [%0]!      \n" // src line 0 | ||||
| @@ -292,6 +300,7 @@ void ScaleRowDown38_NEON(const uint8* src_ptr, | ||||
|   asm volatile ( | ||||
|     MEMACCESS(3) | ||||
|     "vld1.8     {q3}, [%3]                     \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld1.8     {d0, d1, d2, d3}, [%0]!        \n" | ||||
| @@ -325,6 +334,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, | ||||
|     MEMACCESS(7) | ||||
|     "vld1.8     {q15}, [%7]                    \n" | ||||
|     "add        %3, %0                         \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|  | ||||
|     // d0 = 00 40 01 41 02 42 03 43 | ||||
| @@ -440,6 +450,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, | ||||
|     MEMACCESS(5) | ||||
|     "vld1.8     {q14}, [%5]                    \n" | ||||
|     "add        %3, %0                         \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|  | ||||
|     // d0 = 00 40 01 41 02 42 03 43 | ||||
| @@ -532,8 +543,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, | ||||
|  | ||||
| void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                     uint16* dst_ptr, int src_width, int src_height) { | ||||
|   const uint8* src_tmp; | ||||
|   const uint8* src_tmp = NULL; | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     "mov       %0, %1                          \n" | ||||
|     "mov       r12, %5                         \n" | ||||
| @@ -552,7 +564,7 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     "add        %1, %1, #16                    \n" | ||||
|     "subs       %4, %4, #16                    \n"  // 16 processed per loop | ||||
|     "bgt        1b                             \n" | ||||
|   : "=&r"(src_tmp),    // %0 | ||||
|   : "+r"(src_tmp),          // %0 | ||||
|     "+r"(src_ptr),          // %1 | ||||
|     "+r"(dst_ptr),          // %2 | ||||
|     "+r"(src_stride),       // %3 | ||||
| @@ -572,16 +584,13 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     MEMACCESS(6)                                               \ | ||||
|     "vld2.8     {d6["#n"], d7["#n"]}, [%6]     \n" | ||||
|  | ||||
| // The NEON version mimics this formula: | ||||
| // #define BLENDER(a, b, f) (uint8)((int)(a) + | ||||
| //    ((int)(f) * ((int)(b) - (int)(a)) >> 16)) | ||||
|  | ||||
| void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, | ||||
|                           int dst_width, int x, int dx) { | ||||
|   int dx_offset[4] = {0, 1, 2, 3}; | ||||
|   int* tmp = dx_offset; | ||||
|   const uint8* src_tmp = src_ptr; | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|     "vdup.32    q0, %3                         \n"  // x | ||||
|     "vdup.32    q1, %4                         \n"  // dx | ||||
|     "vld1.32    {q2}, [%5]                     \n"  // 0 1 2 3 | ||||
| @@ -612,8 +621,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, | ||||
|     "vmovl.u16  q10, d21                       \n" | ||||
|     "vmul.s32   q11, q11, q13                  \n" | ||||
|     "vmul.s32   q12, q12, q10                  \n" | ||||
|     "vrshrn.s32  d18, q11, #16                 \n" | ||||
|     "vrshrn.s32  d19, q12, #16                 \n" | ||||
|     "vshrn.s32  d18, q11, #16                  \n" | ||||
|     "vshrn.s32  d19, q12, #16                  \n" | ||||
|     "vadd.s16   q8, q8, q9                     \n" | ||||
|     "vmovn.s16  d6, q8                         \n" | ||||
|  | ||||
| @@ -740,6 +749,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr, | ||||
| void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                             uint8* dst, int dst_width) { | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     // load even pixels into q0, odd into q1 | ||||
|     MEMACCESS(0) | ||||
| @@ -763,6 +773,7 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, | ||||
|                                   uint8* dst_argb, int dst_width) { | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels. | ||||
| @@ -793,6 +804,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   asm volatile ( | ||||
|     // change the stride to row 2 pointer | ||||
|     "add        %1, %1, %0                     \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels. | ||||
| @@ -833,6 +845,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb,  ptrdiff_t src_stride, | ||||
|                                int src_stepx, uint8* dst_argb, int dst_width) { | ||||
|   asm volatile ( | ||||
|     "mov        r12, %3, lsl #2                \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld1.32    {d0[0]}, [%0], r12             \n" | ||||
| @@ -862,6 +875,7 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, | ||||
|   asm volatile ( | ||||
|     "mov        r12, %4, lsl #2                \n" | ||||
|     "add        %1, %1, %0                     \n" | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     MEMACCESS(0) | ||||
|     "vld1.8     {d0}, [%0], r12                \n"  // Read 4 2x2 blocks -> 2x1 | ||||
| @@ -913,9 +927,10 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, | ||||
|  | ||||
| void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, | ||||
|                         int dst_width, int x, int dx) { | ||||
|   int tmp; | ||||
|   int tmp = 0; | ||||
|   const uint8* src_tmp = src_argb; | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|   "1:                                          \n" | ||||
|     LOAD1_DATA32_LANE(d0, 0) | ||||
|     LOAD1_DATA32_LANE(d0, 1) | ||||
| @@ -935,7 +950,7 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, | ||||
|     "+r"(dst_width),        // %2 | ||||
|     "+r"(x),                // %3 | ||||
|     "+r"(dx),               // %4 | ||||
|     "=&r"(tmp),       // %5 | ||||
|     "+r"(tmp),              // %5 | ||||
|     "+r"(src_tmp)           // %6 | ||||
|   : | ||||
|   : "memory", "cc", "q0", "q1" | ||||
| @@ -959,6 +974,7 @@ void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, | ||||
|   int* tmp = dx_offset; | ||||
|   const uint8* src_tmp = src_argb; | ||||
|   asm volatile ( | ||||
|     ".p2align   2                              \n" | ||||
|     "vdup.32    q0, %3                         \n"  // x | ||||
|     "vdup.32    q1, %4                         \n"  // dx | ||||
|     "vld1.32    {q2}, [%5]                     \n"  // 0 1 2 3 | ||||
|   | ||||
							
								
								
									
										12
									
								
								third_party/libyuv/source/scale_neon64.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								third_party/libyuv/source/scale_neon64.cc
									
									
									
									
										vendored
									
									
								
							| @@ -547,7 +547,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, | ||||
|  | ||||
| void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                     uint16* dst_ptr, int src_width, int src_height) { | ||||
|   const uint8* src_tmp; | ||||
|   const uint8* src_tmp = NULL; | ||||
|   asm volatile ( | ||||
|   "1:                                          \n" | ||||
|     "mov       %0, %1                          \n" | ||||
| @@ -567,7 +567,7 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     "add      %1, %1, #16                      \n" | ||||
|     "subs     %w4, %w4, #16                    \n"  // 16 processed per loop | ||||
|     "b.gt     1b                               \n" | ||||
|   : "=&r"(src_tmp),    // %0 | ||||
|   : "+r"(src_tmp),          // %0 | ||||
|     "+r"(src_ptr),          // %1 | ||||
|     "+r"(dst_ptr),          // %2 | ||||
|     "+r"(src_stride),       // %3 | ||||
| @@ -626,8 +626,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, | ||||
|     "ushll2    v6.4s, v6.8h, #0                \n" | ||||
|     "mul       v16.4s, v16.4s, v7.4s           \n" | ||||
|     "mul       v17.4s, v17.4s, v6.4s           \n" | ||||
|     "rshrn      v6.4h, v16.4s, #16             \n" | ||||
|     "rshrn2     v6.8h, v17.4s, #16             \n" | ||||
|     "shrn      v6.4h, v16.4s, #16              \n" | ||||
|     "shrn2     v6.8h, v17.4s, #16              \n" | ||||
|     "add       v4.8h, v4.8h, v6.8h             \n" | ||||
|     "xtn       v4.8b, v4.8h                    \n" | ||||
|  | ||||
| @@ -931,7 +931,7 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, | ||||
|   int64 dst_width64 = (int64) dst_width;  // Work around ios 64 bit warning. | ||||
|   int64 x64 = (int64) x; | ||||
|   int64 dx64 = (int64) dx; | ||||
|   int64 tmp64; | ||||
|   int64 tmp64 = 0; | ||||
|   asm volatile ( | ||||
|   "1:                                          \n" | ||||
|     LOAD1_DATA32_LANE(v0, 0) | ||||
| @@ -952,7 +952,7 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, | ||||
|     "+r"(dst_width64),      // %2 | ||||
|     "+r"(x64),              // %3 | ||||
|     "+r"(dx64),             // %4 | ||||
|     "=&r"(tmp64),       // %5 | ||||
|     "+r"(tmp64),            // %5 | ||||
|     "+r"(src_tmp)           // %6 | ||||
|   : | ||||
|   : "memory", "cc", "v0", "v1" | ||||
|   | ||||
							
								
								
									
										276
									
								
								third_party/libyuv/source/scale_win.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										276
									
								
								third_party/libyuv/source/scale_win.cc
									
									
									
									
										vendored
									
									
								
							| @@ -16,8 +16,9 @@ namespace libyuv { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| // This module is for 32 bit Visual C x86 and clangcl | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) | ||||
| // This module is for Visual C x86. | ||||
| #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ | ||||
|     defined(_MSC_VER) && !defined(__clang__) | ||||
|  | ||||
| // Offsets for source bytes 0 to 9 | ||||
| static uvec8 kShuf0 = | ||||
| @@ -95,7 +96,7 @@ static uvec16 kScaleAb2 = | ||||
|  | ||||
| // Reads 32 pixels, throws half away and writes 16 pixels. | ||||
| __declspec(naked) | ||||
| void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width) { | ||||
|   __asm { | ||||
|     mov        eax, [esp + 4]        // src_ptr | ||||
| @@ -121,28 +122,31 @@ void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|  | ||||
| // Blends 32x1 rectangle to 16x1. | ||||
| __declspec(naked) | ||||
| void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                               uint8* dst_ptr, int dst_width) { | ||||
|   __asm { | ||||
|     mov        eax, [esp + 4]        // src_ptr | ||||
|                                      // src_stride | ||||
|     mov        edx, [esp + 12]       // dst_ptr | ||||
|     mov        ecx, [esp + 16]       // dst_width | ||||
|  | ||||
|     pcmpeqb    xmm4, xmm4            // constant 0x0101 | ||||
|     psrlw      xmm4, 15 | ||||
|     packuswb   xmm4, xmm4 | ||||
|     pxor       xmm5, xmm5            // constant 0 | ||||
|     pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff | ||||
|     psrlw      xmm5, 8 | ||||
|  | ||||
|   wloop: | ||||
|     movdqu     xmm0, [eax] | ||||
|     movdqu     xmm1, [eax + 16] | ||||
|     lea        eax,  [eax + 32] | ||||
|     pmaddubsw  xmm0, xmm4      // horizontal add | ||||
|     pmaddubsw  xmm1, xmm4 | ||||
|     pavgw      xmm0, xmm5      // (x + 1) / 2 | ||||
|     pavgw      xmm1, xmm5 | ||||
|  | ||||
|     movdqa     xmm2, xmm0            // average columns (32 to 16 pixels) | ||||
|     psrlw      xmm0, 8 | ||||
|     movdqa     xmm3, xmm1 | ||||
|     psrlw      xmm1, 8 | ||||
|     pand       xmm2, xmm5 | ||||
|     pand       xmm3, xmm5 | ||||
|     pavgw      xmm0, xmm2 | ||||
|     pavgw      xmm1, xmm3 | ||||
|     packuswb   xmm0, xmm1 | ||||
|  | ||||
|     movdqu     [edx], xmm0 | ||||
|     lea        edx, [edx + 16] | ||||
|     sub        ecx, 16 | ||||
| @@ -154,7 +158,7 @@ void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|  | ||||
| // Blends 32x2 rectangle to 16x1. | ||||
| __declspec(naked) | ||||
| void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width) { | ||||
|   __asm { | ||||
|     push       esi | ||||
| @@ -162,11 +166,8 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     mov        esi, [esp + 4 + 8]    // src_stride | ||||
|     mov        edx, [esp + 4 + 12]   // dst_ptr | ||||
|     mov        ecx, [esp + 4 + 16]   // dst_width | ||||
|  | ||||
|     pcmpeqb    xmm4, xmm4            // constant 0x0101 | ||||
|     psrlw      xmm4, 15 | ||||
|     packuswb   xmm4, xmm4 | ||||
|     pxor       xmm5, xmm5            // constant 0 | ||||
|     pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff | ||||
|     psrlw      xmm5, 8 | ||||
|  | ||||
|   wloop: | ||||
|     movdqu     xmm0, [eax] | ||||
| @@ -174,17 +175,19 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     movdqu     xmm2, [eax + esi] | ||||
|     movdqu     xmm3, [eax + esi + 16] | ||||
|     lea        eax,  [eax + 32] | ||||
|     pmaddubsw  xmm0, xmm4      // horizontal add | ||||
|     pmaddubsw  xmm1, xmm4 | ||||
|     pmaddubsw  xmm2, xmm4 | ||||
|     pmaddubsw  xmm3, xmm4 | ||||
|     paddw      xmm0, xmm2      // vertical add | ||||
|     paddw      xmm1, xmm3 | ||||
|     psrlw      xmm0, 1 | ||||
|     psrlw      xmm1, 1 | ||||
|     pavgw      xmm0, xmm5      // (x + 1) / 2 | ||||
|     pavgw      xmm1, xmm5 | ||||
|     pavgb      xmm0, xmm2            // average rows | ||||
|     pavgb      xmm1, xmm3 | ||||
|  | ||||
|     movdqa     xmm2, xmm0            // average columns (32 to 16 pixels) | ||||
|     psrlw      xmm0, 8 | ||||
|     movdqa     xmm3, xmm1 | ||||
|     psrlw      xmm1, 8 | ||||
|     pand       xmm2, xmm5 | ||||
|     pand       xmm3, xmm5 | ||||
|     pavgw      xmm0, xmm2 | ||||
|     pavgw      xmm1, xmm3 | ||||
|     packuswb   xmm0, xmm1 | ||||
|  | ||||
|     movdqu     [edx], xmm0 | ||||
|     lea        edx, [edx + 16] | ||||
|     sub        ecx, 16 | ||||
| @@ -243,12 +246,14 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     vmovdqu     ymm0, [eax] | ||||
|     vmovdqu     ymm1, [eax + 32] | ||||
|     lea         eax,  [eax + 64] | ||||
|     vpmaddubsw  ymm0, ymm0, ymm4      // horizontal add | ||||
|  | ||||
|     vpmaddubsw  ymm0, ymm0, ymm4      // average horizontally | ||||
|     vpmaddubsw  ymm1, ymm1, ymm4 | ||||
|     vpavgw      ymm0, ymm0, ymm5      // (x + 1) / 2 | ||||
|     vpavgw      ymm1, ymm1, ymm5 | ||||
|     vpackuswb   ymm0, ymm0, ymm1 | ||||
|     vpermq      ymm0, ymm0, 0xd8      // unmutate vpackuswb | ||||
|  | ||||
|     vmovdqu     [edx], ymm0 | ||||
|     lea         edx, [edx + 32] | ||||
|     sub         ecx, 32 | ||||
| @@ -259,8 +264,6 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|   } | ||||
| } | ||||
|  | ||||
| // For rounding, average = (sum + 2) / 4 | ||||
| // becomes average((sum >> 1), 0) | ||||
| // Blends 64x2 rectangle to 32x1. | ||||
| __declspec(naked) | ||||
| void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| @@ -278,23 +281,19 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     vpxor       ymm5, ymm5, ymm5      // constant 0 | ||||
|  | ||||
|   wloop: | ||||
|     vmovdqu     ymm0, [eax] | ||||
|     vmovdqu     ymm0, [eax]           // average rows | ||||
|     vmovdqu     ymm1, [eax + 32] | ||||
|     vmovdqu     ymm2, [eax + esi] | ||||
|     vmovdqu     ymm3, [eax + esi + 32] | ||||
|     vpavgb      ymm0, ymm0, [eax + esi] | ||||
|     vpavgb      ymm1, ymm1, [eax + esi + 32] | ||||
|     lea         eax,  [eax + 64] | ||||
|     vpmaddubsw  ymm0, ymm0, ymm4      // horizontal add | ||||
|  | ||||
|     vpmaddubsw  ymm0, ymm0, ymm4      // average horizontally | ||||
|     vpmaddubsw  ymm1, ymm1, ymm4 | ||||
|     vpmaddubsw  ymm2, ymm2, ymm4 | ||||
|     vpmaddubsw  ymm3, ymm3, ymm4 | ||||
|     vpaddw      ymm0, ymm0, ymm2      // vertical add | ||||
|     vpaddw      ymm1, ymm1, ymm3 | ||||
|     vpsrlw      ymm0, ymm0, 1         // (x + 2) / 4 = (x / 2 + 1) / 2 | ||||
|     vpsrlw      ymm1, ymm1, 1 | ||||
|     vpavgw      ymm0, ymm0, ymm5      // (x + 1) / 2 | ||||
|     vpavgw      ymm1, ymm1, ymm5 | ||||
|     vpackuswb   ymm0, ymm0, ymm1 | ||||
|     vpermq      ymm0, ymm0, 0xd8      // unmutate vpackuswb | ||||
|  | ||||
|     vmovdqu     [edx], ymm0 | ||||
|     lea         edx, [edx + 32] | ||||
|     sub         ecx, 32 | ||||
| @@ -309,7 +308,7 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|  | ||||
| // Point samples 32 pixels to 8 pixels. | ||||
| __declspec(naked) | ||||
| void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                         uint8* dst_ptr, int dst_width) { | ||||
|   __asm { | ||||
|     mov        eax, [esp + 4]        // src_ptr | ||||
| @@ -340,7 +339,7 @@ void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|  | ||||
| // Blends 32x4 rectangle to 8x1. | ||||
| __declspec(naked) | ||||
| void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
| void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                            uint8* dst_ptr, int dst_width) { | ||||
|   __asm { | ||||
|     push       esi | ||||
| @@ -350,40 +349,42 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     mov        edx, [esp + 8 + 12]   // dst_ptr | ||||
|     mov        ecx, [esp + 8 + 16]   // dst_width | ||||
|     lea        edi, [esi + esi * 2]  // src_stride * 3 | ||||
|     pcmpeqb    xmm4, xmm4            // constant 0x0101 | ||||
|     psrlw      xmm4, 15 | ||||
|     movdqa     xmm5, xmm4 | ||||
|     packuswb   xmm4, xmm4 | ||||
|     psllw      xmm5, 3               // constant 0x0008 | ||||
|     pcmpeqb    xmm7, xmm7            // generate mask 0x00ff00ff | ||||
|     psrlw      xmm7, 8 | ||||
|  | ||||
|   wloop: | ||||
|     movdqu     xmm0, [eax]           // average rows | ||||
|     movdqu     xmm1, [eax + 16] | ||||
|     movdqu     xmm2, [eax + esi] | ||||
|     movdqu     xmm3, [eax + esi + 16] | ||||
|     pmaddubsw  xmm0, xmm4      // horizontal add | ||||
|     pmaddubsw  xmm1, xmm4 | ||||
|     pmaddubsw  xmm2, xmm4 | ||||
|     pmaddubsw  xmm3, xmm4 | ||||
|     paddw      xmm0, xmm2      // vertical add rows 0, 1 | ||||
|     paddw      xmm1, xmm3 | ||||
|     pavgb      xmm0, xmm2 | ||||
|     pavgb      xmm1, xmm3 | ||||
|     movdqu     xmm2, [eax + esi * 2] | ||||
|     movdqu     xmm3, [eax + esi * 2 + 16] | ||||
|     pmaddubsw  xmm2, xmm4 | ||||
|     pmaddubsw  xmm3, xmm4 | ||||
|     paddw      xmm0, xmm2      // add row 2 | ||||
|     paddw      xmm1, xmm3 | ||||
|     movdqu     xmm2, [eax + edi] | ||||
|     movdqu     xmm3, [eax + edi + 16] | ||||
|     movdqu     xmm4, [eax + edi] | ||||
|     movdqu     xmm5, [eax + edi + 16] | ||||
|     lea        eax, [eax + 32] | ||||
|     pmaddubsw  xmm2, xmm4 | ||||
|     pmaddubsw  xmm3, xmm4 | ||||
|     paddw      xmm0, xmm2      // add row 3 | ||||
|     paddw      xmm1, xmm3 | ||||
|     phaddw     xmm0, xmm1 | ||||
|     paddw      xmm0, xmm5      // + 8 for round | ||||
|     psrlw      xmm0, 4         // /16 for average of 4 * 4 | ||||
|     pavgb      xmm2, xmm4 | ||||
|     pavgb      xmm3, xmm5 | ||||
|     pavgb      xmm0, xmm2 | ||||
|     pavgb      xmm1, xmm3 | ||||
|  | ||||
|     movdqa     xmm2, xmm0            // average columns (32 to 16 pixels) | ||||
|     psrlw      xmm0, 8 | ||||
|     movdqa     xmm3, xmm1 | ||||
|     psrlw      xmm1, 8 | ||||
|     pand       xmm2, xmm7 | ||||
|     pand       xmm3, xmm7 | ||||
|     pavgw      xmm0, xmm2 | ||||
|     pavgw      xmm1, xmm3 | ||||
|     packuswb   xmm0, xmm1 | ||||
|  | ||||
|     movdqa     xmm2, xmm0            // average columns (16 to 8 pixels) | ||||
|     psrlw      xmm0, 8 | ||||
|     pand       xmm2, xmm7 | ||||
|     pavgw      xmm0, xmm2 | ||||
|     packuswb   xmm0, xmm0 | ||||
|  | ||||
|     movq       qword ptr [edx], xmm0 | ||||
|     lea        edx, [edx + 8] | ||||
|     sub        ecx, 8 | ||||
| @@ -442,41 +443,37 @@ void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|     mov         edx, [esp + 8 + 12]   // dst_ptr | ||||
|     mov         ecx, [esp + 8 + 16]   // dst_width | ||||
|     lea         edi, [esi + esi * 2]  // src_stride * 3 | ||||
|     vpcmpeqb    ymm4, ymm4, ymm4            // constant 0x0101 | ||||
|     vpsrlw      ymm4, ymm4, 15 | ||||
|     vpsllw      ymm5, ymm4, 3               // constant 0x0008 | ||||
|     vpackuswb   ymm4, ymm4, ymm4 | ||||
|     vpcmpeqb    ymm7, ymm7, ymm7      // generate mask 0x00ff00ff | ||||
|     vpsrlw      ymm7, ymm7, 8 | ||||
|  | ||||
|   wloop: | ||||
|     vmovdqu     ymm0, [eax]           // average rows | ||||
|     vmovdqu     ymm1, [eax + 32] | ||||
|     vmovdqu     ymm2, [eax + esi] | ||||
|     vmovdqu     ymm3, [eax + esi + 32] | ||||
|     vpmaddubsw  ymm0, ymm0, ymm4      // horizontal add | ||||
|     vpmaddubsw  ymm1, ymm1, ymm4 | ||||
|     vpmaddubsw  ymm2, ymm2, ymm4 | ||||
|     vpmaddubsw  ymm3, ymm3, ymm4 | ||||
|     vpaddw      ymm0, ymm0, ymm2      // vertical add rows 0, 1 | ||||
|     vpaddw      ymm1, ymm1, ymm3 | ||||
|     vpavgb      ymm0, ymm0, [eax + esi] | ||||
|     vpavgb      ymm1, ymm1, [eax + esi + 32] | ||||
|     vmovdqu     ymm2, [eax + esi * 2] | ||||
|     vmovdqu     ymm3, [eax + esi * 2 + 32] | ||||
|     vpmaddubsw  ymm2, ymm2, ymm4 | ||||
|     vpmaddubsw  ymm3, ymm3, ymm4 | ||||
|     vpaddw      ymm0, ymm0, ymm2      // add row 2 | ||||
|     vpaddw      ymm1, ymm1, ymm3 | ||||
|     vmovdqu     ymm2, [eax + edi] | ||||
|     vmovdqu     ymm3, [eax + edi + 32] | ||||
|     vpavgb      ymm2, ymm2, [eax + edi] | ||||
|     vpavgb      ymm3, ymm3, [eax + edi + 32] | ||||
|     lea         eax, [eax + 64] | ||||
|     vpmaddubsw  ymm2, ymm2, ymm4 | ||||
|     vpmaddubsw  ymm3, ymm3, ymm4 | ||||
|     vpaddw      ymm0, ymm0, ymm2      // add row 3 | ||||
|     vpaddw      ymm1, ymm1, ymm3 | ||||
|     vphaddw     ymm0, ymm0, ymm1      // mutates | ||||
|     vpermq      ymm0, ymm0, 0xd8      // unmutate vphaddw | ||||
|     vpaddw      ymm0, ymm0, ymm5      // + 8 for round | ||||
|     vpsrlw      ymm0, ymm0, 4         // /16 for average of 4 * 4 | ||||
|     vpavgb      ymm0, ymm0, ymm2 | ||||
|     vpavgb      ymm1, ymm1, ymm3 | ||||
|  | ||||
|     vpand       ymm2, ymm0, ymm7      // average columns (64 to 32 pixels) | ||||
|     vpand       ymm3, ymm1, ymm7 | ||||
|     vpsrlw      ymm0, ymm0, 8 | ||||
|     vpsrlw      ymm1, ymm1, 8 | ||||
|     vpavgw      ymm0, ymm0, ymm2 | ||||
|     vpavgw      ymm1, ymm1, ymm3 | ||||
|     vpackuswb   ymm0, ymm0, ymm1 | ||||
|     vpermq      ymm0, ymm0, 0xd8      // unmutate vpackuswb | ||||
|  | ||||
|     vpand       ymm2, ymm0, ymm7      // average columns (32 to 16 pixels) | ||||
|     vpsrlw      ymm0, ymm0, 8 | ||||
|     vpavgw      ymm0, ymm0, ymm2 | ||||
|     vpackuswb   ymm0, ymm0, ymm0 | ||||
|     vpermq      ymm0, ymm0, 0xd8      // unmutate vpackuswb | ||||
|  | ||||
|     vmovdqu     [edx], xmm0 | ||||
|     lea         edx, [edx + 16] | ||||
|     sub         ecx, 16 | ||||
| @@ -502,9 +499,9 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                      // src_stride ignored | ||||
|     mov        edx, [esp + 12]       // dst_ptr | ||||
|     mov        ecx, [esp + 16]       // dst_width | ||||
|     movdqa     xmm3, xmmword ptr kShuf0 | ||||
|     movdqa     xmm4, xmmword ptr kShuf1 | ||||
|     movdqa     xmm5, xmmword ptr kShuf2 | ||||
|     movdqa     xmm3, kShuf0 | ||||
|     movdqa     xmm4, kShuf1 | ||||
|     movdqa     xmm5, kShuf2 | ||||
|  | ||||
|   wloop: | ||||
|     movdqu     xmm0, [eax] | ||||
| @@ -551,12 +548,12 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, | ||||
|     mov        esi, [esp + 4 + 8]    // src_stride | ||||
|     mov        edx, [esp + 4 + 12]   // dst_ptr | ||||
|     mov        ecx, [esp + 4 + 16]   // dst_width | ||||
|     movdqa     xmm2, xmmword ptr kShuf01 | ||||
|     movdqa     xmm3, xmmword ptr kShuf11 | ||||
|     movdqa     xmm4, xmmword ptr kShuf21 | ||||
|     movdqa     xmm5, xmmword ptr kMadd01 | ||||
|     movdqa     xmm6, xmmword ptr kMadd11 | ||||
|     movdqa     xmm7, xmmword ptr kRound34 | ||||
|     movdqa     xmm2, kShuf01 | ||||
|     movdqa     xmm3, kShuf11 | ||||
|     movdqa     xmm4, kShuf21 | ||||
|     movdqa     xmm5, kMadd01 | ||||
|     movdqa     xmm6, kMadd11 | ||||
|     movdqa     xmm7, kRound34 | ||||
|  | ||||
|   wloop: | ||||
|     movdqu     xmm0, [eax]           // pixels 0..7 | ||||
| @@ -582,7 +579,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, | ||||
|     lea        eax, [eax + 32] | ||||
|     pavgb      xmm0, xmm1 | ||||
|     pshufb     xmm0, xmm4 | ||||
|     movdqa     xmm1, xmmword ptr kMadd21 | ||||
|     movdqa     xmm1, kMadd21 | ||||
|     pmaddubsw  xmm0, xmm1 | ||||
|     paddsw     xmm0, xmm7 | ||||
|     psrlw      xmm0, 2 | ||||
| @@ -608,12 +605,12 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, | ||||
|     mov        esi, [esp + 4 + 8]    // src_stride | ||||
|     mov        edx, [esp + 4 + 12]   // dst_ptr | ||||
|     mov        ecx, [esp + 4 + 16]   // dst_width | ||||
|     movdqa     xmm2, xmmword ptr kShuf01 | ||||
|     movdqa     xmm3, xmmword ptr kShuf11 | ||||
|     movdqa     xmm4, xmmword ptr kShuf21 | ||||
|     movdqa     xmm5, xmmword ptr kMadd01 | ||||
|     movdqa     xmm6, xmmword ptr kMadd11 | ||||
|     movdqa     xmm7, xmmword ptr kRound34 | ||||
|     movdqa     xmm2, kShuf01 | ||||
|     movdqa     xmm3, kShuf11 | ||||
|     movdqa     xmm4, kShuf21 | ||||
|     movdqa     xmm5, kMadd01 | ||||
|     movdqa     xmm6, kMadd11 | ||||
|     movdqa     xmm7, kRound34 | ||||
|  | ||||
|   wloop: | ||||
|     movdqu     xmm0, [eax]           // pixels 0..7 | ||||
| @@ -642,7 +639,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, | ||||
|     pavgb      xmm1, xmm0 | ||||
|     pavgb      xmm0, xmm1 | ||||
|     pshufb     xmm0, xmm4 | ||||
|     movdqa     xmm1, xmmword ptr kMadd21 | ||||
|     movdqa     xmm1, kMadd21 | ||||
|     pmaddubsw  xmm0, xmm1 | ||||
|     paddsw     xmm0, xmm7 | ||||
|     psrlw      xmm0, 2 | ||||
| @@ -668,8 +665,8 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | ||||
|                                      // src_stride ignored | ||||
|     mov        edx, [esp + 12]       // dst_ptr | ||||
|     mov        ecx, [esp + 16]       // dst_width | ||||
|     movdqa     xmm4, xmmword ptr kShuf38a | ||||
|     movdqa     xmm5, xmmword ptr kShuf38b | ||||
|     movdqa     xmm4, kShuf38a | ||||
|     movdqa     xmm5, kShuf38b | ||||
|  | ||||
|   xloop: | ||||
|     movdqu     xmm0, [eax]           // 16 pixels -> 0,1,2,3,4,5 | ||||
| @@ -701,9 +698,9 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, | ||||
|     mov        esi, [esp + 4 + 8]    // src_stride | ||||
|     mov        edx, [esp + 4 + 12]   // dst_ptr | ||||
|     mov        ecx, [esp + 4 + 16]   // dst_width | ||||
|     movdqa     xmm2, xmmword ptr kShufAc | ||||
|     movdqa     xmm3, xmmword ptr kShufAc3 | ||||
|     movdqa     xmm4, xmmword ptr kScaleAc33 | ||||
|     movdqa     xmm2, kShufAc | ||||
|     movdqa     xmm3, kShufAc3 | ||||
|     movdqa     xmm4, kScaleAc33 | ||||
|     pxor       xmm5, xmm5 | ||||
|  | ||||
|   xloop: | ||||
| @@ -766,10 +763,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, | ||||
|     mov        esi, [esp + 4 + 8]    // src_stride | ||||
|     mov        edx, [esp + 4 + 12]   // dst_ptr | ||||
|     mov        ecx, [esp + 4 + 16]   // dst_width | ||||
|     movdqa     xmm2, xmmword ptr kShufAb0 | ||||
|     movdqa     xmm3, xmmword ptr kShufAb1 | ||||
|     movdqa     xmm4, xmmword ptr kShufAb2 | ||||
|     movdqa     xmm5, xmmword ptr kScaleAb2 | ||||
|     movdqa     xmm2, kShufAb0 | ||||
|     movdqa     xmm3, kShufAb1 | ||||
|     movdqa     xmm4, kShufAb2 | ||||
|     movdqa     xmm5, kScaleAb2 | ||||
|  | ||||
|   xloop: | ||||
|     movdqu     xmm0, [eax]           // average 2 rows into xmm0 | ||||
| @@ -860,16 +857,6 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { | ||||
| } | ||||
| #endif  // HAS_SCALEADDROW_AVX2 | ||||
|  | ||||
| // Constant for making pixels signed to avoid pmaddubsw | ||||
| // saturation. | ||||
| static uvec8 kFsub80 = | ||||
|   { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | ||||
|     0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | ||||
|  | ||||
| // Constant for making pixels unsigned and adding .5 for rounding. | ||||
| static uvec16 kFadd40 = | ||||
|   { 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 }; | ||||
|  | ||||
| // Bilinear column filtering. SSSE3 version. | ||||
| __declspec(naked) | ||||
| void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | ||||
| @@ -887,8 +874,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | ||||
|     movd       xmm5, eax | ||||
|     pcmpeqb    xmm6, xmm6           // generate 0x007f for inverting fraction. | ||||
|     psrlw      xmm6, 9 | ||||
|     pcmpeqb    xmm7, xmm7           // generate 0x0001 | ||||
|     psrlw      xmm7, 15 | ||||
|     pextrw     eax, xmm2, 1         // get x0 integer. preroll | ||||
|     sub        ecx, 2 | ||||
|     jl         xloop29 | ||||
| @@ -911,22 +896,20 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | ||||
|     movd       xmm4, ebx | ||||
|     pshufb     xmm1, xmm5           // 0011 | ||||
|     punpcklwd  xmm0, xmm4 | ||||
|     psubb      xmm0, xmmword ptr kFsub80  // make pixels signed. | ||||
|     pxor       xmm1, xmm6           // 0..7f and 7f..0 | ||||
|     paddusb    xmm1, xmm7           // +1 so 0..7f and 80..1 | ||||
|     pmaddubsw  xmm1, xmm0           // 16 bit, 2 pixels. | ||||
|     pmaddubsw  xmm0, xmm1           // 16 bit, 2 pixels. | ||||
|     pextrw     eax, xmm2, 1         // get x0 integer. next iteration. | ||||
|     pextrw     edx, xmm2, 3         // get x1 integer. next iteration. | ||||
|     paddw      xmm1, xmmword ptr kFadd40  // make pixels unsigned and round. | ||||
|     psrlw      xmm1, 7              // 8.7 fixed point to low 8 bits. | ||||
|     packuswb   xmm1, xmm1           // 8 bits, 2 pixels. | ||||
|     movd       ebx, xmm1 | ||||
|     psrlw      xmm0, 7              // 8.7 fixed point to low 8 bits. | ||||
|     packuswb   xmm0, xmm0           // 8 bits, 2 pixels. | ||||
|     movd       ebx, xmm0 | ||||
|     mov        [edi], bx | ||||
|     lea        edi, [edi + 2] | ||||
|     sub        ecx, 2               // 2 pixels | ||||
|     jge        xloop2 | ||||
|  | ||||
|  xloop29: | ||||
|  | ||||
|     add        ecx, 2 - 1 | ||||
|     jl         xloop99 | ||||
|  | ||||
| @@ -935,14 +918,11 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | ||||
|     movd       xmm0, ebx | ||||
|     psrlw      xmm2, 9              // 7 bit fractions. | ||||
|     pshufb     xmm2, xmm5           // 0011 | ||||
|     psubb      xmm0, xmmword ptr kFsub80  // make pixels signed. | ||||
|     pxor       xmm2, xmm6           // 0..7f and 7f..0 | ||||
|     paddusb    xmm2, xmm7           // +1 so 0..7f and 80..1 | ||||
|     pmaddubsw  xmm2, xmm0           // 16 bit | ||||
|     paddw      xmm2, xmmword ptr kFadd40  // make pixels unsigned and round. | ||||
|     psrlw      xmm2, 7              // 8.7 fixed point to low 8 bits. | ||||
|     packuswb   xmm2, xmm2           // 8 bits | ||||
|     movd       ebx, xmm2 | ||||
|     pmaddubsw  xmm0, xmm2           // 16 bit | ||||
|     psrlw      xmm0, 7              // 8.7 fixed point to low 8 bits. | ||||
|     packuswb   xmm0, xmm0           // 8 bits | ||||
|     movd       ebx, xmm0 | ||||
|     mov        [edi], bl | ||||
|  | ||||
|  xloop99: | ||||
| @@ -1253,8 +1233,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, | ||||
|     mov        ecx, [esp + 8 + 12]   // dst_width | ||||
|     movd       xmm2, [esp + 8 + 16]  // x | ||||
|     movd       xmm3, [esp + 8 + 20]  // dx | ||||
|     movdqa     xmm4, xmmword ptr kShuffleColARGB | ||||
|     movdqa     xmm5, xmmword ptr kShuffleFractions | ||||
|     movdqa     xmm4, kShuffleColARGB | ||||
|     movdqa     xmm5, kShuffleFractions | ||||
|     pcmpeqb    xmm6, xmm6           // generate 0x007f for inverting fraction. | ||||
|     psrlw      xmm6, 9 | ||||
|     pextrw     eax, xmm2, 1         // get x0 integer. preroll | ||||
|   | ||||
							
								
								
									
										1
									
								
								third_party/libyuv/source/video_common.cc
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								third_party/libyuv/source/video_common.cc
									
									
									
									
										vendored
									
									
								
							| @@ -25,7 +25,6 @@ struct FourCCAliasEntry { | ||||
|  | ||||
| static const struct FourCCAliasEntry kFourCCAliases[] = { | ||||
|   {FOURCC_IYUV, FOURCC_I420}, | ||||
|   {FOURCC_YU12, FOURCC_I420}, | ||||
|   {FOURCC_YU16, FOURCC_I422}, | ||||
|   {FOURCC_YU24, FOURCC_I444}, | ||||
|   {FOURCC_YUYV, FOURCC_YUY2}, | ||||
|   | ||||
							
								
								
									
										1136
									
								
								third_party/libyuv/source/x86inc.asm
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1136
									
								
								third_party/libyuv/source/x86inc.asm
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user
	 James Bankoski
					James Bankoski