libyuv: update to c244a3e9
Fixes color issue when scaling without breaking mingw.

BUG=https://bugs.chromium.org/p/libyuv/issues/detail?id=605
BUG=https://bugs.chromium.org/p/webm/issues/detail?id=1252

Change-Id: I09437d93fd65964ad57113274d8c819f3eaf2e57
parent ce634bbf4d
commit 6d7a9f3e9c
9 third_party/libyuv/README.libvpx
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1456
Version: c244a3e9
License: BSD
License File: LICENSE

@@ -13,3 +13,10 @@ which down-samples the original input video (f.g. 1280x720) a number of times
in order to encode multiple resolution bit streams.

Local Modifications:
rm -rf .gitignore .gn AUTHORS Android.mk BUILD.gn CMakeLists.txt DEPS LICENSE \
LICENSE_THIRD_PARTY OWNERS PATENTS PRESUBMIT.py README.chromium README.md \
all.gyp build_overrides/ chromium/ codereview.settings docs/ \
download_vs_toolchain.py gyp_libyuv gyp_libyuv.py include/libyuv.h \
include/libyuv/compare_row.h libyuv.gyp libyuv.gypi libyuv_nacl.gyp \
libyuv_test.gyp linux.mk public.mk setup_links.py sync_chromium.py \
third_party/ tools/ unit_test/ util/ winarm.mk
third_party/libyuv/include/libyuv/basic_types.h
@@ -13,7 +13,7 @@

#include <stddef.h> // for NULL, size_t

#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
#if defined(_MSC_VER) && (_MSC_VER < 1600)
#include <sys/types.h> // for uintptr_t on x86
#else
#include <stdint.h> // for uintptr_t
22 third_party/libyuv/include/libyuv/convert.h
@@ -12,10 +12,13 @@
#define INCLUDE_LIBYUV_CONVERT_H_

#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"

#include "libyuv/rotate.h" // For enum RotationMode.

// TODO(fbarchard): fix WebRTC source to include following libyuv headers:
#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620
#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620
#include "libyuv/planar_functions.h" // For WebRTC I420Rect, CopyPlane. b/618

#ifdef __cplusplus
namespace libyuv {
@@ -115,6 +118,17 @@ int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_v, int dst_stride_v,
int width, int height);

// Convert Android420 to I420.
LIBYUV_API
int Android420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
int pixel_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);

// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
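Editor's note: the Android420ToI420 declaration added above is the one genuinely new entry point in this header. It covers Android YUV_420_888 buffers, whose chroma may be planar or interleaved depending on the device. A hedged usage sketch follows; all buffer names and the wrapper itself are hypothetical placeholders, not libvpx code:

#include "libyuv/convert.h"

// pixel_stride_uv == 1 treats src_u/src_v as planar (I420-like);
// pixel_stride_uv == 2 treats them as interleaved (NV12/NV21-like).
static int ConvertCameraFrameSketch(const uint8* y, int y_stride,
                                    const uint8* u, int u_stride,
                                    const uint8* v, int v_stride,
                                    int pixel_stride_uv,
                                    uint8* dst_y, uint8* dst_u, uint8* dst_v,
                                    int width, int height) {
  // Returns 0 on success, -1 on invalid arguments.
  return Android420ToI420(y, y_stride, u, u_stride, v, v_stride,
                          pixel_stride_uv,
                          dst_y, width,              // tightly packed output
                          dst_u, (width + 1) / 2,
                          dst_v, (width + 1) / 2,
                          width, height);
}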
96 third_party/libyuv/include/libyuv/convert_argb.h
@@ -12,10 +12,8 @@
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_

#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"

#include "libyuv/rotate.h" // For enum RotationMode.

// TODO(fbarchard): This set of functions should exactly match convert.h
// TODO(fbarchard): Add tests. Create random content of right size and convert
@@ -44,6 +42,14 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Duplicate prototype for function in convert_from.h for remoting.
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
@@ -60,6 +66,22 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Convert J444 to ARGB.
LIBYUV_API
int J444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);

// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
@@ -68,6 +90,24 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Convert I420 with Alpha to preattenuated ARGB.
LIBYUV_API
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int attenuate);

// Convert I420 with Alpha to preattenuated ABGR.
LIBYUV_API
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height, int attenuate);

// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
@@ -131,6 +171,54 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Convert J420 to ABGR.
LIBYUV_API
int J420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);

// Convert J422 to ABGR.
LIBYUV_API
int J422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);

// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Convert H422 to ARGB.
LIBYUV_API
int H422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Convert H420 to ABGR.
LIBYUV_API
int H420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);

// Convert H422 to ABGR.
LIBYUV_API
int H422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);

// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
third_party/libyuv/include/libyuv/convert_from.h
@@ -56,8 +56,6 @@ int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);

// TODO(fbarchard): I420ToM420

LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
11 third_party/libyuv/include/libyuv/cpu_id.h
@@ -18,9 +18,8 @@ namespace libyuv {
extern "C" {
#endif

// TODO(fbarchard): Consider overlapping bits for different architectures.
// Internal flag to indicate cpuid requires initialization.
#define kCpuInit 0x1
static const int kCpuInitialized = 0x1;

// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
@@ -37,12 +36,12 @@ static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasAVX3 = 0x2000;
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.

// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasMIPS_DSP = 0x20000;
static const int kCpuHasMIPS_DSPR2 = 0x40000;
static const int kCpuHasDSPR2 = 0x20000;

// Internal function used to auto-init.
LIBYUV_API
@@ -57,13 +56,13 @@ int ArmCpuCaps(const char* cpuinfo_name);
// returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
}

// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
// MaskCpuFlags(0) to disable all cpu specific optimizations.
// MaskCpuFlags(1) to disable all cpu specific optimizations.
LIBYUV_API
void MaskCpuFlags(int enable_flags);
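Editor's note: the TestCpuFlag change above is easy to miss. The old header used kCpuInit (bit 0x1) as the "not yet detected" marker and compared against it; the new code makes zero mean uninitialized and reuses bit 0x1 as kCpuInitialized, which is why MaskCpuFlags(0) was re-documented as MaskCpuFlags(1) for disabling optimizations. A self-contained sketch of the pattern, with illustrative names rather than the library's internals:

// Lazy one-time CPU feature detection, as TestCpuFlag now does it.
static int cpu_flags_demo = 0;  // 0 = detection has not run yet

static int InitFlagsDemo() {
  int flags = 0x1;  // kCpuInitialized-style bit keeps the result non-zero
  // ... OR in feature bits discovered via cpuid / getauxval here ...
  cpu_flags_demo = flags;
  return flags;
}

static inline int TestFlagDemo(int test_flag) {
  // First caller pays for detection; everyone after reads the cache.
  return (!cpu_flags_demo ? InitFlagsDemo() : cpu_flags_demo) & test_flag;
}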
108 third_party/libyuv/include/libyuv/planar_functions.h
@@ -39,6 +39,20 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value);

// Split interleaved UV plane into separate U and V planes.
LIBYUV_API
void SplitUVPlane(const uint8* src_uv, int src_stride_uv,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);

// Merge separate U and V planes into one interleaved UV plane.
LIBYUV_API
void MergeUVPlane(const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_uv, int dst_stride_uv,
int width, int height);

// Copy I400. Supports inverting.
LIBYUV_API
int I400ToI400(const uint8* src_y, int src_stride_y,
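Editor's note: SplitUVPlane and MergeUVPlane promote the existing row-level UV kernels to public plane-level helpers. A scalar sketch of what SplitUVPlane computes, per its comment above (the real function dispatches to SSE2/AVX2/NEON row kernels):

#include <stdint.h>

// Illustrative scalar equivalent: de-interleave UVUVUV... into U and V.
static void SplitUVPlaneSketch(const uint8_t* src_uv, int src_stride_uv,
                               uint8_t* dst_u, int dst_stride_u,
                               uint8_t* dst_v, int dst_stride_v,
                               int width, int height) {
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      dst_u[x] = src_uv[2 * x];      // even bytes carry U
      dst_v[x] = src_uv[2 * x + 1];  // odd bytes carry V
    }
    src_uv += src_stride_uv;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
}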
@@ -145,13 +159,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);

// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);

// I422ToARGB is in convert_argb.h
// Convert I422 to BGRA.
LIBYUV_API
@@ -177,6 +184,14 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);

// Alias
#define RGB24ToRAW RAWToRGB24

LIBYUV_API
int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);

// Draw a rectangle into I420.
LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y,
@@ -281,13 +296,19 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Copy ARGB to ARGB.
// Copy Alpha channel of ARGB to alpha of ARGB.
LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Copy ARGB to ARGB.
// Extract the alpha channel from ARGB.
LIBYUV_API
int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_a, int dst_stride_a,
int width, int height);

// Copy Y channel to Alpha of ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
@@ -301,6 +322,7 @@ LIBYUV_API
ARGBBlendRow GetARGBBlend();

// Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
@@ -308,6 +330,31 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Alpha Blend plane and store to destination.
// Source is not pre-multiplied by alpha.
LIBYUV_API
int BlendPlane(const uint8* src_y0, int src_stride_y0,
const uint8* src_y1, int src_stride_y1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
int width, int height);

// Alpha Blend YUV images and store to destination.
// Source is not pre-multiplied by alpha.
// Alpha is full width x height and subsampled to half size to apply to UV.
LIBYUV_API
int I420Blend(const uint8* src_y0, int src_stride_y0,
const uint8* src_u0, int src_stride_u0,
const uint8* src_v0, int src_stride_v0,
const uint8* src_y1, int src_stride_y1,
const uint8* src_u1, int src_stride_u1,
const uint8* src_v1, int src_stride_v1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);

// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
@@ -357,12 +404,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);

// Convert MJPG to ARGB.
LIBYUV_API
int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* argb, int argb_stride,
int w, int h, int dw, int dh);

// Internal function - do not call directly.
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
@@ -389,22 +430,49 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);

// Interpolate between two ARGB images using specified amount of interpolation
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
// and 255 means 1% src_argb0 and 99% src_argb1.
// Internally uses ARGBScale bilinear filtering.
// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
int InterpolatePlane(const uint8* src0, int src_stride0,
const uint8* src1, int src_stride1,
uint8* dst, int dst_stride,
int width, int height, int interpolation);

// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);

// Interpolate between two YUV images using specified amount of interpolation
// Internally calls InterpolatePlane on each plane where the U and V planes
// are half width and half height.
LIBYUV_API
int I420Interpolate(const uint8* src0_y, int src0_stride_y,
const uint8* src0_u, int src0_stride_u,
const uint8* src0_v, int src0_stride_v,
const uint8* src1_y, int src1_stride_y,
const uint8* src1_u, int src1_stride_u,
const uint8* src1_v, int src1_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height, int interpolation);

#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
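Editor's note: the comment rewrite above generalizes the interpolation docs from ARGB to any plane; the arithmetic they describe is an 8-bit weighted average. A one-function sketch of the per-pixel formula as the comments state it (the library's row kernels add SIMD and rounding details):

#include <stdint.h>

// interpolation == 0 yields src0; 255 yields (nearly) src1; 128 averages.
static inline uint8_t InterpolatePixelSketch(uint8_t s0, uint8_t s1,
                                             int interpolation) {
  return (uint8_t)((s0 * (256 - interpolation) + s1 * interpolation) >> 8);
}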
71 third_party/libyuv/include/libyuv/rotate_row.h
@@ -22,53 +22,24 @@ extern "C" {
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif

// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012

// TODO(fbarchard): switch to standard form of inline; fails on clangcl.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#if defined(__APPLE__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".private_extern _" #name " \n" \
".align 4,0x90 \n" \
"_" #name ": \n"
#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".align 4,0x90 \n" \
"_" #name ": \n"
#else
#define DECLARE_FUNCTION(name) \
".text \n" \
".align 4,0x90 \n" \
#name ": \n"
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif

// The following are available for Visual C:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
defined(_MSC_VER) && !defined(__clang__)
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif

// The following are available for GCC but not NaCL:
// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSEWX8_SSSE3
#endif

// The following are available for 32 bit GCC:
#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__)
#define HAS_TRANSPOSEUVWX8_SSE2
#endif

// The following are available for 64 bit GCC but not NaCL:
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
defined(__x86_64__)
@@ -85,8 +56,8 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_TRANSPOSEWX8_MIPS_DSPR2
#define HAS_TRANSPOSEUVWx8_MIPS_DSPR2
#define HAS_TRANSPOSEWX8_DSPR2
#define HAS_TRANSPOSEUVWX8_DSPR2
#endif // defined(__mips__)

void TransposeWxH_C(const uint8* src, int src_stride,
@@ -100,7 +71,9 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);

void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
@@ -109,8 +82,8 @@ void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);

void TransposeUVWxH_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
@@ -126,9 +99,19 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);

void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);

#ifdef __cplusplus
} // extern "C"
1159 third_party/libyuv/include/libyuv/row.h
File diff suppressed because it is too large
third_party/libyuv/include/libyuv/scale_argb.h
@@ -35,7 +35,6 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);

// TODO(fbarchard): Implement this.
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
135 third_party/libyuv/include/libyuv/scale_row.h
@@ -23,6 +23,26 @@ extern "C" {
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif

// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__

// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__

// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
@@ -42,24 +62,23 @@ extern "C" {
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
#define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALEROWDOWN2_SSE2
#define HAS_SCALEROWDOWN2_SSSE3
#define HAS_SCALEROWDOWN34_SSSE3
#define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEROWDOWN4_SSE2
#define HAS_SCALEROWDOWN4_SSSE3
#define HAS_SCALEADDROW_SSE2
#endif

// The following are available on VS2012:
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif

// The following are available on Visual C:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__)
#define HAS_SCALEADDROW_SSE2
#endif

// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
@@ -77,10 +96,10 @@ extern "C" {
// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_SCALEROWDOWN2_MIPS_DSPR2
#define HAS_SCALEROWDOWN4_MIPS_DSPR2
#define HAS_SCALEROWDOWN34_MIPS_DSPR2
#define HAS_SCALEROWDOWN38_MIPS_DSPR2
#define HAS_SCALEROWDOWN2_DSPR2
#define HAS_SCALEROWDOWN4_DSPR2
#define HAS_SCALEROWDOWN34_DSPR2
#define HAS_SCALEROWDOWN38_DSPR2
#endif

// Scale ARGB vertically with bilinear interpolation.
@@ -133,6 +152,8 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
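Editor's note: the ScaleRowDown2Box_Odd_C declaration added here (with SSSE3/AVX2/NEON "Odd" variants further down) is the scaling side of the color fix in this commit: odd-sized planes get an edge-aware final column instead of averaging past the end of the row. For orientation, a scalar sketch of the even-width 2x2 box average these kernels specialize; this is a reconstruction from the names, not the library source:

#include <stddef.h>
#include <stdint.h>

// 2x downscale with box filter: each output byte is the rounded mean
// of a 2x2 source block taken from two adjacent rows.
static void ScaleRowDown2BoxSketch(const uint8_t* src, ptrdiff_t src_stride,
                                   uint8_t* dst, int dst_width) {
  const uint8_t* s = src;               // top row of each 2x2 block
  const uint8_t* t = src + src_stride;  // bottom row
  for (int x = 0; x < dst_width; ++x) {
    dst[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);
    s += 2;
    t += 2;
  }
}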
@@ -214,22 +235,22 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);

// Specialized scalers for x86.
void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
@@ -251,22 +272,26 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
@@ -418,6 +443,8 @@ void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
@@ -447,28 +474,26 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);


void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);

#ifdef __cplusplus
} // extern "C"
2 third_party/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_

#define LIBYUV_VERSION 1456
#define LIBYUV_VERSION 1614

#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
third_party/libyuv/include/libyuv/video_common.h
@@ -62,7 +62,7 @@ enum FourCC {

// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),  // deprecated.

// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
@@ -90,7 +90,8 @@ enum FourCC {
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc

// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
@@ -150,6 +151,7 @@ enum FourCCBpp {
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,
39 third_party/libyuv/source/compare.cc
@@ -17,6 +17,7 @@
#endif

#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
@@ -26,30 +27,13 @@ namespace libyuv {
extern "C" {
#endif

// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);

// This module is for Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
#define HAS_HASHDJB2_SSE41
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);

#ifdef VISUALC_HAS_AVX2
#define HAS_HASHDJB2_AVX2
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#endif

#endif // HAS_HASHDJB2_SSE41

// hash seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) {
HashDjb2_SSE = HashDjb2_SSE41;
@@ -127,23 +111,6 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
return fourcc;
}

uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
#endif

#ifdef VISUALC_HAS_AVX2
#define HAS_SUMSQUAREERROR_AVX2
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
#endif

// TODO(fbarchard): Refactor into row function.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
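Editor's note: compare.cc now gets its kernel prototypes from the new compare_row.h instead of re-declaring them, but the algorithm is unchanged: HashDjb2 is the classic djb2 hash with a caller-supplied seed (5381 recommended, per the comment). A textbook reference version matching the declared signature:

#include <stdint.h>

// djb2: hash = hash * 33 + byte, over each input byte.
static uint32_t HashDjb2Sketch(const uint8_t* src, int count, uint32_t seed) {
  uint32_t hash = seed;  // 5381 gives the classic djb2 sequence
  for (int i = 0; i < count; ++i) {
    hash = hash * 33 + src[i];
  }
  return hash;
}

The 33^n constants in the SSE41/AVX2 tables of compare_win.cc and compare_gcc.cc below are exactly the unrolled multipliers for 16 bytes of this recurrence.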
2 third_party/libyuv/source/compare_common.cc
@@ -10,6 +10,8 @@

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
19 third_party/libyuv/source/compare_gcc.cc
@@ -9,6 +9,8 @@
*/

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
@@ -16,11 +18,13 @@ namespace libyuv {
extern "C" {
#endif

#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))

uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
asm volatile ( // NOLINT
asm volatile (
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
@@ -54,15 +58,10 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
"+r"(count), // %2
"=g"(sse) // %3
:: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
); // NOLINT
);
return sse;
}

#endif // defined(__x86_64__) || defined(__i386__)

#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
@@ -91,7 +90,7 @@ static uvec32 kHashMul3 = {

uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
asm volatile ( // NOLINT
asm volatile (
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
@@ -140,7 +139,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
"m"(kHashMul3) // %8
: "memory", "cc"
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
); // NOLINT
);
return hash;
}
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
3 third_party/libyuv/source/compare_neon.cc
@@ -9,6 +9,8 @@
*/

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
@@ -27,7 +29,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"

".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n"
3 third_party/libyuv/source/compare_neon64.cc
@@ -9,6 +9,8 @@
*/

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
@@ -26,7 +28,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
"eor v17.16b, v17.16b, v17.16b \n"
"eor v19.16b, v19.16b, v19.16b \n"

".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n"
87 third_party/libyuv/source/compare_win.cc
@@ -9,6 +9,8 @@
*/

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
@@ -16,9 +18,8 @@ namespace libyuv {
extern "C" {
#endif

// This module is for Visual C x86.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
defined(_MSC_VER) && !defined(__clang__)
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

__declspec(naked)
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
@@ -100,41 +101,32 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
}
#endif // _MSC_VER >= 1700

#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
static uvec32 kHashMul1 = {
uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
static uvec32 kHashMul2 = {
uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
static uvec32 kHashMul3 = {
uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};

// 27: 66 0F 38 40 C6     pmulld      xmm0,xmm6
// 44: 66 0F 38 40 DD     pmulld      xmm3,xmm5
// 59: 66 0F 38 40 E5     pmulld      xmm4,xmm5
// 72: 66 0F 38 40 D5     pmulld      xmm2,xmm5
// 83: 66 0F 38 40 CD     pmulld      xmm1,xmm5
#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
_asm _emit 0x40 _asm _emit reg

__declspec(naked)
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
@@ -143,30 +135,30 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
movd xmm0, [esp + 12] // seed

pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, kHash16x33
movdqa xmm6, xmmword ptr kHash16x33

wloop:
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
movdqa xmm5, kHashMul0
pmulld xmm0, xmm6 // hash *= 33 ^ 16
movdqa xmm5, xmmword ptr kHashMul0
movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3]
pmulld(0xdd) // pmulld xmm3, xmm5
movdqa xmm5, kHashMul1
pmulld xmm3, xmm5
movdqa xmm5, xmmword ptr kHashMul1
movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7]
pmulld(0xe5) // pmulld xmm4, xmm5
movdqa xmm5, kHashMul2
pmulld xmm4, xmm5
movdqa xmm5, xmmword ptr kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11]
pmulld(0xd5) // pmulld xmm2, xmm5
movdqa xmm5, kHashMul3
pmulld xmm2, xmm5
movdqa xmm5, xmmword ptr kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
pmulld(0xcd) // pmulld xmm1, xmm5
pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
paddd xmm1, xmm3
@@ -191,36 +183,37 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
movdqa xmm6, kHash16x33
vmovd xmm0, [esp + 12] // seed

wloop:
vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
pmulld xmm0, xmm6 // hash *= 33 ^ 16
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
pmulld xmm3, kHashMul0
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
pmulld xmm4, kHashMul1
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
pmulld xmm2, kHashMul2
vpmovzxbd xmm3, [eax] // src[0-3]
vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16
vpmovzxbd xmm4, [eax + 4] // src[4-7]
vpmulld xmm3, xmm3, xmmword ptr kHashMul0
vpmovzxbd xmm2, [eax + 8] // src[8-11]
vpmulld xmm4, xmm4, xmmword ptr kHashMul1
vpmovzxbd xmm1, [eax + 12] // src[12-15]
vpmulld xmm2, xmm2, xmmword ptr kHashMul2
lea eax, [eax + 16]
pmulld xmm1, kHashMul3
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
vpmulld xmm1, xmm1, xmmword ptr kHashMul3
vpaddd xmm3, xmm3, xmm4 // add 16 results
vpaddd xmm1, xmm1, xmm2
vpaddd xmm1, xmm1, xmm3
vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
vpaddd xmm1, xmm1, xmm2
vpshufd xmm2, xmm1, 0x01
vpaddd xmm1, xmm1, xmm2
vpaddd xmm0, xmm0, xmm1
sub ecx, 16
jg wloop

movd eax, xmm0 // return hash
vmovd eax, xmm0 // return hash
vzeroupper
ret
}
}
#endif // _MSC_VER >= 1700

#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

#ifdef __cplusplus
198 third_party/libyuv/source/convert.cc
@@ -40,13 +40,14 @@ static int I4xxToI420(const uint8* src_y, int src_stride_y,
const int dst_y_height = Abs(src_y_height);
const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
if (src_y_width == 0 || src_y_height == 0 ||
src_uv_width == 0 || src_uv_height == 0) {
if (src_uv_width == 0 || src_uv_height == 0) {
return -1;
}
ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
dst_y, dst_stride_y, dst_y_width, dst_y_height,
kFilterBilinear);
if (dst_y) {
ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
dst_y, dst_stride_y, dst_y_width, dst_y_height,
kFilterBilinear);
}
ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
kFilterBilinear);
@@ -69,8 +70,8 @@ int I420Copy(const uint8* src_y, int src_stride_y,
int width, int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_y || !src_u || !src_v ||
!dst_y || !dst_u || !dst_v ||
if (!src_u || !src_v ||
!dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
@@ -166,7 +167,7 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
int width, int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_y || !dst_y || !dst_u || !dst_v ||
if (!dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
@@ -177,7 +178,9 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
src_y = src_y + (height - 1) * src_stride_y;
src_stride_y = -src_stride_y;
}
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
return 0;
@@ -242,13 +245,9 @@ static int X420ToI420(const uint8* src_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
SplitUVRow_C;
if (!src_y || !src_uv ||
!dst_y || !dst_u || !dst_v ||
if (!src_uv || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
@@ -256,7 +255,9 @@ static int X420ToI420(const uint8* src_y,
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
dst_y = dst_y + (height - 1) * dst_stride_y;
if (dst_y) {
dst_y = dst_y + (height - 1) * dst_stride_y;
}
dst_u = dst_u + (halfheight - 1) * dst_stride_u;
dst_v = dst_v + (halfheight - 1) * dst_stride_v;
dst_stride_y = -dst_stride_y;
@@ -279,41 +280,6 @@ static int X420ToI420(const uint8* src_y,
halfheight = 1;
src_stride_uv = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_SPLITUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SplitUVRow = SplitUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
SplitUVRow = SplitUVRow_SSE2;
}
}
#endif
#if defined(HAS_SPLITUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
SplitUVRow = SplitUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
SplitUVRow = SplitUVRow_AVX2;
}
}
#endif
#if defined(HAS_SPLITUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SplitUVRow = SplitUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
SplitUVRow = SplitUVRow_NEON;
}
}
#endif
#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(halfwidth, 16)) {
SplitUVRow = SplitUVRow_MIPS_DSPR2;
}
}
#endif

if (dst_y) {
if (src_stride_y0 == src_stride_y1) {
@@ -324,13 +290,10 @@ static int X420ToI420(const uint8* src_y,
}
}

for (y = 0; y < halfheight; ++y) {
// Copy a row of UV.
SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
dst_u += dst_stride_u;
dst_v += dst_stride_v;
src_uv += src_stride_uv;
}
// Split UV plane - NV12 / NV21
SplitUVPlane(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, dst_stride_v,
halfwidth, halfheight);

return 0;
}
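Editor's note: the X420ToI420 hunks above are the payoff of the new plane helpers. The function (shared by NV12ToI420 and NV21ToI420) loses its private SplitUVRow dispatch and row loop in favor of SplitUVPlane, and dst_y becomes optional. A hedged sketch of the public call this code path serves; the wrapper and buffer names are placeholders:

#include "libyuv/convert.h"

// NV12 (Y plane + interleaved UV) to I420 (separate U and V planes).
// Internally: CopyPlane for Y (skipped when dst_y is NULL), then
// SplitUVPlane for the half-resolution UV plane.
static int Nv12ToI420Sketch(const uint8* src_y, int src_y_stride,
                            const uint8* src_uv, int src_uv_stride,
                            uint8* dst_y, uint8* dst_u, uint8* dst_v,
                            int width, int height) {
  return NV12ToI420(src_y, src_y_stride, src_uv, src_uv_stride,
                    dst_y, width,
                    dst_u, (width + 1) / 2,
                    dst_v, (width + 1) / 2,
                    width, height);  // 0 on success, -1 on bad arguments
}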
@@ -390,9 +353,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
int width, int height) {
int y;
void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix) = YUY2ToUVRow_C;
uint8* dst_u, uint8* dst_v, int width) = YUY2ToUVRow_C;
void (*YUY2ToYRow)(const uint8* src_yuy2,
uint8* dst_y, int pix) = YUY2ToYRow_C;
uint8* dst_y, int width) = YUY2ToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -455,9 +418,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
int width, int height) {
int y;
void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix) = UYVYToUVRow_C;
uint8* dst_u, uint8* dst_v, int width) = UYVYToUVRow_C;
void (*UYVYToYRow)(const uint8* src_uyvy,
uint8* dst_y, int pix) = UYVYToYRow_C;
uint8* dst_y, int width) = UYVYToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -521,7 +484,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
int y;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb ||
!dst_y || !dst_u || !dst_v ||
@@ -597,7 +560,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
int y;
void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra,
uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C;
void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) =
void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int width) =
BGRAToYRow_C;
if (!src_bgra ||
!dst_y || !dst_u || !dst_v ||
@@ -663,7 +626,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
int y;
void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C;
void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) =
void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int width) =
ABGRToYRow_C;
if (!src_abgr ||
!dst_y || !dst_u || !dst_v ||
@@ -729,7 +692,7 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
int y;
void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C;
void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) =
void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int width) =
RGBAToYRow_C;
if (!src_rgba ||
!dst_y || !dst_u || !dst_v ||
@@ -796,14 +759,14 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
#if defined(HAS_RGB24TOYROW_NEON)
void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) =
void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int width) =
RGB24ToYRow_C;
#else
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RGB24ToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
@@ -910,14 +873,14 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
#if defined(HAS_RAWTOYROW_NEON)
void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw,
uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C;
void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) =
void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int width) =
RAWToYRow_C;
#else
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_raw || !dst_y || !dst_u || !dst_v ||
@@ -1024,14 +987,14 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
#if defined(HAS_RGB565TOYROW_NEON)
void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C;
void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) =
void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int width) =
RGB565ToYRow_C;
#else
void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RGB565ToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
@@ -1146,14 +1109,14 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
#if defined(HAS_ARGB1555TOYROW_NEON)
void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int width) =
ARGB1555ToYRow_C;
#else
void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
ARGB1555ToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
@@ -1270,14 +1233,14 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
#if defined(HAS_ARGB4444TOYROW_NEON)
void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int width) =
ARGB4444ToYRow_C;
#else
void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
ARGB4444ToARGBRow_C;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
@@ -1383,6 +1346,81 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
return 0;
}

static void SplitPixels(const uint8* src_u, int src_pixel_stride_uv,
uint8* dst_u, int width) {
int i;
for (i = 0; i < width; ++i) {
*dst_u = *src_u;
++dst_u;
src_u += src_pixel_stride_uv;
}
}

// Convert Android420 to I420.
LIBYUV_API
int Android420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
int src_pixel_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
const int vu_off = src_v - src_u;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_u || !src_v ||
!dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}

if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}

// Copy UV planes as is - I420
if (src_pixel_stride_uv == 1) {
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
return 0;
// Split UV planes - NV21
} else if (src_pixel_stride_uv == 2 && vu_off == -1 &&
src_stride_u == src_stride_v) {
SplitUVPlane(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u,
halfwidth, halfheight);
return 0;
// Split UV planes - NV12
} else if (src_pixel_stride_uv == 2 && vu_off == 1 &&
src_stride_u == src_stride_v) {
SplitUVPlane(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v,
halfwidth, halfheight);
return 0;
}

for (y = 0; y < halfheight; ++y) {
SplitPixels(src_u, src_pixel_stride_uv, dst_u, halfwidth);
SplitPixels(src_v, src_pixel_stride_uv, dst_v, halfwidth);
src_u += src_stride_u;
src_v += src_stride_v;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
return 0;
}

#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

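(Note: Android420ToI420 is the new entry point for android.media.Image-style frames, where U and V may be fully planar or interleaved with a pixel stride of 2. A minimal usage sketch with hypothetical plane and stride names:)

    /* pixel_stride_uv is 1 for planar I420-style sources and 2 for
       NV12/NV21-style semi-planar sources; the function detects those
       layouts and takes the CopyPlane or SplitUVPlane fast path, falling
       back to the per-pixel SplitPixels loop otherwise. */
    int r = Android420ToI420(plane_y, row_stride_y,
                             plane_u, row_stride_u,
                             plane_v, row_stride_v,
                             pixel_stride_uv,
                             out_y, out_stride_y,
                             out_u, out_stride_u,
                             out_v, out_stride_v,
                             width, height);  /* 0 on success, -1 on bad args */
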
769
third_party/libyuv/source/convert_argb.cc
vendored
@@ -14,6 +14,7 @@
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/planar_functions.h" // For CopyPlane and ARGBShuffle.
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
@@ -44,18 +45,347 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
return 0;
}

// Convert I444 to ARGB.
// Convert I420 to ARGB with matrix
static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width, int height) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif

for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}

// Convert I420 to ARGB.
LIBYUV_API
int I444ToARGB(const uint8* src_y, int src_stride_y,
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return I420ToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_argb, dst_stride_argb,
&kYuvI601Constants,
width, height);
}

// Convert I420 to ABGR.
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
return I420ToARGBMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_abgr, dst_stride_abgr,
&kYvuI601Constants, // Use Yvu matrix
width, height);
}

// Convert J420 to ARGB.
LIBYUV_API
int J420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return I420ToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_argb, dst_stride_argb,
&kYuvJPEGConstants,
width, height);
}

// Convert J420 to ABGR.
LIBYUV_API
int J420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
return I420ToARGBMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_abgr, dst_stride_abgr,
&kYvuJPEGConstants, // Use Yvu matrix
width, height);
}

// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return I420ToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_argb, dst_stride_argb,
&kYuvH709Constants,
width, height);
}

// Convert H420 to ABGR.
LIBYUV_API
int H420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
return I420ToARGBMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_abgr, dst_stride_abgr,
&kYvuH709Constants, // Use Yvu matrix
width, height);
}

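(Note: the I420/J420/H420 wrappers above differ only in the YuvConstants table passed to I420ToARGBMatrix: kYuvI601Constants for BT.601 video range, kYuvJPEGConstants for full-range JPEG, kYuvH709Constants for BT.709, with the kYvu* mirrors swapping U and V to produce ABGR. A hedged sketch of selecting the BT.709 path for HD content, with hypothetical buffers:)

    /* HD sources are normally BT.709; I420ToARGB would apply BT.601 instead. */
    H420ToARGB(y, y_stride, u, u_stride, v, v_stride,
               argb, argb_stride, width, height);
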
// Convert I422 to ARGB with matrix
static int I422ToARGBMatrix(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width, int height) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce rows.
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif

for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
}

// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return I422ToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_argb, dst_stride_argb,
&kYuvI601Constants,
width, height);
}

// Convert I422 to ABGR.
LIBYUV_API
int I422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
return I422ToARGBMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_abgr, dst_stride_abgr,
&kYvuI601Constants, // Use Yvu matrix
width, height);
}

// Convert J422 to ARGB.
LIBYUV_API
int J422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return I422ToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_argb, dst_stride_argb,
&kYuvJPEGConstants,
width, height);
}

// Convert J422 to ABGR.
LIBYUV_API
int J422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
return I422ToARGBMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_abgr, dst_stride_abgr,
&kYvuJPEGConstants, // Use Yvu matrix
width, height);
}

// Convert H422 to ARGB.
LIBYUV_API
int H422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return I422ToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_argb, dst_stride_argb,
&kYuvH709Constants,
width, height);
}

// Convert H422 to ABGR.
LIBYUV_API
int H422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
return I422ToARGBMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_abgr, dst_stride_abgr,
&kYvuH709Constants, // Use Yvu matrix
width, height);
}

// Convert I444 to ARGB with matrix
static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width, int height) {
int y;
void (*I444ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I444ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@@ -103,7 +433,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
#endif

for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -112,81 +442,49 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
return 0;
}

// Convert I422 to ARGB.
// Convert I444 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
int I444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce rows.
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
return I444ToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_argb, dst_stride_argb,
&kYuvI601Constants,
width, height);
}

for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
return I444ToARGBMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_abgr, dst_stride_abgr,
&kYvuI601Constants, // Use Yvu matrix
width, height);
}

// Convert J444 to ARGB.
LIBYUV_API
int J444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return I444ToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_argb, dst_stride_argb,
&kYuvJPEGConstants,
width, height);
}

// Convert I411 to ARGB.
@@ -201,6 +499,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I411ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@@ -248,7 +547,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
#endif

for (y = 0; y < height; ++y) {
I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvI601Constants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -257,6 +556,143 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
return 0;
}

// Convert I420 with Alpha to preattenuated ARGB.
static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width, int height, int attenuate) {
int y;
void (*I422AlphaToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
const uint8* a_buf,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) = I422AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
int width) = ARGBAttenuateRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422ALPHATOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422ALPHATOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_DSPR2;
}
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBAttenuateRow = ARGBAttenuateRow_NEON;
}
}
#endif

for (y = 0; y < height; ++y) {
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
ARGBAttenuateRow(dst_argb, dst_argb, width);
}
dst_argb += dst_stride_argb;
src_a += src_stride_a;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}

// Convert I420 with Alpha to ARGB.
LIBYUV_API
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int attenuate) {
return I420AlphaToARGBMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
src_a, src_stride_a,
dst_argb, dst_stride_argb,
&kYuvI601Constants,
width, height, attenuate);
}

// Convert I420 with Alpha to ABGR.
LIBYUV_API
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height, int attenuate) {
return I420AlphaToARGBMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
src_a, src_stride_a,
dst_abgr, dst_stride_abgr,
&kYvuI601Constants, // Use Yvu matrix
width, height, attenuate);
}

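(Note: the attenuate flag in I420AlphaToARGBMatrix selects whether ARGBAttenuateRow premultiplies each converted row by its alpha. A minimal sketch of the public wrapper, with hypothetical buffers:)

    /* I420 plus a full-resolution alpha plane to premultiplied ARGB. */
    I420AlphaToARGB(y, y_stride, u, u_stride, v, v_stride,
                    a, a_stride, argb, argb_stride,
                    width, height, 1 /* attenuate */);
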
// Convert I400 to ARGB.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
@@ -322,7 +758,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) =
J400ToARGBRow_C;
if (!src_y || !dst_argb ||
width <= 0 || height == 0) {
@@ -449,7 +885,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RGB24ToARGBRow_C;
if (!src_rgb24 || !dst_argb ||
width <= 0 || height == 0) {
@@ -499,7 +935,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RAWToARGBRow_C;
if (!src_raw || !dst_argb ||
width <= 0 || height == 0) {
@@ -549,7 +985,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) =
RGB565ToARGBRow_C;
if (!src_rgb565 || !dst_argb ||
width <= 0 || height == 0) {
@@ -608,7 +1044,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
int width, int height) {
int y;
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
int pix) = ARGB1555ToARGBRow_C;
int width) = ARGB1555ToARGBRow_C;
if (!src_argb1555 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -666,7 +1102,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
int width, int height) {
int y;
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
int pix) = ARGB4444ToARGBRow_C;
int width) = ARGB4444ToARGBRow_C;
if (!src_argb4444 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -727,6 +1163,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
@@ -764,7 +1201,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
#endif

for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, width);
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -784,6 +1221,7 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
void (*NV21ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = NV21ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
@@ -821,7 +1259,7 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
#endif

for (y = 0; y < height; ++y) {
NV21ToARGBRow(src_y, src_uv, dst_argb, width);
NV21ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -840,6 +1278,7 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_m420 || !dst_argb ||
width <= 0 || height == 0) {
@@ -877,14 +1316,16 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
#endif

for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
&kYuvI601Constants, width);
NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
dst_argb + dst_stride_argb, width);
dst_argb + dst_stride_argb, &kYuvI601Constants, width);
dst_argb += dst_stride_argb * 2;
src_m420 += src_stride_m420 * 3;
}
if (height & 1) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
&kYuvI601Constants, width);
}
return 0;
}
@@ -895,7 +1336,10 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
void (*YUY2ToARGBRow)(const uint8* src_yuy2,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) =
YUY2ToARGBRow_C;
if (!src_yuy2 || !dst_argb ||
width <= 0 || height == 0) {
@@ -939,7 +1383,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
}
#endif
for (y = 0; y < height; ++y) {
YUY2ToARGBRow(src_yuy2, dst_argb, width);
YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width);
src_yuy2 += src_stride_yuy2;
dst_argb += dst_stride_argb;
}
@@ -952,7 +1396,10 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
void (*UYVYToARGBRow)(const uint8* src_uyvy,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) =
UYVYToARGBRow_C;
if (!src_uyvy || !dst_argb ||
width <= 0 || height == 0) {
@@ -996,159 +1443,13 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
}
#endif
for (y = 0; y < height; ++y) {
UYVYToARGBRow(src_uyvy, dst_argb, width);
UYVYToARGBRow(src_uyvy, dst_argb, &kYuvI601Constants, width);
src_uyvy += src_stride_uyvy;
dst_argb += dst_stride_argb;
}
return 0;
}

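(Note: YUY2ToARGB and UYVYToARGB now pass &kYuvI601Constants to their row functions, so packed 4:2:2 input decodes with the same BT.601 matrix as I420ToARGB. A minimal sketch with hypothetical buffers:)

    /* Packed YUY2 (Y0 U Y1 V per pixel pair) to 32-bit ARGB. */
    YUY2ToARGB(yuy2, yuy2_stride, argb, argb_stride, width, height);
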
// Convert J420 to ARGB.
LIBYUV_API
int J420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*J422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = J422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_J422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
J422ToARGBRow = J422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_J422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
J422ToARGBRow = J422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
J422ToARGBRow = J422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_J422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
J422ToARGBRow = J422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
J422ToARGBRow = J422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
}
#endif

for (y = 0; y < height; ++y) {
J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}

// Convert J422 to ARGB.
LIBYUV_API
int J422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*J422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = J422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce rows.
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_J422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
J422ToARGBRow = J422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_J422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
J422ToARGBRow = J422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
J422ToARGBRow = J422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_J422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
J422ToARGBRow = J422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
J422ToARGBRow = J422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
}
#endif

for (y = 0; y < height; ++y) {
J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
}

#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

452
third_party/libyuv/source/convert_from.cc
vendored
@@ -46,9 +46,11 @@ static int I420ToI4xx(const uint8* src_y, int src_stride_y,
dst_uv_width <= 0 || dst_uv_height <= 0) {
return -1;
}
ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
dst_y, dst_stride_y, dst_y_width, dst_y_height,
kFilterBilinear);
if (dst_y) {
ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
dst_y, dst_stride_y, dst_y_width, dst_y_height,
kFilterBilinear);
}
ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
kFilterBilinear);
@@ -359,6 +361,7 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
return 0;
}

// TODO(fbarchard): test negative height for invert.
LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
@@ -366,72 +369,19 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height) {
int y;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
// Coalesce rows.
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_y || !src_u || !src_v || !dst_y || !dst_uv ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_uv = dst_uv + (halfheight - 1) * dst_stride_uv;
dst_stride_y = -dst_stride_y;
dst_stride_uv = -dst_stride_uv;
}
if (src_stride_y == width &&
dst_stride_y == width) {
width *= height;
height = 1;
src_stride_y = dst_stride_y = 0;
}
// Coalesce rows.
if (src_stride_u == halfwidth &&
src_stride_v == halfwidth &&
dst_stride_uv == halfwidth * 2) {
halfwidth *= halfheight;
halfheight = 1;
src_stride_u = src_stride_v = dst_stride_uv = 0;
}
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif

CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
for (y = 0; y < halfheight; ++y) {
// Merge a row of U and V into a row of UV.
MergeUVRow_(src_u, src_v, dst_uv, halfwidth);
src_u += src_stride_u;
src_v += src_stride_v;
dst_uv += dst_stride_uv;
int halfwidth = (width + 1) / 2;
int halfheight = height > 0 ? (height + 1) / 2 : (height - 1) / 2;
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
MergeUVPlane(src_u, src_stride_u,
src_v, src_stride_v,
dst_uv, dst_stride_uv,
halfwidth, halfheight);
return 0;
}

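(Note: I420ToNV12 now computes the half-size chroma dimensions up front and delegates the interleave to MergeUVPlane; the dst_y guard suggests the Y copy can be skipped in this revision when a destination Y plane is not supplied. A minimal sketch with hypothetical buffers:)

    /* Merge I420's separate U and V planes into NV12's interleaved UV plane. */
    I420ToNV12(y, y_stride, u, u_stride, v, v_stride,
               nv12_y, nv12_y_stride, nv12_uv, nv12_uv_stride,
               width, height);
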
@@ -445,221 +395,24 @@ int I420ToNV21(const uint8* src_y, int src_stride_y,
return I420ToNV12(src_y, src_stride_y,
src_v, src_stride_v,
src_u, src_stride_u,
dst_y, src_stride_y,
dst_y, dst_stride_y,
dst_vu, dst_stride_vu,
width, height);
}

// Convert I420 to ARGB.
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif

for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}

// Convert I420 to BGRA.
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height) {
int y;
void (*I422ToBGRARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToBGRARow_C;
if (!src_y || !src_u || !src_v || !dst_bgra ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
dst_stride_bgra = -dst_stride_bgra;
}
#if defined(HAS_I422TOBGRAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToBGRARow = I422ToBGRARow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOBGRAROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToBGRARow = I422ToBGRARow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToBGRARow = I422ToBGRARow_AVX2;
}
}
#endif
#if defined(HAS_I422TOBGRAROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToBGRARow = I422ToBGRARow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToBGRARow = I422ToBGRARow_NEON;
}
}
#endif
#if defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
}
#endif

for (y = 0; y < height; ++y) {
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
dst_bgra += dst_stride_bgra;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}

// Convert I420 to ABGR.
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif

for (y = 0; y < height; ++y) {
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}

// Convert I420 to RGBA.
LIBYUV_API
int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height) {
// Convert I420 to RGBA with matrix
static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
const struct YuvConstants* yuvconstants,
int width, int height) {
int y;
void (*I422ToRGBARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToRGBARow_C;
if (!src_y || !src_u || !src_v || !dst_rgba ||
width <= 0 || height == 0) {
@@ -695,9 +448,18 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_I422TORGBAROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
I422ToRGBARow = I422ToRGBARow_DSPR2;
}
#endif

for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
if (y & 1) {
@@ -708,18 +470,49 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
return 0;
}

// Convert I420 to RGB24.
// Convert I420 to RGBA.
LIBYUV_API
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) {
int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height) {
return I420ToRGBAMatrix(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
dst_rgba, dst_stride_rgba,
&kYuvI601Constants,
width, height);
}

// Convert I420 to BGRA.
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height) {
return I420ToRGBAMatrix(src_y, src_stride_y,
src_v, src_stride_v, // Swap U and V
src_u, src_stride_u,
dst_bgra, dst_stride_bgra,
&kYvuI601Constants, // Use Yvu matrix
width, height);
}

// Convert I420 to RGB24 with matrix
|
||||
static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToRGB24Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGB24Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -757,7 +550,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
|
||||
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
|
||||
dst_rgb24 += dst_stride_rgb24;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -768,64 +561,34 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I420 to RGB24.
|
||||
LIBYUV_API
|
||||
int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height) {
|
||||
return I420ToRGB24Matrix(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_rgb24, dst_stride_rgb24,
|
||||
&kYuvI601Constants,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to RAW.
|
||||
LIBYUV_API
|
||||
int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_raw, int dst_stride_raw,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToRAWRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I422ToRAWRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_raw ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_raw = dst_raw + (height - 1) * dst_stride_raw;
|
||||
dst_stride_raw = -dst_stride_raw;
|
||||
}
|
||||
#if defined(HAS_I422TORAWROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToRAWRow = I422ToRAWRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORAWROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422ToRAWRow = I422ToRAWRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToRAWRow = I422ToRAWRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORAWROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToRAWRow = I422ToRAWRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToRAWRow = I422ToRAWRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
|
||||
dst_raw += dst_stride_raw;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_raw, int dst_stride_raw,
|
||||
int width, int height) {
|
||||
return I420ToRGB24Matrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u,
|
||||
dst_raw, dst_stride_raw,
|
||||
&kYvuI601Constants, // Use Yvu matrix
|
||||
width, height);
|
||||
}
|
||||
|
||||
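A note on the pattern above: instead of keeping dedicated row kernels for every byte order, the rewritten wrappers reuse one matrix converter and flip the chroma planes. Swapping the U and V pointers while selecting the YVU variant of the BT.601 constants mirrors the matrix, so the same I420ToRGBAMatrix/I420ToRGB24Matrix path also produces BGRA and RAW (bgr in memory). A minimal caller-side sketch, with buffer sizes assumed, not taken from this commit:

  // Sketch: both calls run the same matrix converter internally; only the
  // U/V order and the constants table differ.  Buffers are assumed to be
  // allocated by the caller with packed strides.
  #include "libyuv/convert_from.h"

  void ConvertBoth(const uint8* y, int y_stride,
                   const uint8* u, int u_stride,
                   const uint8* v, int v_stride,
                   uint8* bgra, uint8* raw, int width, int height) {
    // BGRA: RGBA matrix path with U/V swapped and kYvuI601Constants.
    I420ToBGRA(y, y_stride, u, u_stride, v, v_stride,
               bgra, width * 4, width, height);
    // RAW (bgr in memory): RGB24 matrix path, same swap trick.
    I420ToRAW(y, y_stride, u, u_stride, v, v_stride,
              raw, width * 3, width, height);
  }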
// Convert I420 to ARGB1555.
@ -840,6 +603,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGB1555Row_C;
if (!src_y || !src_u || !src_v || !dst_argb1555 ||
width <= 0 || height == 0) {
@ -877,7 +641,8 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
#endif

for (y = 0; y < height; ++y) {
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants,
width);
dst_argb1555 += dst_stride_argb1555;
src_y += src_stride_y;
if (y & 1) {
@ -901,6 +666,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGB4444Row_C;
if (!src_y || !src_u || !src_v || !dst_argb4444 ||
width <= 0 || height == 0) {
@ -938,7 +704,8 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
#endif

for (y = 0; y < height; ++y) {
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants,
width);
dst_argb4444 += dst_stride_argb4444;
src_y += src_stride_y;
if (y & 1) {
@ -961,6 +728,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToRGB565Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
width <= 0 || height == 0) {
@ -998,7 +766,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
#endif

for (y = 0; y < height; ++y) {
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@ -1029,9 +797,10 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
width <= 0 || height == 0) {
return -1;
@ -1069,12 +838,12 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
#if defined(HAS_I422TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
@ -1105,7 +874,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
// Allocate a row of argb.
align_buffer_64(row_argb, width * 4);
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row_argb, width);
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
dst_rgb565 += dst_stride_rgb565;
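For readers puzzling over the dither expression above: dither4x4 is a 16-byte 4x4 ordered-dither table, and (y & 3) << 2 converts the scanline number into the byte offset of the matching table row, which then travels into the row kernel as a single uint32 so each of the four pixels in a group gets its own offset before truncation to 5/6/5 bits. A small sketch of just the row selection, with an example table that is assumed, not the one libyuv ships:

  /* Sketch: selecting the dither row per scanline.  kDither4x4 below is a
   * made-up example table, not libyuv's. */
  typedef unsigned char uint8;
  typedef unsigned int uint32;

  static const uint8 kDither4x4[16] = {
    0, 4, 1, 5,
    6, 2, 7, 3,
    1, 5, 0, 4,
    7, 3, 6, 2,
  };

  uint32 DitherForRow(const uint8* dither4x4, int y) {
    /* y & 3 cycles through the 4 table rows; << 2 turns the row index into
     * a byte offset; the 4 row bytes are read as one little-endian word. */
    return *(const uint32*)(dither4x4 + ((y & 3) << 2));
  }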
@ -1258,7 +1027,6 @@ int ConvertFromI420(const uint8* y, int y_stride,
// Triplanar formats
// TODO(fbarchard): halfstride instead of halfwidth
case FOURCC_I420:
case FOURCC_YU12:
case FOURCC_YV12: {
int halfwidth = (width + 1) / 2;
int halfheight = (height + 1) / 2;
211
third_party/libyuv/source/convert_from_argb.cc
vendored
@ -28,10 +28,10 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV444Row_C;
int width) = ARGBToUV444Row_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
@ -109,13 +109,16 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
if (!src_argb ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
@ -130,34 +133,22 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
height = 1;
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
@ -170,9 +161,17 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif

for (y = 0; y < height; ++y) {
ARGBToUV422Row(src_argb, dst_u, dst_v, width);
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
src_argb += src_stride_argb;
dst_y += dst_stride_y;
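The removal of the dedicated ARGBToUV422Row kernels above leans on a property of ARGBToUVRow: it averages two rows of ARGB, src_argb0 and src_argb0 + src_stride_argb, before subsampling chroma. Passing 0 as the stride makes both rows the same row, which reduces the 4:2:0 kernel to the 4:2:2 sampling the old kernel performed, so the UV422 variants can be deleted. A rough C model of that behavior (not the SIMD implementation; it assumes libyuv's uint8 typedef, an even width, and uses the BT.601 chroma coefficients from row_common.cc):

  /* Rough C model of ARGBToUVRow's two-row averaging. */
  typedef unsigned char uint8;

  void ARGBToUVRow_Model(const uint8* src_argb0, int src_stride_argb,
                         uint8* dst_u, uint8* dst_v, int width) {
    const uint8* src_argb1 = src_argb0 + src_stride_argb;  /* same row if 0 */
    int x;
    for (x = 0; x < width; x += 2) {
      /* Average a 2x2 block (2x1 when stride is 0) per chroma sample. */
      int b = (src_argb0[0] + src_argb0[4] + src_argb1[0] + src_argb1[4]) >> 2;
      int g = (src_argb0[1] + src_argb0[5] + src_argb1[1] + src_argb1[5]) >> 2;
      int r = (src_argb0[2] + src_argb0[6] + src_argb1[2] + src_argb1[6]) >> 2;
      *dst_u++ = (uint8)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
      *dst_v++ = (uint8)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
      src_argb0 += 8;
      src_argb1 += 8;
    }
  }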
@ -191,8 +190,8 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
int width, int height) {
int y;
void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV411Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
int width) = ARGBToUV411Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
@ -264,7 +263,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
@ -373,7 +372,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
@ -478,9 +477,9 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
uint8* dst_yuy2, int dst_stride_yuy2,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C;
@ -502,34 +501,22 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
height = 1;
src_stride_argb = dst_stride_yuy2 = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
@ -542,7 +529,14 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
}
}
#endif

#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
@ -567,7 +561,7 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
uint8* row_v = row_u + ((width + 63) & ~63) / 2;

for (y = 0; y < height; ++y) {
ARGBToUV422Row(src_argb, row_u, row_v, width);
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
ARGBToYRow(src_argb, row_y, width);
I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width);
src_argb += src_stride_argb;
@ -585,9 +579,9 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
uint8* dst_uyvy, int dst_stride_uyvy,
int width, int height) {
int y;
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUV422Row_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C;
@ -609,34 +603,22 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
height = 1;
src_stride_argb = dst_stride_uyvy = 0;
}
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
@ -649,7 +631,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
}
}
#endif

#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
@ -674,7 +663,7 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
uint8* row_v = row_u + ((width + 63) & ~63) / 2;

for (y = 0; y < height; ++y) {
ARGBToUV422Row(src_argb, row_u, row_v, width);
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
ARGBToYRow(src_argb, row_y, width);
I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width);
src_argb += src_stride_argb;
@ -692,7 +681,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height) {
int y;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || width <= 0 || height == 0) {
return -1;
@ -764,7 +753,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) {
int y;
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRGB24Row_C;
if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
@ -812,7 +801,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_raw, int dst_stride_raw,
int width, int height) {
int y;
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRAWRow_C;
if (!src_argb || !dst_raw || width <= 0 || height == 0) {
return -1;
@ -869,7 +858,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
const uint8* dither4x4, int width, int height) {
int y;
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
@ -921,7 +910,7 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
int y;
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToRGB565Row_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@ -977,7 +966,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height) {
int y;
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToARGB1555Row_C;
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
return -1;
@ -1033,7 +1022,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height) {
int y;
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
ARGBToARGB4444Row_C;
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
return -1;
@ -1093,7 +1082,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
int y;
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb ||
!dst_yj || !dst_u || !dst_v ||
@ -1157,21 +1146,24 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
return 0;
}

// ARGB little endian (bgra in memory) to J422
// Convert ARGB to J422. (JPeg full range I422).
LIBYUV_API
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_yj, int dst_stride_yj,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
void (*ARGBToUVJ422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix) = ARGBToUVJ422Row_C;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_y, int pix) =
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
if (!src_argb ||
!dst_yj || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
@ -1179,34 +1171,19 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
}
// Coalesce rows.
if (src_stride_argb == width * 4 &&
dst_stride_y == width &&
dst_stride_yj == width &&
dst_stride_u * 2 == width &&
dst_stride_v * 2 == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOUVJ422ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJ422Row = ARGBToUVJ422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJ422Row = ARGBToUVJ422Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVJ422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJ422Row = ARGBToUVJ422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJ422Row = ARGBToUVJ422Row_NEON;
}
}
#endif

#if defined(HAS_ARGBTOYJROW_SSSE3)
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
@ -1227,12 +1204,20 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON;
}
}
#endif

for (y = 0; y < height; ++y) {
ARGBToUVJ422Row(src_argb, dst_u, dst_v, width);
ARGBToYJRow(src_argb, dst_y, width);
ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYJRow(src_argb, dst_yj, width);
src_argb += src_stride_argb;
dst_y += dst_stride_y;
dst_yj += dst_stride_yj;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
@ -1245,7 +1230,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
int width, int height) {
int y;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_yj || width <= 0 || height == 0) {
return -1;
1
third_party/libyuv/source/convert_jpeg.cc
vendored
@ -9,6 +9,7 @@
*/

#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"

#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
13
third_party/libyuv/source/convert_to_argb.cc
vendored
@ -23,7 +23,7 @@ namespace libyuv {
extern "C" {
#endif

// Convert camera sample to I420 with cropping, rotation and vertical flip.
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// src_width is used for source stride computation
// src_height is used to compute location of planes, and indicate inversion
// sample_size is measured in bytes and is the size of the frame.
@ -51,8 +51,8 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
// also enable temporary buffer.
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
crop_argb == sample;
uint8* tmp_argb = crop_argb;
int tmp_argb_stride = argb_stride;
uint8* dest_argb = crop_argb;
int dest_argb_stride = argb_stride;
uint8* rotate_buffer = NULL;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;

@ -66,13 +66,13 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
}

if (need_buf) {
int argb_size = crop_width * abs_crop_height * 4;
int argb_size = crop_width * 4 * abs_crop_height;
rotate_buffer = (uint8*)malloc(argb_size);
if (!rotate_buffer) {
return 1; // Out of memory runtime error.
}
crop_argb = rotate_buffer;
argb_stride = crop_width;
argb_stride = crop_width * 4;
}

switch (format) {
@ -176,7 +176,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:
case FOURCC_YV12: {
const uint8* src_y = sample + (src_width * crop_y + crop_x);
const uint8* src_u;
@ -291,7 +290,7 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
if (need_buf) {
if (!r) {
r = ARGBRotate(crop_argb, argb_stride,
tmp_argb, tmp_argb_stride,
dest_argb, dest_argb_stride,
crop_width, abs_crop_height, rotation);
}
free(rotate_buffer);
14
third_party/libyuv/source/convert_to_i420.cc
vendored
@ -39,12 +39,13 @@ int ConvertToI420(const uint8* sample,
int aligned_src_width = (src_width + 1) & ~1;
const uint8* src;
const uint8* src_uv;
int abs_src_height = (src_height < 0) ? -src_height : src_height;
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
// TODO(nisse): Why allow crop_height < 0?
const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int r = 0;
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
format != FOURCC_NV12 && format != FOURCC_NV21 &&
format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
format != FOURCC_YV12) || y == sample;
uint8* tmp_y = y;
uint8* tmp_u = u;
uint8* tmp_v = v;
@ -52,16 +53,14 @@ int ConvertToI420(const uint8* sample,
int tmp_u_stride = u_stride;
int tmp_v_stride = v_stride;
uint8* rotate_buffer = NULL;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
const int inv_crop_height =
(src_height < 0) ? -abs_crop_height : abs_crop_height;

if (!y || !u || !v || !sample ||
src_width <= 0 || crop_width <= 0 ||
src_height == 0 || crop_height == 0) {
return -1;
}
if (src_height < 0) {
inv_crop_height = -inv_crop_height;
}

// One pass rotation is available for some formats. For the rest, convert
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
@ -214,7 +213,6 @@ int ConvertToI420(const uint8* sample,
break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:
case FOURCC_YV12: {
const uint8* src_y = sample + (src_width * crop_y + crop_x);
const uint8* src_u;
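Worth spelling out, since the hunk above reshuffles it: throughout libyuv a negative src_height means the frame is stored bottom-up, and the rewritten ConvertToI420 now folds that into a const inv_crop_height that carries the sign of src_height. The actual flip happens in the plane copy, with the destination pointer moved to the last row and the stride negated. A minimal sketch of that idiom in plain C (not libyuv's code):

  /* Sketch (plain C): the vertical-flip idiom libyuv converters use when a
   * negative height is passed. */
  #include <string.h>

  void CopyPlaneMaybeFlipped(const unsigned char* src, int src_stride,
                             unsigned char* dst, int dst_stride,
                             int width, int height) {
    int y;
    if (height < 0) {
      height = -height;
      dst = dst + (height - 1) * dst_stride;  /* start at the bottom row */
      dst_stride = -dst_stride;               /* and walk upward */
    }
    for (y = 0; y < height; ++y) {
      memcpy(dst, src, width);
      src += src_stride;
      dst += dst_stride;
    }
  }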
157
third_party/libyuv/source/cpu_id.cc
vendored
@ -10,12 +10,12 @@

#include "libyuv/cpu_id.h"

#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
#if defined(_MSC_VER)
#include <intrin.h> // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
!defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219)
defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h> // For _xgetbv()
#endif

@ -36,7 +36,8 @@ extern "C" {

// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
!defined(__clang__)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
@ -48,9 +49,9 @@ extern "C" {
!defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
#if defined(_MSC_VER)
// Visual C version uses intrinsic or inline x86 assembly.
#if (_MSC_FULL_VER >= 160040219)
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
__cpuidex((int*)(cpu_info), info_eax, info_ecx);
#elif defined(_M_IX86)
__asm {
@ -63,7 +64,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
mov [edi + 8], ecx
mov [edi + 12], edx
}
#else
#else // Visual C but not x86
if (info_ecx == 0) {
__cpuid((int*)(cpu_info), info_eax);
} else {
@ -71,9 +72,9 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
}
#endif
// GCC version uses inline x86 assembly.
#else // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
#else // defined(_MSC_VER)
uint32 info_ebx, info_edx;
asm volatile ( // NOLINT
asm volatile (
#if defined( __i386__) && defined(__PIC__)
// Preserve ebx for fpic 32 bit.
"mov %%ebx, %%edi \n"
@ -89,7 +90,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
cpu_info[1] = info_ebx;
cpu_info[2] = info_ecx;
cpu_info[3] = info_edx;
#endif // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
#endif // defined(_MSC_VER)
}
#else // (defined(_M_IX86) || defined(_M_X64) ...
LIBYUV_API
@ -98,28 +99,37 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
}
#endif

// TODO(fbarchard): Enable xgetbv when validator supports it.
// For VS2010 and earlier emit can be used:
// _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
// __asm {
// xor ecx, ecx // xcr 0
// xgetbv
// mov xcr0, eax
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
#define HAS_XGETBV
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
int TestOsSaveYmm() {
int GetXCR0() {
uint32 xcr0 = 0u;
#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
#elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__)
__asm {
xor ecx, ecx // xcr 0
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
mov xcr0, eax
}
#elif defined(__i386__) || defined(__x86_64__)
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(__i386__) || defined(__x86_64__)
return((xcr0 & 6) == 6); // Is ymm saved?
return xcr0;
}
#endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
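For readers outside the diff: xgetbv reads an extended control register, and XCR0 records which register state the OS saves on context switch. Bits 1 and 2 (SSE and AVX/YMM state, mask 6) must both be set before AVX kernels are safe, and bits 5 through 7 (opmask and ZMM state, mask 0xe0) before AVX-512. That is why the rewritten InitCpuFlags below tests (GetXCR0() & 6) == 6 and (GetXCR0() & 0xe0) == 0xe0. A hedged GCC/Clang-only sketch of the same checks (bit values per the Intel SDM; this is a simplified model, not the libyuv source):

  #include <stdint.h>

  static uint32_t ReadXCR0(void) {
    uint32_t eax, edx;
    /* xgetbv encoded as raw bytes, as in the diff, for old assemblers. */
    __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
    return eax;
  }

  int OsSavesYmm(void) {
    return (ReadXCR0() & 6) == 6;        /* XMM (bit 1) + YMM (bit 2) state */
  }

  int OsSavesZmm(void) {
    return (ReadXCR0() & 0xe0) == 0xe0;  /* opmask, ZMM_Hi256, Hi16_ZMM */
  }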
// based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
@ -151,30 +161,9 @@ int ArmCpuCaps(const char* cpuinfo_name) {
return 0;
}

#if defined(__mips__) && defined(__linux__)
static int MipsCpuCaps(const char* search_string) {
char cpuinfo_line[512];
const char* file_name = "/proc/cpuinfo";
FILE* f = fopen(file_name, "r");
if (!f) {
// Assume DSP if /proc/cpuinfo is unavailable.
// This will occur for Chrome sandbox for Pepper or Render process.
return kCpuHasMIPS_DSP;
}
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
if (strstr(cpuinfo_line, search_string) != NULL) {
fclose(f);
return kCpuHasMIPS_DSP;
}
}
fclose(f);
return 0;
}
#endif

// CPU detect function for SIMD instruction sets.
LIBYUV_API
int cpu_info_ = kCpuInit; // cpu_info is not initialized yet.
int cpu_info_ = 0; // cpu_info is not initialized yet.

// Test environment variable for disabling CPU features. Any non-zero value
// to disable. Zero ignored to make it easy to set the variable on/off.
@ -197,8 +186,9 @@ static LIBYUV_BOOL TestEnv(const char*) {

LIBYUV_API SAFEBUFFERS
int InitCpuFlags(void) {
// TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized.
int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)

uint32 cpu_info0[4] = { 0, 0, 0, 0 };
uint32 cpu_info1[4] = { 0, 0, 0, 0 };
uint32 cpu_info7[4] = { 0, 0, 0, 0 };
@ -207,66 +197,66 @@ int InitCpuFlags(void) {
if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7);
}
cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
kCpuHasX86;
cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
kCpuHasX86;

#ifdef HAS_XGETBV
if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
TestOsSaveYmm()) { // Saves YMM.
cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
kCpuHasAVX;
// AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;

// Detect AVX512bw
if ((GetXCR0() & 0xe0) == 0xe0) {
cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
}
}
#endif

// Environment variable overrides for testing.
if (TestEnv("LIBYUV_DISABLE_X86")) {
cpu_info_ &= ~kCpuHasX86;
cpu_info &= ~kCpuHasX86;
}
if (TestEnv("LIBYUV_DISABLE_SSE2")) {
cpu_info_ &= ~kCpuHasSSE2;
cpu_info &= ~kCpuHasSSE2;
}
if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
cpu_info_ &= ~kCpuHasSSSE3;
cpu_info &= ~kCpuHasSSSE3;
}
if (TestEnv("LIBYUV_DISABLE_SSE41")) {
cpu_info_ &= ~kCpuHasSSE41;
cpu_info &= ~kCpuHasSSE41;
}
if (TestEnv("LIBYUV_DISABLE_SSE42")) {
cpu_info_ &= ~kCpuHasSSE42;
cpu_info &= ~kCpuHasSSE42;
}
if (TestEnv("LIBYUV_DISABLE_AVX")) {
cpu_info_ &= ~kCpuHasAVX;
cpu_info &= ~kCpuHasAVX;
}
if (TestEnv("LIBYUV_DISABLE_AVX2")) {
cpu_info_ &= ~kCpuHasAVX2;
cpu_info &= ~kCpuHasAVX2;
}
if (TestEnv("LIBYUV_DISABLE_ERMS")) {
cpu_info_ &= ~kCpuHasERMS;
cpu_info &= ~kCpuHasERMS;
}
if (TestEnv("LIBYUV_DISABLE_FMA3")) {
cpu_info_ &= ~kCpuHasFMA3;
cpu_info &= ~kCpuHasFMA3;
}
if (TestEnv("LIBYUV_DISABLE_AVX3")) {
cpu_info &= ~kCpuHasAVX3;
}
#endif
#if defined(__mips__) && defined(__linux__)
// Linux mips parse text file for dsp detect.
cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP.
#if defined(__mips_dspr2)
cpu_info_ |= kCpuHasMIPS_DSPR2;
cpu_info |= kCpuHasDSPR2;
#endif
cpu_info_ |= kCpuHasMIPS;

if (getenv("LIBYUV_DISABLE_MIPS")) {
cpu_info_ &= ~kCpuHasMIPS;
}
if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
cpu_info_ &= ~kCpuHasMIPS_DSP;
}
if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
cpu_info_ &= ~kCpuHasMIPS_DSPR2;
cpu_info |= kCpuHasMIPS;
if (getenv("LIBYUV_DISABLE_DSPR2")) {
cpu_info &= ~kCpuHasDSPR2;
}
#endif
#if defined(__arm__) || defined(__aarch64__)
@ -274,28 +264,31 @@ int InitCpuFlags(void) {
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
cpu_info_ = kCpuHasNEON;
cpu_info = kCpuHasNEON;
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
// flag in it.
// So for aarch64, neon enabling is hard coded here.
#endif
#if defined(__aarch64__)
cpu_info_ = kCpuHasNEON;
cpu_info = kCpuHasNEON;
#else
// Linux arm parse text file for neon detect.
cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
cpu_info_ |= kCpuHasARM;
cpu_info |= kCpuHasARM;
if (TestEnv("LIBYUV_DISABLE_NEON")) {
cpu_info_ &= ~kCpuHasNEON;
cpu_info &= ~kCpuHasNEON;
}
#endif // __arm__
if (TestEnv("LIBYUV_DISABLE_ASM")) {
cpu_info_ = 0;
cpu_info = 0;
}
return cpu_info_;
cpu_info |= kCpuInitialized;
cpu_info_ = cpu_info;
return cpu_info;
}

// Note that use of this function is not thread safe.
LIBYUV_API
void MaskCpuFlags(int enable_flags) {
cpu_info_ = InitCpuFlags() & enable_flags;
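The InitCpuFlags rewrite above replaces the kCpuInit sentinel with a kCpuInitialized bit: the flags are accumulated in a local, the bit is OR'd in at the end, and cpu_info_ is published in a single store, so a zero cpu_info_ now unambiguously means "not yet detected". Callers keep using TestCpuFlag as before. For orientation, this is the dispatch shape the converters in this commit use (a fragment lifted from the diff's own pattern, assuming libyuv's internal row declarations):

  // Start from the portable C row function, upgrade when CPU and width allow.
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  #if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;   // handles any width
    if (IS_ALIGNED(width, 16)) {
      ARGBToYRow = ARGBToYRow_SSSE3;     // fast path, width % 16 == 0 only
    }
  }
  #endif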
15
third_party/libyuv/source/mjpeg_decoder.cc
vendored
@ -59,10 +59,10 @@ const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
// Methods that are passed to jpeglib.
boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
void init_source(jpeg_decompress_struct* cinfo);
void skip_input_data(jpeg_decompress_struct* cinfo,
long num_bytes); // NOLINT
void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes); // NOLINT
void term_source(jpeg_decompress_struct* cinfo);
void ErrorHandler(jpeg_common_struct* cinfo);
void OutputHandler(jpeg_common_struct* cinfo);

MJpegDecoder::MJpegDecoder()
: has_scanline_padding_(LIBYUV_FALSE),
@ -78,6 +78,7 @@ MJpegDecoder::MJpegDecoder()
decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
// Override standard exit()-based error handler.
error_mgr_->base.error_exit = &ErrorHandler;
error_mgr_->base.output_message = &OutputHandler;
#endif
decompress_struct_->client_data = NULL;
source_mgr_->init_source = &init_source;
@ -429,8 +430,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
return TRUE;
}

void skip_input_data(j_decompress_ptr cinfo,
long num_bytes) { // NOLINT
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
cinfo->src->next_input_byte += num_bytes;
}

@ -458,7 +458,12 @@ void ErrorHandler(j_common_ptr cinfo) {
// and causes it to return (for a second time) with value 1.
longjmp(mgr->setjmp_buffer, 1);
}
#endif

void OutputHandler(j_common_ptr cinfo) {
// Suppress fprintf warnings.
}

#endif // HAVE_SETJMP

void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
if (num_outbufs != num_outbufs_) {
72
third_party/libyuv/source/mjpeg_validate.cc
vendored
@ -17,51 +17,22 @@ namespace libyuv {
extern "C" {
#endif

// Enable this to try scasb implementation.
// #define ENABLE_SCASB 1

#ifdef ENABLE_SCASB

// Multiple of 1.
__declspec(naked)
const uint8* ScanRow_ERMS(const uint8* src, uint32 val, int count) {
__asm {
mov edx, edi
mov edi, [esp + 4] // src
mov eax, [esp + 8] // val
mov ecx, [esp + 12] // count
repne scasb
jne sr99
mov eax, edi
sub eax, 1
mov edi, edx
ret

sr99:
mov eax, 0
mov edi, edx
ret
}
}
#endif

// Helper function to scan for EOI marker.
// Helper function to scan for EOI marker (0xff 0xd9).
static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
const uint8* end = sample + sample_size - 1;
const uint8* it = sample;
for (;;) {
#ifdef ENABLE_SCASB
it = ScanRow_ERMS(it, 0xff, end - it);
#else
it = static_cast<const uint8*>(memchr(it, 0xff, end - it));
#endif
if (it == NULL) {
break;
if (sample_size >= 2) {
const uint8* end = sample + sample_size - 1;
const uint8* it = sample;
while (it < end) {
// TODO(fbarchard): scan for 0xd9 instead.
it = static_cast<const uint8 *>(memchr(it, 0xff, end - it));
if (it == NULL) {
break;
}
if (it[1] == 0xd9) {
return LIBYUV_TRUE; // Success: Valid jpeg.
}
++it; // Skip over current 0xff.
}
if (it[1] == 0xd9) {
return LIBYUV_TRUE; // Success: Valid jpeg.
}
++it; // Skip over current 0xff.
}
// ERROR: Invalid jpeg end code not found. Size sample_size
return LIBYUV_FALSE;
@ -69,20 +40,19 @@ static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {

// Helper function to validate the jpeg appears intact.
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
// Maximum size that ValidateJpeg will consider valid.
const size_t kMaxJpegSize = 0x7fffffffull;
const size_t kBackSearchSize = 1024;
if (sample_size < 64) {
if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) {
// ERROR: Invalid jpeg size: sample_size
return LIBYUV_FALSE;
}
if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image
if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker
// ERROR: Invalid jpeg initial start code
return LIBYUV_FALSE;
}
// Step over SOI marker.
sample += 2;
sample_size -= 2;

// Look for the End Of Image (EOI) marker in the end kilobyte of the buffer.
// Look for the End Of Image (EOI) marker near the end of the buffer.
if (sample_size > kBackSearchSize) {
if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
return LIBYUV_TRUE; // Success: Valid jpeg.
@ -90,8 +60,8 @@ LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
// Reduce search size for forward search.
sample_size = sample_size - kBackSearchSize + 1;
}
return ScanEOI(sample, sample_size);

// Step over SOI marker and scan for EOI.
return ScanEOI(sample + 2, sample_size - 2);
}

#ifdef __cplusplus
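The validation strategy above is worth a gloss: a well-formed JPEG starts with the SOI marker 0xff 0xd8 and ends with the EOI marker 0xff 0xd9, so ValidateJpeg checks SOI at the front, scans the last kBackSearchSize bytes for EOI first (the common case for an intact file), and only then falls back to a forward scan. A standalone sketch of the same idea in plain C, independent of the libyuv types:

  /* Standalone sketch of the SOI/EOI validation idea (not the libyuv
   * implementation).  Returns 1 if the buffer looks like a whole JPEG. */
  #include <stddef.h>
  #include <string.h>

  static int HasEoi(const unsigned char* p, size_t n) {
    const unsigned char* end = p + n - 1;
    while (p < end) {
      p = (const unsigned char*)memchr(p, 0xff, end - p);
      if (!p) break;
      if (p[1] == 0xd9) return 1;  /* EOI found */
      ++p;                         /* skip current 0xff */
    }
    return 0;
  }

  int LooksLikeWholeJpeg(const unsigned char* buf, size_t size) {
    const size_t kBackSearch = 1024;
    if (!buf || size < 64) return 0;
    if (buf[0] != 0xff || buf[1] != 0xd8) return 0;      /* SOI */
    if (size > kBackSearch &&
        HasEoi(buf + size - kBackSearch, kBackSearch)) return 1;
    return HasEoi(buf + 2, size - 2);                    /* forward scan */
  }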
896
third_party/libyuv/source/planar_functions.cc
vendored
File diff suppressed because it is too large
51
third_party/libyuv/source/rotate.cc
vendored
@ -49,13 +49,13 @@ void TransposePlane(const uint8* src, int src_stride,
}
}
#endif
#if defined(HAS_TRANSPOSEWX8_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
#if defined(HAS_TRANSPOSEWX8_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
if (IS_ALIGNED(width, 4) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
TransposeWx8 = TransposeWx8_Fast_MIPS_DSPR2;
TransposeWx8 = TransposeWx8_Fast_DSPR2;
} else {
TransposeWx8 = TransposeWx8_MIPS_DSPR2;
TransposeWx8 = TransposeWx8_DSPR2;
}
}
#endif
@ -117,14 +117,6 @@ void RotatePlane180(const uint8* src, int src_stride,
}
}
#endif
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MirrorRow = MirrorRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_SSE2;
}
}
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
MirrorRow = MirrorRow_Any_SSSE3;
@ -142,11 +134,11 @@ void RotatePlane180(const uint8* src, int src_stride,
}
#endif
// TODO(fbarchard): Mirror on mips handle unaligned memory.
#if defined(HAS_MIRRORROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
#if defined(HAS_MIRRORROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
MirrorRow = MirrorRow_MIPS_DSPR2;
MirrorRow = MirrorRow_DSPR2;
}
#endif
#if defined(HAS_COPYROW_SSE2)
@ -204,14 +196,17 @@ void TransposeUV(const uint8* src, int src_stride,
}
#endif
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
TransposeUVWx8 = TransposeUVWx8_SSE2;
if (TestCpuFlag(kCpuHasSSE2)) {
TransposeUVWx8 = TransposeUVWx8_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
TransposeUVWx8 = TransposeUVWx8_SSE2;
}
}
#endif
#if defined(HAS_TRANSPOSEUVWx8_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
#if defined(HAS_TRANSPOSEUVWX8_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
TransposeUVWx8 = TransposeUVWx8_DSPR2;
}
#endif

@ -272,22 +267,22 @@ void RotateUV180(const uint8* src, int src_stride,
uint8* dst_b, int dst_stride_b,
int width, int height) {
int i;
void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
MirrorUVRow_C;
#if defined(HAS_MIRRORUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
MirrorRowUV = MirrorUVRow_NEON;
MirrorUVRow = MirrorUVRow_NEON;
}
#endif
#if defined(HAS_MIRRORROW_UV_SSSE3)
#if defined(HAS_MIRRORUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
MirrorRowUV = MirrorUVRow_SSSE3;
MirrorUVRow = MirrorUVRow_SSSE3;
}
#endif
#if defined(HAS_MIRRORUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
#if defined(HAS_MIRRORUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
MirrorUVRow = MirrorUVRow_DSPR2;
}
#endif

@ -295,7 +290,7 @@ void RotateUV180(const uint8* src, int src_stride,
dst_b += dst_stride_b * (height - 1);

for (i = 0; i < height; ++i) {
MirrorRowUV(src, dst_a, dst_b, width);
MirrorUVRow(src, dst_a, dst_b, width);
src += src_stride;
dst_a -= dst_stride_a;
dst_b -= dst_stride_b;
41
third_party/libyuv/source/rotate_any.cc
vendored
@ -18,7 +18,7 @@ namespace libyuv {
extern "C" {
#endif

#define TANY(NAMEANY, TPOS_SIMD, TPOS_C, MASK) \
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
void NAMEANY(const uint8* src, int src_stride, \
uint8* dst, int dst_stride, int width) { \
int r = width & MASK; \
@ -26,24 +26,49 @@ extern "C" {
if (n > 0) { \
TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
} \
TPOS_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \
TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);\
}

#ifdef HAS_TRANSPOSEWX8_NEON
TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, TransposeWx8_C, 7)
TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_SSSE3
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, TransposeWx8_C, 7)
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, TransposeWx8_C, 15)
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
#ifdef HAS_TRANSPOSEWX8_MIPS_DSPR2
TANY(TransposeWx8_Any_MIPS_DSPR2, TransposeWx8_MIPS_DSPR2, TransposeWx8_C, 7)
#ifdef HAS_TRANSPOSEWX8_DSPR2
TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7)
#endif

#undef TANY

#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
void NAMEANY(const uint8* src, int src_stride, \
uint8* dst_a, int dst_stride_a, \
uint8* dst_b, int dst_stride_b, int width) { \
int r = width & MASK; \
int n = width - r; \
if (n > 0) { \
TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, \
n); \
} \
TransposeUVWx8_C(src + n * 2, src_stride, \
dst_a + n * dst_stride_a, dst_stride_a, \
dst_b + n * dst_stride_b, dst_stride_b, r); \
}

#ifdef HAS_TRANSPOSEUVWX8_NEON
TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX8_DSPR2
TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7)
#endif
#undef TUVANY

#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
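The TANY/TUVANY macros above generate the _Any_ wrappers referenced throughout this commit: the SIMD kernel handles the largest prefix whose width is a multiple of MASK + 1, and the portable C kernel mops up the remainder columns. Written out by hand for readability, one expansion looks like this (assuming the NEON and C kernels named in the diff):

  // What TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7) expands to.
  void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width) {
    int r = width & 7;   // remainder columns the SIMD kernel cannot take
    int n = width - r;   // largest multiple of 8
    if (n > 0) {
      TransposeWx8_NEON(src, src_stride, dst, dst_stride, n);
    }
    // Transposing swaps rows and columns, so source column n starts output
    // row n: hence the dst + n * dst_stride offset for the C remainder.
    TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);
  }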
621
third_party/libyuv/source/rotate_gcc.cc
vendored
@ -17,16 +17,17 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for GCC x86 and x64.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
|
||||
// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
|
||||
#if defined(HAS_TRANSPOSEWX8_SSSE3)
|
||||
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
".p2align 2 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movq (%0),%%xmm0 \n"
|
||||
"movq (%0,%3),%%xmm1 \n"
|
||||
@ -105,386 +106,260 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
}
|
||||
#endif // defined(HAS_TRANSPOSEWX8_SSSE3)
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__)
|
||||
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
asm (
|
||||
DECLARE_FUNCTION(TransposeUVWx8_SSE2)
|
||||
"push %ebx \n"
|
||||
"push %esi \n"
|
||||
"push %edi \n"
|
||||
"push %ebp \n"
|
||||
"mov 0x14(%esp),%eax \n"
|
||||
"mov 0x18(%esp),%edi \n"
|
||||
"mov 0x1c(%esp),%edx \n"
|
||||
"mov 0x20(%esp),%esi \n"
|
||||
"mov 0x24(%esp),%ebx \n"
|
||||
"mov 0x28(%esp),%ebp \n"
|
||||
"mov %esp,%ecx \n"
|
||||
"sub $0x14,%esp \n"
|
||||
"and $0xfffffff0,%esp \n"
|
||||
"mov %ecx,0x10(%esp) \n"
|
||||
"mov 0x2c(%ecx),%ecx \n"
|
||||
|
||||
"1: \n"
|
||||
"movdqu (%eax),%xmm0 \n"
|
||||
"movdqu (%eax,%edi,1),%xmm1 \n"
|
||||
"lea (%eax,%edi,2),%eax \n"
|
||||
"movdqa %xmm0,%xmm7 \n"
|
||||
"punpcklbw %xmm1,%xmm0 \n"
|
||||
"punpckhbw %xmm1,%xmm7 \n"
|
||||
"movdqa %xmm7,%xmm1 \n"
|
||||
"movdqu (%eax),%xmm2 \n"
|
||||
"movdqu (%eax,%edi,1),%xmm3 \n"
|
||||
"lea (%eax,%edi,2),%eax \n"
|
||||
"movdqa %xmm2,%xmm7 \n"
|
||||
"punpcklbw %xmm3,%xmm2 \n"
|
||||
"punpckhbw %xmm3,%xmm7 \n"
|
||||
"movdqa %xmm7,%xmm3 \n"
|
||||
"movdqu (%eax),%xmm4 \n"
|
||||
"movdqu (%eax,%edi,1),%xmm5 \n"
|
||||
"lea (%eax,%edi,2),%eax \n"
|
||||
"movdqa %xmm4,%xmm7 \n"
|
||||
"punpcklbw %xmm5,%xmm4 \n"
|
||||
"punpckhbw %xmm5,%xmm7 \n"
|
||||
"movdqa %xmm7,%xmm5 \n"
|
||||
"movdqu (%eax),%xmm6 \n"
|
||||
"movdqu (%eax,%edi,1),%xmm7 \n"
|
||||
"lea (%eax,%edi,2),%eax \n"
|
||||
"movdqu %xmm5,(%esp) \n"
|
||||
"neg %edi \n"
|
||||
"movdqa %xmm6,%xmm5 \n"
|
||||
"punpcklbw %xmm7,%xmm6 \n"
|
||||
"punpckhbw %xmm7,%xmm5 \n"
|
||||
"movdqa %xmm5,%xmm7 \n"
|
||||
"lea 0x10(%eax,%edi,8),%eax \n"
|
||||
"neg %edi \n"
|
||||
"movdqa %xmm0,%xmm5 \n"
|
||||
"punpcklwd %xmm2,%xmm0 \n"
|
||||
"punpckhwd %xmm2,%xmm5 \n"
|
||||
"movdqa %xmm5,%xmm2 \n"
|
||||
"movdqa %xmm1,%xmm5 \n"
|
||||
"punpcklwd %xmm3,%xmm1 \n"
|
||||
"punpckhwd %xmm3,%xmm5 \n"
|
||||
"movdqa %xmm5,%xmm3 \n"
|
||||
"movdqa %xmm4,%xmm5 \n"
|
||||
"punpcklwd %xmm6,%xmm4 \n"
|
||||
"punpckhwd %xmm6,%xmm5 \n"
|
||||
"movdqa %xmm5,%xmm6 \n"
|
||||
"movdqu (%esp),%xmm5 \n"
|
||||
"movdqu %xmm6,(%esp) \n"
|
||||
"movdqa %xmm5,%xmm6 \n"
|
||||
"punpcklwd %xmm7,%xmm5 \n"
|
||||
"punpckhwd %xmm7,%xmm6 \n"
|
||||
"movdqa %xmm6,%xmm7 \n"
|
||||
"movdqa %xmm0,%xmm6 \n"
|
||||
"punpckldq %xmm4,%xmm0 \n"
|
||||
"punpckhdq %xmm4,%xmm6 \n"
|
||||
"movdqa %xmm6,%xmm4 \n"
|
||||
"movdqu (%esp),%xmm6 \n"
|
||||
"movlpd %xmm0,(%edx) \n"
|
||||
"movhpd %xmm0,(%ebx) \n"
|
||||
"movlpd %xmm4,(%edx,%esi,1) \n"
|
||||
"lea (%edx,%esi,2),%edx \n"
|
||||
"movhpd %xmm4,(%ebx,%ebp,1) \n"
|
||||
"lea (%ebx,%ebp,2),%ebx \n"
|
||||
"movdqa %xmm2,%xmm0 \n"
|
||||
"punpckldq %xmm6,%xmm2 \n"
|
||||
"movlpd %xmm2,(%edx) \n"
|
||||
"movhpd %xmm2,(%ebx) \n"
|
||||
"punpckhdq %xmm6,%xmm0 \n"
|
||||
"movlpd %xmm0,(%edx,%esi,1) \n"
|
||||
"lea (%edx,%esi,2),%edx \n"
|
||||
"movhpd %xmm0,(%ebx,%ebp,1) \n"
|
||||
"lea (%ebx,%ebp,2),%ebx \n"
|
||||
"movdqa %xmm1,%xmm0 \n"
|
||||
"punpckldq %xmm5,%xmm1 \n"
|
||||
"movlpd %xmm1,(%edx) \n"
|
||||
"movhpd %xmm1,(%ebx) \n"
|
||||
"punpckhdq %xmm5,%xmm0 \n"
|
||||
"movlpd %xmm0,(%edx,%esi,1) \n"
|
||||
"lea (%edx,%esi,2),%edx \n"
|
||||
"movhpd %xmm0,(%ebx,%ebp,1) \n"
|
||||
"lea (%ebx,%ebp,2),%ebx \n"
|
||||
"movdqa %xmm3,%xmm0 \n"
|
||||
"punpckldq %xmm7,%xmm3 \n"
|
||||
"movlpd %xmm3,(%edx) \n"
|
||||
"movhpd %xmm3,(%ebx) \n"
|
||||
"punpckhdq %xmm7,%xmm0 \n"
|
||||
"sub $0x8,%ecx \n"
|
||||
"movlpd %xmm0,(%edx,%esi,1) \n"
|
||||
"lea (%edx,%esi,2),%edx \n"
|
||||
"movhpd %xmm0,(%ebx,%ebp,1) \n"
|
||||
"lea (%ebx,%ebp,2),%ebx \n"
|
||||
"jg 1b \n"
|
||||
"mov 0x10(%esp),%esp \n"
|
||||
"pop %ebp \n"
|
||||
"pop %edi \n"
|
||||
"pop %esi \n"
|
||||
"pop %ebx \n"
|
||||
#if defined(__native_client__)
|
||||
"pop %ecx \n"
|
||||
"and $0xffffffe0,%ecx \n"
|
||||
"jmp *%ecx \n"
|
||||
#else
|
||||
"ret \n"
|
||||
#endif
|
||||
);
|
||||
#endif
|
||||
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
|
||||
defined(__x86_64__)
|
||||
// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
|
||||
// Transpose 16x8. 64 bit
|
||||
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
|
||||
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm9 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm9,%%xmm9 \n"
|
||||
"movdqu (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm10 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm10 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movdqa %%xmm10,%%xmm11 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"movdqu (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm12 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm12 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movdqa %%xmm12,%%xmm13 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movdqu (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm14 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"punpckhbw %%xmm7,%%xmm14 \n"
|
||||
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"movdqa %%xmm14,%%xmm15 \n"
|
||||
"lea 0x10(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap.
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"punpcklwd %%xmm10,%%xmm8 \n"
|
||||
"punpcklwd %%xmm11,%%xmm9 \n"
|
||||
"movdqa %%xmm8,%%xmm10 \n"
|
||||
"movdqa %%xmm9,%%xmm11 \n"
|
||||
"palignr $0x8,%%xmm10,%%xmm10 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"punpcklwd %%xmm14,%%xmm12 \n"
|
||||
"punpcklwd %%xmm15,%%xmm13 \n"
|
||||
"movdqa %%xmm12,%%xmm14 \n"
|
||||
"movdqa %%xmm13,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm12,%%xmm8 \n"
|
||||
"movq %%xmm8,(%1) \n"
|
||||
"movdqa %%xmm8,%%xmm12 \n"
|
||||
"palignr $0x8,%%xmm12,%%xmm12 \n"
|
||||
"movq %%xmm12,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm14,%%xmm10 \n"
|
||||
"movdqa %%xmm10,%%xmm14 \n"
|
||||
"movq %%xmm10,(%1) \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"punpckldq %%xmm13,%%xmm9 \n"
|
||||
"movq %%xmm14,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm9,%%xmm13 \n"
|
||||
"movq %%xmm9,(%1) \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movq %%xmm13,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm15,%%xmm11 \n"
|
||||
"movq %%xmm11,(%1) \n"
|
||||
"movdqa %%xmm11,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"movq %%xmm15,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
|
||||
);
}
|
||||
#endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
|
||||
|
||||
// Transpose UV 8x8. 64 bit.
|
||||
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
|
||||
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%4),%%xmm1 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm1 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqu (%0,%4),%%xmm3 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm3 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"movdqu (%0,%4),%%xmm5 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm5 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"movdqu (%0,%4),%%xmm7 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm8 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"neg %4 \n"
|
||||
"lea 0x10(%0,%4,8),%0 \n"
|
||||
"punpckhbw %%xmm7,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm7 \n"
|
||||
"neg %4 \n"
|
||||
// Second round of bit swap.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"movdqa %%xmm1,%%xmm9 \n"
|
||||
"punpckhwd %%xmm2,%%xmm8 \n"
|
||||
"punpckhwd %%xmm3,%%xmm9 \n"
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm2 \n"
|
||||
"movdqa %%xmm9,%%xmm3 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"movdqa %%xmm5,%%xmm9 \n"
|
||||
"punpckhwd %%xmm6,%%xmm8 \n"
|
||||
"punpckhwd %%xmm7,%%xmm9 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm8,%%xmm6 \n"
|
||||
"movdqa %%xmm9,%%xmm7 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movlpd %%xmm0,(%1) \n" // Write back U channel
|
||||
"movhpd %%xmm0,(%2) \n" // Write back V channel
|
||||
"punpckhdq %%xmm4,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movlpd %%xmm2,(%1) \n"
|
||||
"movhpd %%xmm2,(%2) \n"
|
||||
"punpckhdq %%xmm6,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm1,%%xmm8 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movlpd %%xmm1,(%1) \n"
|
||||
"movhpd %%xmm1,(%2) \n"
|
||||
"punpckhdq %%xmm5,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm3,%%xmm8 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movlpd %%xmm3,(%1) \n"
|
||||
"movhpd %%xmm3,(%2) \n"
|
||||
"punpckhdq %%xmm7,%%xmm8 \n"
|
||||
"sub $0x8,%3 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst_a), // %1
|
||||
"+r"(dst_b), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"((intptr_t)(src_stride)), // %4
|
||||
"r"((intptr_t)(dst_stride_a)), // %5
|
||||
"r"((intptr_t)(dst_stride_b)) // %6
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"xmm8", "xmm9"
|
||||
);
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
16
third_party/libyuv/source/rotate_mips.cc
vendored
@ -22,8 +22,8 @@ extern "C" {
    defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
    (_MIPS_SIM == _MIPS_SIM_ABI32)

void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width) {
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride, int width) {
  __asm__ __volatile__ (
      ".set push                                         \n"
      ".set noreorder                                    \n"
@ -106,8 +106,8 @@ void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
  );
}

void TransposeWx8_Fast_MIPS_DSPR2(const uint8* src, int src_stride,
                                  uint8* dst, int dst_stride, int width) {
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width) {
  __asm__ __volatile__ (
      ".set noat                                         \n"
      ".set push                                         \n"
@ -308,10 +308,10 @@ void TransposeWx8_Fast_MIPS_DSPR2(const uint8* src, int src_stride,
  );
}

void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                               uint8* dst_a, int dst_stride_a,
                               uint8* dst_b, int dst_stride_b,
                               int width) {
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
                          uint8* dst_a, int dst_stride_a,
                          uint8* dst_b, int dst_stride_b,
                          int width) {
  __asm__ __volatile__ (
      ".set push                                         \n"
      ".set noreorder                                    \n"
10
third_party/libyuv/source/rotate_neon.cc
vendored
@ -27,7 +27,7 @@ static uvec8 kVTbl4x4Transpose =
void TransposeWx8_NEON(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride,
                       int width) {
  const uint8* src_temp = NULL;
  const uint8* src_temp;
  asm volatile (
    // loops are on blocks of 8. loop will stop when
    // counter gets to or below 0. starting the counter
@ -35,7 +35,6 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
    "sub         %5, #8                        \n"

    // handle 8x8 blocks. this should be the majority of the plane
    ".p2align  2                               \n"
    "1:                                        \n"
      "mov         %0, %1                      \n"

@ -230,7 +229,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,

    "4:                                        \n"

    : "+r"(src_temp),   // %0
    : "=&r"(src_temp),  // %0
      "+r"(src),        // %1
      "+r"(src_stride), // %2
      "+r"(dst),        // %3
@ -248,7 +247,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width) {
  const uint8* src_temp = NULL;
  const uint8* src_temp;
  asm volatile (
    // loops are on blocks of 8. loop will stop when
    // counter gets to or below 0. starting the counter
@ -256,7 +255,6 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
    "sub         %7, #8                        \n"

    // handle 8x8 blocks. this should be the majority of the plane
    ".p2align  2                               \n"
    "1:                                        \n"
      "mov         %0, %1                      \n"

@ -514,7 +512,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,

    "4:                                        \n"

    : "+r"(src_temp),   // %0
    : "=&r"(src_temp),  // %0
      "+r"(src),        // %1
      "+r"(src_stride), // %2
      "+r"(dst_a),      // %3
8
third_party/libyuv/source/rotate_neon64.cc
vendored
@ -26,7 +26,7 @@ static uvec8 kVTbl4x4Transpose =

void TransposeWx8_NEON(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride, int width) {
  const uint8* src_temp = NULL;
  const uint8* src_temp;
  int64 width64 = (int64) width;  // Work around clang 3.4 warning.
  asm volatile (
    // loops are on blocks of 8. loop will stop when
@ -235,7 +235,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,

    "4:                                        \n"

    : "+r"(src_temp),  // %0
    : "=&r"(src_temp), // %0
      "+r"(src),       // %1
      "+r"(dst),       // %2
      "+r"(width64)    // %3
@ -255,7 +255,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width) {
  const uint8* src_temp = NULL;
  const uint8* src_temp;
  int64 width64 = (int64) width;  // Work around clang 3.4 warning.
  asm volatile (
    // loops are on blocks of 8. loop will stop when
@ -520,7 +520,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,

    "4:                                        \n"

    : "+r"(src_temp),  // %0
    : "=&r"(src_temp), // %0
      "+r"(src),       // %1
      "+r"(dst_a),     // %2
      "+r"(dst_b),     // %3
5
third_party/libyuv/source/rotate_win.cc
vendored
@ -16,9 +16,8 @@ namespace libyuv {
extern "C" {
#endif

// This module is for Visual C x86.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
    defined(_MSC_VER) && !defined(__clang__)
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

__declspec(naked)
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
394
third_party/libyuv/source/row_any.cc
vendored
@ -22,6 +22,39 @@ extern "C" {
// Subsampled source needs to be increased by 1 if not even.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
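SS() is ceiling division by a power of two, used throughout this file to size the subsampled chroma tails. A couple of worked values, assert-checked in plain C:

#include <assert.h>
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

int main(void) {
  assert(SS(15, 1) == 8);  /* 15 luma pixels -> 8 chroma samples at 4:2:2 */
  assert(SS(16, 1) == 8);  /* even widths are unchanged by the round-up */
  assert(SS(15, 2) == 4);  /* 4:1:1 needs ceil(15 / 4) = 4 samples */
  return 0;
}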

// Any 4 planes to 1 with yuvconstants
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
    void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
                 const uint8* a_buf, uint8* dst_ptr,                          \
                 const struct YuvConstants* yuvconstants, int width) {        \
      SIMD_ALIGNED(uint8 temp[64 * 5]);                                       \
      memset(temp, 0, 64 * 4); /* for msan */                                 \
      int r = width & MASK;                                                   \
      int n = width & ~MASK;                                                  \
      if (n > 0) {                                                            \
        ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);       \
      }                                                                       \
      memcpy(temp, y_buf + n, r);                                             \
      memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));              \
      memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));             \
      memcpy(temp + 192, a_buf + n, r);                                       \
      ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256,           \
               yuvconstants, MASK + 1);                                       \
      memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256,                     \
             SS(r, DUVSHIFT) * BPP);                                          \
    }

#ifdef HAS_I422ALPHATOARGBROW_SSSE3
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_AVX2
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_NEON
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#undef ANY41C
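ANY41C and the other row-level Any wrappers use a different tail strategy from the rotate wrappers: instead of a C fallback, the leftover pixels of each plane are staged into fixed 64-byte slots of a zeroed scratch buffer, one extra SIMD call runs at full vector width, and only the valid output bytes are copied back. A single-plane sketch of that pattern, with FakeRow_SIMD as a hypothetical stand-in:

#include <stdint.h>
#include <string.h>

static void FakeRow_SIMD(const uint8_t* src, uint8_t* dst, int width) {
  for (int i = 0; i < width; ++i) dst[i] = src[i];  /* pretend SIMD */
}

static void FakeRow_Any(const uint8_t* src, uint8_t* dst, int width) {
  uint8_t temp[64 * 2];
  memset(temp, 0, sizeof(temp));  /* zeroed, as the macros do for msan */
  int r = width & 15;             /* MASK = 15: 16-pixel vectors */
  int n = width & ~15;
  if (n > 0) {
    FakeRow_SIMD(src, dst, n);    /* bulk of the row */
  }
  memcpy(temp, src + n, r);            /* stage the tail */
  FakeRow_SIMD(temp, temp + 64, 16);   /* one padded full-width pass */
  memcpy(dst + n, temp + 64, r);       /* copy back only r valid bytes */
}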
|
||||
|
||||
// Any 3 planes to 1.
|
||||
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
@ -40,75 +73,9 @@ extern "C" {
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||
ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_SSSE3
|
||||
ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
|
||||
ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
|
||||
ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
|
||||
ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
|
||||
#ifdef HAS_I422TOYUY2ROW_SSE2
|
||||
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
|
||||
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
|
||||
#endif // HAS_I444TOARGBROW_SSSE3
|
||||
#ifdef HAS_I422TORGB24ROW_AVX2
|
||||
ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORAWROW_AVX2
|
||||
ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_J422TOARGBROW_SSSE3
|
||||
ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_J422TOARGBROW_AVX2
|
||||
ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOBGRAROW_AVX2
|
||||
ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGBAROW_AVX2
|
||||
ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOABGRROW_AVX2
|
||||
ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_AVX2
|
||||
ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB4444ROW_AVX2
|
||||
ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB1555ROW_AVX2
|
||||
ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGB565ROW_AVX2
|
||||
ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_NEON
|
||||
ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
|
||||
ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
|
||||
ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
|
||||
ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
|
||||
ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
|
||||
ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
|
||||
ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
|
||||
ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
|
||||
ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
|
||||
ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
|
||||
ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOYUY2ROW_NEON
|
||||
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
|
||||
@ -116,8 +83,91 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
|
||||
#ifdef HAS_I422TOUYVYROW_NEON
|
||||
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_BLENDPLANEROW_AVX2
|
||||
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_BLENDPLANEROW_SSSE3
|
||||
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
|
||||
#endif
|
||||
#undef ANY31

// Note that odd width replication includes 444 due to the Arm
// implementation, which subsamples 444 to 422 internally.
// Any 3 planes to 1 with yuvconstants
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
    void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
                 uint8* dst_ptr, const struct YuvConstants* yuvconstants,     \
                 int width) {                                                 \
      SIMD_ALIGNED(uint8 temp[64 * 4]);                                       \
      memset(temp, 0, 64 * 3); /* for YUY2 and msan */                        \
      int r = width & MASK;                                                   \
      int n = width & ~MASK;                                                  \
      if (n > 0) {                                                            \
        ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);              \
      }                                                                       \
      memcpy(temp, y_buf + n, r);                                             \
      memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));              \
      memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));             \
      if (width & 1) {                                                        \
        temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1];            \
        temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1];          \
      }                                                                       \
      ANY_SIMD(temp, temp + 64, temp + 128, temp + 192,                       \
               yuvconstants, MASK + 1);                                       \
      memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192,                     \
             SS(r, DUVSHIFT) * BPP);                                          \
    }
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I411TOARGBROW_SSSE3
|
||||
ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_SSSE3
|
||||
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
|
||||
#endif // HAS_I444TOARGBROW_SSSE3
|
||||
#ifdef HAS_I422TORGB24ROW_AVX2
|
||||
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGBAROW_AVX2
|
||||
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_AVX2
|
||||
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB4444ROW_AVX2
|
||||
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB1555ROW_AVX2
|
||||
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGB565ROW_AVX2
|
||||
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_NEON
|
||||
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
|
||||
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
|
||||
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
|
||||
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
|
||||
#endif
|
||||
#undef ANY31C
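The (width & 1) branch in ANY31C above exists because, for an odd width, the staged U/V tails are one byte short of what the padded SIMD pass can read through the subsampler, so the last valid chroma byte is duplicated. A standalone sketch of just that replication step, mirroring the macro's 64/128 temp-slot offsets:

#include <stdint.h>
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

/* Duplicate the last valid U and V bytes at the end of their 64-byte
 * temp slots so the padded SIMD pass reads a defined neighbor.
 * 'temp' must hold at least 192 bytes, as in the macros. */
static void PadOddChroma(uint8_t* temp, int r, int uvshift, int width) {
  if (width & 1) {
    temp[64 + SS(r, uvshift)] = temp[64 + SS(r, uvshift) - 1];
    temp[128 + SS(r, uvshift)] = temp[128 + SS(r, uvshift) - 1];
  }
}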
|
||||
|
||||
// Any 2 planes to 1.
|
||||
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
|
||||
@ -136,32 +186,6 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
// Biplanar to RGB.
|
||||
#ifdef HAS_NV12TOARGBROW_SSSE3
|
||||
ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_AVX2
|
||||
ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_NEON
|
||||
ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_SSSE3
|
||||
ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
|
||||
ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_AVX2
|
||||
ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
|
||||
ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_NEON
|
||||
ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
|
||||
ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
|
||||
// Merge functions.
|
||||
#ifdef HAS_MERGEUVROW_SSE2
|
||||
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
|
||||
@ -221,6 +245,55 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
|
||||
#endif
|
||||
#undef ANY21
|
||||

// Any 2 planes to 1 with yuvconstants
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)            \
    void NAMEANY(const uint8* y_buf, const uint8* uv_buf,                     \
                 uint8* dst_ptr, const struct YuvConstants* yuvconstants,     \
                 int width) {                                                 \
      SIMD_ALIGNED(uint8 temp[64 * 3]);                                       \
      memset(temp, 0, 64 * 2); /* for msan */                                 \
      int r = width & MASK;                                                   \
      int n = width & ~MASK;                                                  \
      if (n > 0) {                                                            \
        ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                    \
      }                                                                       \
      memcpy(temp, y_buf + n * SBPP, r * SBPP);                               \
      memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2,                      \
             SS(r, UVSHIFT) * SBPP2);                                         \
      ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1);          \
      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                         \
    }
|
||||
|
||||
// Biplanar to RGB.
|
||||
#ifdef HAS_NV12TOARGBROW_SSSE3
|
||||
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_AVX2
|
||||
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_NEON
|
||||
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_SSSE3
|
||||
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_AVX2
|
||||
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_NEON
|
||||
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_SSSE3
|
||||
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_AVX2
|
||||
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_NEON
|
||||
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
#undef ANY21C
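The biplanar address math in ANY21C is worth pinning down: with UVSHIFT = 1 and SBPP2 = 2 (interleaved UV, as in the NV12/NV21 instantiations), the UV tail starts at (n >> 1) * 2 bytes into the UV plane and SS(r, 1) * 2 bytes are staged. A worked example, assert-checked:

#include <assert.h>
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

int main(void) {
  /* NV12-style instantiation: UVSHIFT = 1, SBPP2 = 2, MASK = 7. */
  int width = 23;
  int r = width & 7;             /* 7 leftover luma pixels           */
  int n = width & ~7;            /* 16 pixels done by the SIMD call  */
  assert((n >> 1) * 2 == 16);    /* byte offset of the UV tail       */
  assert(SS(r, 1) * 2 == 8);     /* UV bytes staged into temp + 64   */
  return 0;
}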
|
||||
|
||||
// Any 1 to 1.
|
||||
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
@ -252,8 +325,10 @@ ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
|
||||
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
|
||||
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
|
||||
#if defined(HAS_ARGBTORGB565ROW_AVX2)
|
||||
ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
|
||||
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
|
||||
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
|
||||
#endif
|
||||
@ -269,15 +344,16 @@ ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
|
||||
#if defined(HAS_I400TOARGBROW_AVX2)
|
||||
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_YUY2TOARGBROW_SSSE3)
|
||||
ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
#if defined(HAS_RGB24TOARGBROW_SSSE3)
|
||||
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
|
||||
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
|
||||
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
|
||||
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
|
||||
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
|
||||
#endif
|
||||
#if defined(HAS_RAWTORGB24ROW_SSSE3)
|
||||
ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
|
||||
#endif
|
||||
#if defined(HAS_RGB565TOARGBROW_AVX2)
|
||||
ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
|
||||
#endif
|
||||
@ -287,10 +363,6 @@ ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
|
||||
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
|
||||
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_YUY2TOARGBROW_AVX2)
|
||||
ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
|
||||
ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_NEON)
|
||||
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
|
||||
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
|
||||
@ -299,8 +371,9 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
|
||||
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
|
||||
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
|
||||
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
|
||||
ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
|
||||
ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
|
||||
#endif
|
||||
#if defined(HAS_RAWTORGB24ROW_NEON)
|
||||
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYROW_AVX2
|
||||
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
|
||||
@ -381,9 +454,6 @@ ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
|
||||
#ifdef HAS_ARGBATTENUATEROW_SSSE3
|
||||
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
|
||||
#endif
|
||||
#ifdef HAS_ARGBATTENUATEROW_SSE2
|
||||
ANY11(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, 0, 4, 4, 3)
|
||||
#endif
|
||||
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
|
||||
ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
|
||||
#endif
|
||||
@ -396,8 +466,44 @@ ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
|
||||
#ifdef HAS_ARGBATTENUATEROW_NEON
|
||||
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
|
||||
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
|
||||
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
#undef ANY11

// Any 1 to 1 blended. Destination is read-modify-write.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                   \
    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {           \
      SIMD_ALIGNED(uint8 temp[128 * 2]);                                      \
      memset(temp, 0, 128 * 2); /* for YUY2 and msan */                       \
      int r = width & MASK;                                                   \
      int n = width & ~MASK;                                                  \
      if (n > 0) {                                                            \
        ANY_SIMD(src_ptr, dst_ptr, n);                                        \
      }                                                                       \
      memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP);   \
      memcpy(temp + 128, dst_ptr + n * BPP, r * BPP);                         \
      ANY_SIMD(temp, temp + 128, MASK + 1);                                   \
      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                         \
    }
|
||||
|
||||
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
|
||||
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
|
||||
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
|
||||
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
|
||||
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
|
||||
#endif
|
||||
#undef ANY11B
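ANY11B differs from the plain ANY11 tail in one detail: because the row op reads and modifies the destination, the current destination bytes are staged alongside the source before the padded pass runs. A sketch of that read-modify-write tail with a hypothetical blend row:

#include <stdint.h>
#include <string.h>

/* Hypothetical in-place blend row: dst[i] becomes the average. */
static void FakeBlendRow_SIMD(const uint8_t* src, uint8_t* dst, int width) {
  for (int i = 0; i < width; ++i) dst[i] = (uint8_t)((src[i] + dst[i]) / 2);
}

static void FakeBlendRow_Any(const uint8_t* src, uint8_t* dst, int width) {
  uint8_t temp[128 * 2];
  memset(temp, 0, sizeof(temp));
  int r = width & 15;
  int n = width & ~15;
  if (n > 0) {
    FakeBlendRow_SIMD(src, dst, n);
  }
  memcpy(temp, src + n, r);            /* source tail */
  memcpy(temp + 128, dst + n, r);      /* destination tail (it is read) */
  FakeBlendRow_SIMD(temp, temp + 128, 16);
  memcpy(dst + n, temp + 128, r);      /* write back r blended bytes */
}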
|
||||
|
||||
// Any 1 to 1 with parameter.
|
||||
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
|
||||
@ -440,6 +546,35 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3)
|
||||
#endif
|
||||
#undef ANY11P
|
||||
|
||||
// Any 1 to 1 with yuvconstants
|
||||
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
|
||||
const struct YuvConstants* yuvconstants, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
#if defined(HAS_YUY2TOARGBROW_SSSE3)
|
||||
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_YUY2TOARGBROW_AVX2)
|
||||
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
|
||||
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
|
||||
#endif
|
||||
#if defined(HAS_YUY2TOARGBROW_NEON)
|
||||
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
|
||||
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
|
||||
#endif
|
||||
#undef ANY11C
|
||||
|
||||
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
|
||||
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
||||
void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
|
||||
@ -464,14 +599,11 @@ ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
|
||||
#ifdef HAS_INTERPOLATEROW_SSSE3
|
||||
ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_INTERPOLATEROW_SSE2
|
||||
ANY11T(InterpolateRow_Any_SSE2, InterpolateRow_SSE2, 1, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_INTERPOLATEROW_NEON
|
||||
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
|
||||
ANY11T(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, 1, 1, 3)
|
||||
#ifdef HAS_INTERPOLATEROW_DSPR2
|
||||
ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3)
|
||||
#endif
|
||||
#undef ANY11T
|
||||
|
||||
@ -496,9 +628,6 @@ ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
|
||||
#ifdef HAS_MIRRORROW_SSSE3
|
||||
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_MIRRORROW_SSE2
|
||||
ANY11M(MirrorRow_Any_SSE2, MirrorRow_SSE2, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_MIRRORROW_NEON
|
||||
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
|
||||
#endif
|
||||
@ -548,9 +677,25 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
      ANY_SIMD(src_ptr, dst_u, dst_v, n);                                    \
    }                                                                        \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);      \
    if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */  \
    /* repeat last 4 bytes for 422 subsampler */                             \
    if ((width & 1) && BPP == 4 && DUVSHIFT == 1) {                          \
      memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
             temp + SS(r, UVSHIFT) * BPP - BPP, 4);                          \
             temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \
    }                                                                        \
    /* repeat last 4 - 12 bytes for 411 subsampler */                        \
    if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) {                   \
      memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
             temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \
      memcpy(temp + SS(r, UVSHIFT) * BPP + BPP,                              \
             temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2);                    \
    }                                                                        \
    if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) {                   \
      memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
             temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2);                \
    }                                                                        \
    if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) {                   \
      memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
             temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \
    }                                                                        \
    ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1);                        \
    memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT));            \
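The new 411 branches above pad the staged ARGB tail out to a whole group of 4 pixels per UV output by repeating trailing pixels, which is where the "repeat last 4 - 12 bytes" comment comes from. A worked check of the byte counts for each (width & 3) case, with BPP = 4:

#include <assert.h>

int main(void) {
  /* Pixels padded = 4 - (width & 3); bytes = that times BPP (4). */
  assert((4 - (1 & 3)) * 4 == 12);  /* width % 4 == 1: 3 px, 4 + 8 bytes */
  assert((4 - (2 & 3)) * 4 == 8);   /* width % 4 == 2: 2 px, 8 bytes     */
  assert((4 - (3 & 3)) * 4 == 4);   /* width % 4 == 3: 1 px, 4 bytes     */
  return 0;
}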
|
||||
@ -566,8 +711,8 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
|
||||
#ifdef HAS_SPLITUVROW_NEON
|
||||
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
|
||||
#endif
|
||||
#ifdef HAS_SPLITUVROW_MIPS_DSPR2
|
||||
ANY12(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_MIPS_DSPR2, 0, 2, 0, 15)
|
||||
#ifdef HAS_SPLITUVROW_DSPR2
|
||||
ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUV444ROW_SSSE3
|
||||
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
|
||||
@ -576,16 +721,12 @@ ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
|
||||
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
|
||||
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUV422ROW_SSSE3
|
||||
ANY12(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUV422ROW_SSE2
|
||||
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
|
||||
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUV422ROW_NEON
|
||||
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
|
||||
ANY12(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, 0, 4, 1, 15)
|
||||
ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31)
|
||||
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
|
||||
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
|
||||
@ -607,11 +748,11 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
||||
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
|
||||
SS(r, UVSHIFT) * BPP); \
|
||||
if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \
|
||||
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */\
|
||||
memcpy(temp + SS(r, UVSHIFT) * BPP, \
|
||||
temp + SS(r, UVSHIFT) * BPP - BPP, 4); \
|
||||
temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
|
||||
memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
|
||||
temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4); \
|
||||
temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
|
||||
} \
|
||||
ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
|
||||
memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
|
||||
@ -621,6 +762,9 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
|
||||
#ifdef HAS_ARGBTOUVROW_AVX2
|
||||
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUVJROW_AVX2
|
||||
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUVROW_SSSE3
|
||||
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
|
||||
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
|
||||
1245
third_party/libyuv/source/row_common.cc
vendored
File diff suppressed because it is too large
2149
third_party/libyuv/source/row_gcc.cc
vendored
File diff suppressed because it is too large
171
third_party/libyuv/source/row_mips.cc
vendored
@ -375,13 +375,13 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
|
||||
}
|
||||
#endif // HAS_COPYROW_MIPS
|
||||
|
||||
// MIPS DSPR2 functions
|
||||
// DSPR2 functions
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
|
||||
(__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
|
||||
|
||||
void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
int width) {
|
||||
void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
@ -389,7 +389,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
"blez $t4, 2f \n"
|
||||
" andi %[width], %[width], 0xf \n" // residual
|
||||
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"addiu $t4, $t4, -1 \n"
|
||||
"lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
|
||||
@ -447,7 +446,7 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
);
|
||||
}
|
||||
|
||||
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
|
||||
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
@ -457,7 +456,6 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
"blez $t4, 2f \n"
" addu %[src], %[src], %[width] \n" // src += width

".p2align 2 \n"
"1: \n"
"lw $t0, -16(%[src]) \n" // |3|2|1|0|
"lw $t1, -12(%[src]) \n" // |7|6|5|4|
@ -498,10 +496,10 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
);
}

void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
int x = 0;
int y = 0;
void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
int x;
int y;
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@ -512,7 +510,6 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"blez %[x], 2f \n"
" addu %[src_uv], %[src_uv], $t4 \n"

".p2align 2 \n"
"1: \n"
"lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
"lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
@ -582,7 +579,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
[dst_u] "+r" (dst_u),
[dst_v] "+r" (dst_v),
[x] "=&r" (x),
[y] "+r" (y)
[y] "=&r" (y)
: [width] "r" (width)
: "t0", "t1", "t2", "t3", "t4",
"t5", "t7", "t8", "t9"
@ -596,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
#define I422ToTransientMipsRGB \
#define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
@ -655,11 +652,13 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"

void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
// TODO(fbarchard): accept yuv conversion constants.
void I422ToARGBRow_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@ -673,9 +672,8 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|

".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
YUVTORGB
// Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
@ -717,136 +715,10 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
);
}

void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128|
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|

".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into abgr format
"precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
"precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
"precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
"precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|

"precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
"addiu %[width], -4 \n"
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
"or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
"or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
"precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
"precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
"sll $t9, $t9, 16 \n"
"sll $t8, $t8, 16 \n"
"packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
"packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
".set pop \n"
:[y_buf] "+r" (y_buf),
[u_buf] "+r" (u_buf),
[v_buf] "+r" (v_buf),
[width] "+r" (width),
[rgb_buf] "+r" (rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}

void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128|
"lui $s6, 0xff \n"
"ori $s6, 0xff \n" // |00|ff|00|ff|

".p2align 2 \n"
"1: \n"
I422ToTransientMipsRGB
// Arranging into bgra format
"precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
"precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
"precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
"precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|

"precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
"addiu %[width], -4 \n"
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
"sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
"sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
"or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
"or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
"precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
"precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
"sll $t1, $t1, 16 \n"
"sll $t2, $t2, 16 \n"
"packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
"packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
".set pop \n"
:[y_buf] "+r" (y_buf),
[u_buf] "+r" (u_buf),
[v_buf] "+r" (v_buf),
[width] "+r" (width),
[rgb_buf] "+r" (rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}

// Bilinear filter 8x2 -> 8x1
void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
int y0_fraction = 256 - source_y_fraction;
const uint8* src_ptr1 = src_ptr + src_stride;

@ -857,7 +729,6 @@ void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
"replv.ph $t0, %[y0_fraction] \n"
"replv.ph $t1, %[source_y_fraction] \n"

".p2align 2 \n"
"1: \n"
"lw $t2, 0(%[src_ptr]) \n"
"lw $t3, 0(%[src_ptr1]) \n"
793
third_party/libyuv/source/row_neon.cc
vendored
File diff suppressed because it is too large
862
third_party/libyuv/source/row_neon64.cc
vendored
File diff suppressed because it is too large
2190
third_party/libyuv/source/row_win.cc
vendored
File diff suppressed because it is too large
153
third_party/libyuv/source/scale.cc
vendored
@ -61,15 +61,15 @@ static void ScalePlaneDown2(int src_width, int src_height,
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :
ScaleRowDown2Box_Any_SSE2);
#if defined(HAS_SCALEROWDOWN2_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSSE3 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 :
ScaleRowDown2Box_Any_SSSE3);
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
ScaleRowDown2Box_SSE2);
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSSE3 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 :
ScaleRowDown2Box_SSSE3);
}
}
#endif
@ -85,12 +85,12 @@ static void ScalePlaneDown2(int src_width, int src_height,
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
#if defined(HAS_SCALEROWDOWN2_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown2 = filtering ?
ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2;
}
#endif

@ -135,12 +135,12 @@ static void ScalePlaneDown2_16(int src_width, int src_height,
ScaleRowDown2Box_16_SSE2);
}
#endif
#if defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
#if defined(HAS_SCALEROWDOWN2_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown2 = filtering ?
ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2;
}
#endif

@ -182,12 +182,12 @@ static void ScalePlaneDown4(int src_width, int src_height,
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
#if defined(HAS_SCALEROWDOWN4_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2;
ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
}
}
#endif
@ -200,12 +200,12 @@ static void ScalePlaneDown4(int src_width, int src_height,
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
#if defined(HAS_SCALEROWDOWN4_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2;
}
#endif

@ -245,12 +245,12 @@ static void ScalePlaneDown4_16(int src_width, int src_height,
ScaleRowDown4_16_SSE2;
}
#endif
#if defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
#if defined(HAS_SCALEROWDOWN4_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2;
}
#endif

@ -325,16 +325,16 @@ static void ScalePlaneDown34(int src_width, int src_height,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
#if defined(HAS_SCALEROWDOWN34_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
ScaleRowDown34_0 = ScaleRowDown34_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_DSPR2;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2;
}
}
#endif
@ -404,16 +404,16 @@ static void ScalePlaneDown34_16(int src_width, int src_height,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
#if defined(HAS_SCALEROWDOWN34_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2;
}
}
#endif
@ -517,16 +517,16 @@ static void ScalePlaneDown38(int src_width, int src_height,
}
}
#endif
#if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
#if defined(HAS_SCALEROWDOWN38_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
ScaleRowDown38_3 = ScaleRowDown38_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_DSPR2;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2;
}
}
#endif
@ -595,16 +595,16 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
}
}
#endif
#if defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
#if defined(HAS_SCALEROWDOWN38_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2;
}
}
#endif
@ -659,7 +659,6 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
int i;
int scaletbl[2];
int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
@ -667,7 +666,8 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
*dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) *
scaletbl[boxwidth - minboxwidth] >> 16;
}
}

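The change above replaces `scaleptr[boxwidth]`, which indexed through a pointer formed before the start of `scaletbl` (undefined behavior in C even when the final access lands back in bounds), with a direct, always-in-bounds index. A minimal sketch of the two forms, assuming a two-entry table as in the diff:

#include <stdio.h>

int main(void) {
  int scaletbl[2] = {100, 200};
  int minboxwidth = 3;  /* example value; boxwidth is then 3 or 4 */
  int boxwidth = 4;

  /* Old form: scaletbl - minboxwidth points outside the array, which is
   * undefined behavior regardless of where the later access lands. */
  /* int* scaleptr = scaletbl - minboxwidth; value = scaleptr[boxwidth]; */

  /* New form: the subtraction happens on the index, so every pointer
   * involved stays within the array. */
  int value = scaletbl[boxwidth - minboxwidth];
  printf("%d\n", value);  /* prints 200 */
  return 0;
}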
@ -676,7 +676,6 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
int i;
int scaletbl[2];
int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
@ -684,8 +683,8 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ =
SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
*dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
scaletbl[boxwidth - minboxwidth] >> 16;
}
}

@ -875,14 +874,6 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
&x, &y, &dx, &dy);
src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@ -907,11 +898,11 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
@ -1011,11 +1002,11 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
InterpolateRow = InterpolateRow_Any_16_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
InterpolateRow = InterpolateRow_16_DSPR2;
}
}
#endif
@ -1072,14 +1063,6 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
&x, &y, &dx, &dy);
src_width = Abs(src_width);

#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@ -1104,11 +1087,11 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
@ -1243,11 +1226,11 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
InterpolateRow = InterpolateRow_Any_16_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
InterpolateRow = InterpolateRow_16_DSPR2;
}
}
#endif
35
third_party/libyuv/source/scale_any.cc
vendored
@ -55,12 +55,29 @@ CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
dst_ptr + n * BPP, r); \
}

#ifdef HAS_SCALEROWDOWN2_SSE2
SDANY(ScaleRowDown2_Any_SSE2, ScaleRowDown2_SSE2, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSE2, ScaleRowDown2Linear_SSE2,
// Fixed scale down for odd source width. Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
uint8* dst_ptr, int dst_width) { \
int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \
int n = dst_width - r; \
if (n > 0) { \
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
} \
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
dst_ptr + n * BPP, r); \
}

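For orientation, a hedged scalar sketch of the split-dispatch shape these macros generate; the names and the remainder arithmetic here are illustrative (SDANY-like), not libyuv API. SDODD additionally biases the remainder computation by one so the odd final pixel is always handled by the C kernel:

#include <stddef.h>

typedef void (*ScaleRowFn)(const unsigned char* src, ptrdiff_t src_stride,
                           unsigned char* dst, int dst_width);

/* Run the SIMD kernel on the largest prefix that is a multiple of its
 * natural width (mask + 1), then finish the remainder with the portable C
 * kernel. factor is the horizontal subsample (2 or 4 above). */
static void ScaleRowDownAny(const unsigned char* src, ptrdiff_t src_stride,
                            unsigned char* dst, int dst_width,
                            ScaleRowFn simd_fn, ScaleRowFn c_fn,
                            int factor, int mask) {
  int r = dst_width & mask;
  int n = dst_width - r;
  if (n > 0) {
    simd_fn(src, src_stride, dst, n);
  }
  c_fn(src + (n * factor), src_stride, dst + n, r);
}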
#ifdef HAS_SCALEROWDOWN2_SSSE3
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3,
ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_SSE2, ScaleRowDown2Box_SSE2, ScaleRowDown2Box_C,
SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C,
2, 1, 15)
SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3,
ScaleRowDown2Box_Odd_C, 2, 1, 15)
#endif
#ifdef HAS_SCALEROWDOWN2_AVX2
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
@ -68,6 +85,8 @@ SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
ScaleRowDown2Linear_C, 2, 1, 31)
SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
2, 1, 31)
SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C,
2, 1, 31)
#endif
#ifdef HAS_SCALEROWDOWN2_NEON
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
@ -75,10 +94,12 @@ SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_C, 2, 1, 15)
SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_Odd_C, 2, 1, 15)
#endif
#ifdef HAS_SCALEROWDOWN4_SSE2
SDANY(ScaleRowDown4_Any_SSE2, ScaleRowDown4_SSE2, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_SSE2, ScaleRowDown4Box_SSE2, ScaleRowDown4Box_C,
#ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C,
4, 1, 7)
#endif
#ifdef HAS_SCALEROWDOWN4_AVX2
80
third_party/libyuv/source/scale_argb.cc
vendored
@ -210,14 +210,6 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4;
x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@ -242,12 +234,12 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
@ -308,14 +300,6 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@ -340,10 +324,10 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
InterpolateRow = InterpolateRow_DSPR2;
}
#endif
if (src_width >= 32768) {
@ -481,27 +465,19 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
#if defined(HAS_I422TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif

void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@ -526,10 +502,10 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
InterpolateRow = InterpolateRow_DSPR2;
}
#endif

@ -847,6 +823,36 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb,
return 0;
}

// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint32 src_fourcc,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
uint32 dst_fourcc,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering) {
uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
int r;
I420ToARGB(src_y, src_stride_y,
src_u, src_stride_u,
src_v, src_stride_v,
argb_buffer, src_width * 4,
src_width, src_height);

r = ARGBScaleClip(argb_buffer, src_width * 4,
src_width, src_height,
dst_argb, dst_stride_argb,
dst_width, dst_height,
clip_x, clip_y, clip_width, clip_height,
filtering);
free(argb_buffer);
return r;
}

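A hedged usage sketch of the new entry point; the wrapper name, dimensions, and buffers are hypothetical, and error handling is omitted. Note that the implementation above converts the whole source frame to a temporary ARGB buffer, so it allocates src_width * src_height * 4 bytes per call:

#include "libyuv/scale_argb.h"    /* YUVToARGBScaleClip, kFilterBilinear */
#include "libyuv/video_common.h"  /* FOURCC_I420, FOURCC_ARGB */

/* Hypothetical helper: convert-and-scale a 640x360 I420 frame into a
 * 320x180 ARGB frame, clipping to the full destination. */
int ScaleI420ToSmallArgb(const uint8* y, const uint8* u, const uint8* v,
                         uint8* argb) {
  return YUVToARGBScaleClip(y, 640, u, 320, v, 320,
                            FOURCC_I420, 640, 360,
                            argb, 320 * 4,
                            FOURCC_ARGB, 320, 180,
                            0, 0, 320, 180,
                            kFilterBilinear);
}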
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
58
third_party/libyuv/source/scale_common.cc
vendored
@ -103,6 +103,28 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
}
}

void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
const uint8* s = src_ptr;
const uint8* t = src_ptr + src_stride;
int x;
dst_width -= 1;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
dst += 2;
s += 4;
t += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst += 1;
s += 2;
t += 2;
}
dst[0] = (s[0] + t[0] + 1) >> 1;
}

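To make the odd-width behavior concrete: for an odd source width, the last destination pixel maps to a single source column, so it is averaged vertically only. A small self-checking sketch, assuming the function above is linked in (uint8 is libyuv's typedef for unsigned char):

#include <assert.h>
#include <stddef.h>

void ScaleRowDown2Box_Odd_C(const unsigned char* src_ptr,
                            ptrdiff_t src_stride,
                            unsigned char* dst, int dst_width);

int main(void) {
  /* Two rows of 5 pixels, stride 5; dst_width = (5 + 1) / 2 = 3. */
  unsigned char src[10] = {10, 20, 30, 40, 50,
                           10, 20, 30, 40, 50};
  unsigned char dst[3];
  ScaleRowDown2Box_Odd_C(src, 5, dst, 3);
  assert(dst[0] == (10 + 20 + 10 + 20 + 2) / 4);  /* rounded 2x2 box: 15 */
  assert(dst[1] == (30 + 40 + 30 + 40 + 2) / 4);  /* rounded 2x2 box: 35 */
  assert(dst[2] == (50 + 50 + 1) / 2);            /* last column only: 50 */
  return 0;
}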
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
const uint16* s = src_ptr;
@ -395,8 +417,14 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
}

// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) (uint8)((int)(a) + \
((int)(f) * ((int)(b) - (int)(a)) >> 16))
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) (uint8)((int)(a) + \
(((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif

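The change adds a rounding term before the final shift. Worked numbers for a = 0, b = 255, f = 0x8000 (exactly halfway): truncation gives 127, rounding gives 128, matching the 7-bit x86 path and removing the half-pixel darkening behind the color issue this update fixes. A small check:

#include <assert.h>

int main(void) {
  int a = 0, b = 255, f = 0x8000;  /* f is a 16-bit fraction toward b */
  int truncated = a + ((f * (b - a)) >> 16);             /* old ARM: 127 */
  int rounded = a + (((f * (b - a)) + 0x8000) >> 16);    /* new ARM: 128 */
  int intel = a + ((((f >> 9) * (b - a)) + 0x40) >> 7);  /* x86 path: 128 */
  assert(truncated == 127 && rounded == 128 && intel == 128);
  return 0;
}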
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
@ -448,8 +476,9 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
}
#undef BLENDER

// Same as 8 bit arm blender but return is cast to uint16
#define BLENDER(a, b, f) (uint16)((int)(a) + \
((int)(f) * ((int)(b) - (int)(a)) >> 16))
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))

void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) {
@ -787,6 +816,7 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
}
}

// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
#define BLENDERC(a, b, f, s) (uint32)( \
@ -876,14 +906,6 @@ void ScalePlaneVertical(int src_height,
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@ -908,13 +930,13 @@ void ScalePlaneVertical(int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(dst_width_bytes, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
@ -982,13 +1004,13 @@ void ScalePlaneVertical_16(int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
InterpolateRow = InterpolateRow_Any_16_DSPR2;
if (IS_ALIGNED(dst_width_bytes, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
InterpolateRow = InterpolateRow_16_DSPR2;
}
}
#endif
493
third_party/libyuv/source/scale_gcc.cc
vendored
@ -9,6 +9,7 @@
*/

#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
@ -16,7 +17,8 @@ extern "C" {
#endif

// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))

// Offsets for source bytes 0 to 9
static uvec8 kShuf0 =
@ -96,8 +98,8 @@ static uvec16 kScaleAb2 =
// Generated using gcc disassembly on Visual C object file:
// objdump -D yuvscaler.obj >yuvscaler.txt

void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
LABELALIGN
"1: \n"
@ -118,26 +120,24 @@ void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"

LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n"
"lea " MEMLEA(0x20,0) ",%0 \n"
"movdqa %%xmm0,%%xmm2 \n"
"psrlw $0x8,%%xmm0 \n"
"movdqa %%xmm1,%%xmm3 \n"
"psrlw $0x8,%%xmm1 \n"
"pand %%xmm5,%%xmm2 \n"
"pand %%xmm5,%%xmm3 \n"
"pavgw %%xmm2,%%xmm0 \n"
"pavgw %%xmm3,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"pavgw %%xmm5,%%xmm0 \n"
"pavgw %%xmm5,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
@ -145,15 +145,17 @@ void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
:: "memory", "cc", "xmm0", "xmm1", "xmm5"
:: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
);
}

void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"

LABELALIGN
"1: \n"
@ -162,17 +164,17 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm2,%%xmm0 \n"
"pavgb %%xmm3,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"psrlw $0x8,%%xmm0 \n"
"movdqa %%xmm1,%%xmm3 \n"
"psrlw $0x8,%%xmm1 \n"
"pand %%xmm5,%%xmm2 \n"
"pand %%xmm5,%%xmm3 \n"
"pavgw %%xmm2,%%xmm0 \n"
"pavgw %%xmm3,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm2 \n"
"pmaddubsw %%xmm4,%%xmm3 \n"
"paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm3,%%xmm1 \n"
"psrlw $0x1,%%xmm0 \n"
"psrlw $0x1,%%xmm1 \n"
"pavgw %%xmm5,%%xmm0 \n"
"pavgw %%xmm5,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
@ -186,7 +188,105 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

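The rewritten box kernel replaces the mask/shift/pavgb sequence with pmaddubsw against a register of 0x01 bytes, which sums each pair of adjacent unsigned bytes into a 16-bit word without losing the low bit, so the final rounding is exact. A hedged scalar sketch of the per-pixel arithmetic the SSSE3 loop above performs (function name is illustrative):

#include <assert.h>

/* (a + b + c + d + 2) >> 2 computed the way the SSSE3 kernel does it:
 * pairwise byte sums (pmaddubsw by 1), row add (paddw), halve (psrlw 1),
 * then average with zero (pavgw), which supplies the final rounding bit. */
static unsigned char Box2x2(unsigned char a, unsigned char b,
                            unsigned char c, unsigned char d) {
  int row0 = a + b;      /* pmaddubsw with weights 1,1 */
  int row1 = c + d;
  int sum = row0 + row1; /* paddw */
  int halved = sum >> 1; /* psrlw $0x1 */
  return (unsigned char)((halved + 1) >> 1);  /* pavgw with zero */
}

int main(void) {
  assert(Box2x2(10, 20, 10, 20) == (10 + 20 + 10 + 20 + 2) / 4);
  assert(Box2x2(1, 1, 1, 0) == 1);  /* rounds .75 up, unlike chained pavgb */
  return 0;
}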
void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
#ifdef HAS_SCALEROWDOWN2_AVX2
void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
"lea " MEMLEA(0x40,0) ",%0 \n"
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
:: "memory", "cc", "xmm0", "xmm1"
);
}

void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"

LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
"vmovdqu " MEMACCESS2(0x20, 0) ",%%ymm1 \n"
"lea " MEMLEA(0x40,0) ",%0 \n"
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
"vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
"vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
:: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
);
}

void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"

LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
"lea " MEMLEA(0x40,0) ",%0 \n"
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
"vpsrlw $0x1,%%ymm0,%%ymm0 \n"
"vpsrlw $0x1,%%ymm1,%%ymm1 \n"
"vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
"vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)) // %3
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_SCALEROWDOWN2_AVX2

void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -214,12 +314,15 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stridex3 = 0;
intptr_t stridex3;
asm volatile (
"pcmpeqb %%xmm7,%%xmm7 \n"
"psrlw $0x8,%%xmm7 \n"
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n"
"movdqa %%xmm4,%%xmm5 \n"
"packuswb %%xmm4,%%xmm4 \n"
"psllw $0x3,%%xmm5 \n"
"lea " MEMLEA4(0x00,4,4,2) ",%3 \n"

LABELALIGN
@ -228,31 +331,29 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
"pavgb %%xmm2,%%xmm0 \n"
"pavgb %%xmm3,%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm2 \n"
"pmaddubsw %%xmm4,%%xmm3 \n"
"paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm3,%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
MEMOPREG(movdqu,0x00,0,3,1,xmm4) // movdqu (%0,%3,1),%%xmm4
MEMOPREG(movdqu,0x10,0,3,1,xmm5) // movdqu 0x10(%0,%3,1),%%xmm5
"pmaddubsw %%xmm4,%%xmm2 \n"
"pmaddubsw %%xmm4,%%xmm3 \n"
"paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm3,%%xmm1 \n"
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
"lea " MEMLEA(0x20,0) ",%0 \n"
"pavgb %%xmm4,%%xmm2 \n"
"pavgb %%xmm2,%%xmm0 \n"
"pavgb %%xmm5,%%xmm3 \n"
"pavgb %%xmm3,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"psrlw $0x8,%%xmm0 \n"
"movdqa %%xmm1,%%xmm3 \n"
"psrlw $0x8,%%xmm1 \n"
"pand %%xmm7,%%xmm2 \n"
"pand %%xmm7,%%xmm3 \n"
"pavgw %%xmm2,%%xmm0 \n"
"pavgw %%xmm3,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm2 \n"
"psrlw $0x8,%%xmm0 \n"
"pand %%xmm7,%%xmm2 \n"
"pavgw %%xmm2,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm2 \n"
"pmaddubsw %%xmm4,%%xmm3 \n"
"paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm3,%%xmm1 \n"
"phaddw %%xmm1,%%xmm0 \n"
"paddw %%xmm5,%%xmm0 \n"
"psrlw $0x4,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movq %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x8,1) ",%1 \n"
"sub $0x8,%2 \n"
@ -260,13 +361,100 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
"+r"(stridex3) // %3
"=&r"(stridex3) // %3
: "r"((intptr_t)(src_stride)) // %4
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}


#ifdef HAS_SCALEROWDOWN4_AVX2
void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpsrld $0x18,%%ymm5,%%ymm5 \n"
"vpslld $0x10,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
"lea " MEMLEA(0x40,0) ",%0 \n"
"vpand %%ymm5,%%ymm0,%%ymm0 \n"
"vpand %%ymm5,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vmovdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
:: "memory", "cc", "xmm0", "xmm1", "xmm5"
);
}

void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpsllw $0x3,%%ymm4,%%ymm5 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"

LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2
MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2
MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3
"lea " MEMLEA(0x40,0) ",%0 \n"
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
"vpsrlw $0x4,%%ymm0,%%ymm0 \n"
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vmovdqu %%xmm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(src_stride * 3)) // %4
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_SCALEROWDOWN4_AVX2

void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
@ -574,61 +762,89 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
}

// Reads 16xN bytes and produces 16 shorts at a time.
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
int tmp_height = 0;
intptr_t tmp_src = 0;
void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
asm volatile (
"mov %0,%3 \n" // row pointer
"mov %5,%2 \n" // height
"pxor %%xmm0,%%xmm0 \n" // clear accumulators
"pxor %%xmm1,%%xmm1 \n"
"pxor %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"

LABELALIGN
"1: \n"
"movdqu " MEMACCESS(3) ",%%xmm2 \n"
"add %6,%3 \n"
"movdqa %%xmm2,%%xmm3 \n"
"punpcklbw %%xmm4,%%xmm2 \n"
"punpckhbw %%xmm4,%%xmm3 \n"
"movdqu " MEMACCESS(0) ",%%xmm3 \n"
"lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
"movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n"
"movdqa %%xmm3,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"punpckhbw %%xmm5,%%xmm3 \n"
"paddusw %%xmm2,%%xmm0 \n"
"paddusw %%xmm3,%%xmm1 \n"
"sub $0x1,%2 \n"
"jg 1b \n"

"movdqu %%xmm0," MEMACCESS(1) " \n"
"movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
"mov %0,%3 \n" // row pointer
"mov %5,%2 \n" // height
"pxor %%xmm0,%%xmm0 \n" // clear accumulators
"pxor %%xmm1,%%xmm1 \n"
"sub $0x10,%4 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(tmp_height), // %2
"+r"(tmp_src), // %3
"+r"(src_width), // %4
"+rm"(src_height) // %5
: "rm"((intptr_t)(src_stride)) // %6
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
"+r"(src_width) // %2
:
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}


#ifdef HAS_SCALEADDROW_AVX2
// Reads 32 bytes and accumulates to 32 shorts at a time.
void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
asm volatile (
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"

LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ",%%ymm3 \n"
"lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32
"vpermq $0xd8,%%ymm3,%%ymm3 \n"
"vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
"vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
"vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n"
"vpaddusw " MEMACCESS2(0x20,1) ",%%ymm3,%%ymm1 \n"
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
"lea " MEMLEA(0x40,1) ",%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(src_width) // %2
:
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_SCALEADDROW_AVX2

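The old ScaleAddRows_SSE2 walked all source rows per 16-pixel block; the replacement kernels accumulate a single row into a uint16 sum buffer and are simply called once per source row. A hedged scalar reference of what the new kernels compute (the SIMD versions use saturating adds, paddusw/vpaddusw; the name here is illustrative):

#include <stdint.h>

/* Add one row of 8-bit pixels into a 16-bit accumulator; scalar
 * equivalent of ScaleAddRow_SSE2/ScaleAddRow_AVX2 above, without the
 * saturation the SIMD adds provide. */
static void ScaleAddRow_Ref(const uint8_t* src_ptr, uint16_t* dst_ptr,
                            int src_width) {
  int x;
  for (x = 0; x < src_width; ++x) {
    dst_ptr[x] = (uint16_t)(dst_ptr[x] + src_ptr[x]);
  }
}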
// Constant for making pixels signed to avoid pmaddubsw
// saturation.
static uvec8 kFsub80 =
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };

// Constant for making pixels unsigned and adding .5 for rounding.
static uvec16 kFadd40 =
{ 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 };

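Why these constants work: pmaddubsw multiplies unsigned bytes by signed bytes, and with the filter weight now allowed to reach 128 it no longer fits in a signed byte, so the kernel below swaps roles. Pixels are biased into signed range with kFsub80 and the weights take the unsigned slot. Because the two weights sum to 128, the bias shifts the weighted sum by exactly 128 * 128 = 0x4000, so adding kFadd40 (0x4000 plus 0x40 for rounding) before the 7-bit shift undoes it. A scalar check of the identity:

#include <assert.h>

int main(void) {
  int a = 250, b = 252;  /* neighboring pixels */
  int f = 40;            /* 7-bit fraction toward b */
  int wa = 128 - f, wb = f;

  /* Direct rounded weighted sum. */
  int direct = (a * wa + b * wb + 0x40) >> 7;

  /* Signed-bias form used by the SSSE3 code: subtract 128 from each pixel
   * (kFsub80), then add 0x4040 (kFadd40) to restore the bias and round. */
  int biased = ((a - 128) * wa + (b - 128) * wb + 0x4040) >> 7;

  assert(direct == biased);
  return 0;
}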
// Bilinear column filtering. SSSE3 version.
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
intptr_t x0 = 0, x1 = 0, temp_pixel = 0;
intptr_t x0, x1, temp_pixel;
asm volatile (
"movd %6,%%xmm2 \n"
"movd %7,%%xmm3 \n"
"movl $0x04040000,%k2 \n"
"movd %k2,%%xmm5 \n"
"pcmpeqb %%xmm6,%%xmm6 \n"
"psrlw $0x9,%%xmm6 \n"
"psrlw $0x9,%%xmm6 \n" // 0x007f007f
"pcmpeqb %%xmm7,%%xmm7 \n"
"psrlw $15,%%xmm7 \n" // 0x00010001

"pextrw $0x1,%%xmm2,%k3 \n"
"subl $0x2,%5 \n"
"jl 29f \n"
@ -650,16 +866,19 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n"
"punpcklwd %%xmm4,%%xmm0 \n"
"pxor %%xmm6,%%xmm1 \n"
"pmaddubsw %%xmm1,%%xmm0 \n"
"psubb %8,%%xmm0 \n" // make pixels signed.
"pxor %%xmm6,%%xmm1 \n" // 128 -f = (f ^ 127 ) + 1
|
||||
"paddusb %%xmm7,%%xmm1 \n"
|
||||
"pmaddubsw %%xmm0,%%xmm1 \n"
|
||||
"pextrw $0x1,%%xmm2,%k3 \n"
|
||||
"pextrw $0x3,%%xmm2,%k4 \n"
|
||||
"psrlw $0x7,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%k2 \n"
|
||||
"paddw %9,%%xmm1 \n" // make pixels unsigned.
|
||||
"psrlw $0x7,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm1 \n"
|
||||
"movd %%xmm1,%k2 \n"
|
||||
"mov %w2," MEMACCESS(0) " \n"
|
||||
"lea " MEMLEA(0x2,0) ",%0 \n"
|
||||
"sub $0x2,%5 \n"
|
||||
"subl $0x2,%5 \n"
|
||||
"jge 2b \n"
|
||||
|
||||
LABELALIGN
|
||||
@ -670,23 +889,37 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"movd %k2,%%xmm0 \n"
|
||||
"psrlw $0x9,%%xmm2 \n"
|
||||
"pshufb %%xmm5,%%xmm2 \n"
|
||||
"psubb %8,%%xmm0 \n" // make pixels signed.
|
||||
"pxor %%xmm6,%%xmm2 \n"
|
||||
"pmaddubsw %%xmm2,%%xmm0 \n"
|
||||
"psrlw $0x7,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%k2 \n"
|
||||
"paddusb %%xmm7,%%xmm2 \n"
|
||||
"pmaddubsw %%xmm0,%%xmm2 \n"
|
||||
"paddw %9,%%xmm2 \n" // make pixels unsigned.
|
||||
"psrlw $0x7,%%xmm2 \n"
|
||||
"packuswb %%xmm2,%%xmm2 \n"
|
||||
"movd %%xmm2,%k2 \n"
|
||||
"mov %b2," MEMACCESS(0) " \n"
|
||||
"99: \n"
|
||||
: "+r"(dst_ptr), // %0
|
||||
"+r"(src_ptr), // %1
|
||||
"+a"(temp_pixel), // %2
|
||||
"+r"(x0), // %3
|
||||
"+r"(x1), // %4
|
||||
"+rm"(dst_width) // %5
|
||||
: "rm"(x), // %6
|
||||
"rm"(dx) // %7
|
||||
: "+r"(dst_ptr), // %0
|
||||
"+r"(src_ptr), // %1
|
||||
"=&a"(temp_pixel), // %2
|
||||
"=&r"(x0), // %3
|
||||
"=&r"(x1), // %4
|
||||
#if defined(__x86_64__)
|
||||
"+rm"(dst_width) // %5
|
||||
#else
|
||||
"+m"(dst_width) // %5
|
||||
#endif
|
||||
: "rm"(x), // %6
|
||||
"rm"(dx), // %7
|
||||
#if defined(__x86_64__)
|
||||
"x"(kFsub80), // %8
|
||||
"x"(kFadd40) // %9
|
||||
#else
|
||||
"m"(kFsub80), // %8
|
||||
"m"(kFadd40) // %9
|
||||
#endif
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
}
|
||||
|
||||
@ -795,7 +1028,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_stepx, uint8* dst_argb, int dst_width) {
|
||||
intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
|
||||
intptr_t src_stepx_x12 = 0;
|
||||
intptr_t src_stepx_x12;
|
||||
asm volatile (
|
||||
"lea " MEMLEA3(0x00,1,4) ",%1 \n"
|
||||
"lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
|
||||
@ -813,11 +1046,11 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
"lea " MEMLEA(0x10,2) ",%2 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(src_stepx_x4), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(dst_width), // %3
|
||||
"+r"(src_stepx_x12) // %4
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(src_stepx_x4), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(dst_width), // %3
|
||||
"=&r"(src_stepx_x12) // %4
|
||||
:: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3"
|
||||
);
|
||||
@ -829,7 +1062,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int src_stepx,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
|
||||
intptr_t src_stepx_x12 = 0;
|
||||
intptr_t src_stepx_x12;
|
||||
intptr_t row1 = (intptr_t)(src_stride);
|
||||
asm volatile (
|
||||
"lea " MEMLEA3(0x00,1,4) ",%1 \n"
|
||||
@ -858,12 +1091,12 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
"lea " MEMLEA(0x10,2) ",%2 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(src_stepx_x4), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+rm"(dst_width), // %3
|
||||
"+r"(src_stepx_x12), // %4
|
||||
"+r"(row1) // %5
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(src_stepx_x4), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+rm"(dst_width), // %3
|
||||
"=&r"(src_stepx_x12), // %4
|
||||
"+r"(row1) // %5
|
||||
:: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3"
|
||||
);
|
||||
@ -871,7 +1104,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
|
||||
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
intptr_t x0 = 0, x1 = 0;
|
||||
intptr_t x0, x1;
|
||||
asm volatile (
|
||||
"movd %5,%%xmm2 \n"
|
||||
"movd %6,%%xmm3 \n"
|
||||
@ -924,8 +1157,8 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
|
||||
"movd %%xmm0," MEMACCESS(2) " \n"
|
||||
"99: \n"
|
||||
: "+a"(x0), // %0
|
||||
"+d"(x1), // %1
|
||||
: "=&a"(x0), // %0
|
||||
"=&d"(x1), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(src_argb), // %3
|
||||
"+r"(dst_width) // %4
|
||||
@ -976,7 +1209,7 @@ static uvec8 kShuffleFractions = {
|
||||
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
|
||||
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
intptr_t x0 = 0, x1 = 0;
|
||||
intptr_t x0, x1;
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm4 \n"
|
||||
"movdqa %1,%%xmm5 \n"
|
||||
@ -1039,8 +1272,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
||||
: "+r"(dst_argb), // %0
|
||||
"+r"(src_argb), // %1
|
||||
"+rm"(dst_width), // %2
|
||||
"+r"(x0), // %3
|
||||
"+r"(x1) // %4
|
||||
"=&r"(x0), // %3
|
||||
"=&r"(x1) // %4
|
||||
: "rm"(x), // %5
|
||||
"rm"(dx) // %6
|
||||
: "memory", "cc", NACL_R14
|
||||
|
52
third_party/libyuv/source/scale_mips.cc
vendored
@ -21,8 +21,8 @@ extern "C" {
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)

void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
@ -31,7 +31,6 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n"
" nop \n"

".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -78,8 +77,8 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
const uint8* t = src_ptr + src_stride;

__asm__ __volatile__ (
@ -90,7 +89,6 @@ void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"bltz $t9, 2f \n"
" nop \n"

".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -178,8 +176,8 @@ void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@ -188,7 +186,6 @@ void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"beqz $t9, 2f \n"
" nop \n"

".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -234,8 +231,8 @@ void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
intptr_t stride = src_stride;
const uint8* s1 = src_ptr + stride;
const uint8* s2 = s1 + stride;
@ -248,7 +245,6 @@ void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
"srl $t9, %[dst_width], 1 \n"
"andi $t8, %[dst_width], 1 \n"

".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
@ -314,12 +310,11 @@ void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
".p2align 2 \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -361,14 +356,13 @@ void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"repl.ph $t3, 3 \n" // 0x00030003

".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
@ -418,14 +412,13 @@ void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"repl.ph $t2, 3 \n" // 0x00030003

".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
@ -471,13 +464,12 @@ void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"

".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
@ -518,8 +510,8 @@ void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
const uint8* t = src_ptr + stride;
const int c = 0x2AAA;
@ -528,7 +520,6 @@ void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
".set push \n"
".set noreorder \n"

".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
@ -572,9 +563,9 @@ void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
);
}

void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
const uint8* s1 = src_ptr + stride;
stride += stride;
@ -586,7 +577,6 @@ void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
".set push \n"
".set noreorder \n"

".p2align 2 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
58 third_party/libyuv/source/scale_neon.cc vendored
@ -26,7 +26,6 @@ extern "C" {
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
@ -47,7 +46,6 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load pixels and post inc
@ -73,7 +71,6 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
asm volatile (
// change the stride to row 2 pointer
"add %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
@ -101,7 +98,6 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -123,7 +119,6 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
const uint8* src_ptr2 = src_ptr + src_stride * 2;
const uint8* src_ptr3 = src_ptr + src_stride * 3;
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load up 16x4
@ -162,7 +157,6 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -185,7 +179,6 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -245,7 +238,6 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
@ -300,7 +292,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
asm volatile (
MEMACCESS(3)
"vld1.8 {q3}, [%3] \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0, d1, d2, d3}, [%0]! \n"
@ -334,7 +325,6 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
MEMACCESS(7)
"vld1.8 {q15}, [%7] \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"

// d0 = 00 40 01 41 02 42 03 43
@ -450,7 +440,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
MEMACCESS(5)
"vld1.8 {q14}, [%5] \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"

// d0 = 00 40 01 41 02 42 03 43
@ -543,9 +532,8 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,

void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
const uint8* src_tmp = NULL;
const uint8* src_tmp;
asm volatile (
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
"mov r12, %5 \n"
@ -564,12 +552,12 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %1, #16 \n"
"subs %4, %4, #16 \n" // 16 processed per loop
"bgt 1b \n"
: "+r"(src_tmp), // %0
"+r"(src_ptr), // %1
"+r"(dst_ptr), // %2
"+r"(src_stride), // %3
"+r"(src_width), // %4
"+r"(src_height) // %5
: "=&r"(src_tmp), // %0
"+r"(src_ptr), // %1
"+r"(dst_ptr), // %2
"+r"(src_stride), // %3
"+r"(src_width), // %4
"+r"(src_height) // %5
:
: "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List
);
@ -584,13 +572,16 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
MEMACCESS(6) \
"vld2.8 {d6["#n"], d7["#n"]}, [%6] \n"

// The NEON version mimics this formula:
// #define BLENDER(a, b, f) (uint8)((int)(a) +
// ((int)(f) * ((int)(b) - (int)(a)) >> 16))

void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) {
int dx_offset[4] = {0, 1, 2, 3};
int* tmp = dx_offset;
const uint8* src_tmp = src_ptr;
asm volatile (
".p2align 2 \n"
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
"vld1.32 {q2}, [%5] \n" // 0 1 2 3
@ -621,8 +612,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
"vmovl.u16 q10, d21 \n"
"vmul.s32 q11, q11, q13 \n"
"vmul.s32 q12, q12, q10 \n"
"vshrn.s32 d18, q11, #16 \n"
"vshrn.s32 d19, q12, #16 \n"
"vrshrn.s32 d18, q11, #16 \n"
"vrshrn.s32 d19, q12, #16 \n"
"vadd.s16 q8, q8, q9 \n"
"vmovn.s16 d6, q8 \n"

@ -749,7 +740,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
@ -773,7 +763,6 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) {
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@ -804,7 +793,6 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
asm volatile (
// change the stride to row 2 pointer
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
@ -845,7 +833,6 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width) {
asm volatile (
"mov r12, %3, lsl #2 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], r12 \n"
@ -875,7 +862,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
asm volatile (
"mov r12, %4, lsl #2 \n"
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
@ -927,10 +913,9 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,

void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
int tmp = 0;
int tmp;
const uint8* src_tmp = src_argb;
asm volatile (
".p2align 2 \n"
"1: \n"
LOAD1_DATA32_LANE(d0, 0)
LOAD1_DATA32_LANE(d0, 1)
@ -945,13 +930,13 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
"vst1.32 {q0, q1}, [%0]! \n" // store pixels
"subs %2, %2, #8 \n" // 8 processed per loop
"bgt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width), // %2
"+r"(x), // %3
"+r"(dx), // %4
"+r"(tmp), // %5
"+r"(src_tmp) // %6
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width), // %2
"+r"(x), // %3
"+r"(dx), // %4
"=&r"(tmp), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "q0", "q1"
);
@ -974,7 +959,6 @@ void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int* tmp = dx_offset;
const uint8* src_tmp = src_argb;
asm volatile (
".p2align 2 \n"
"vdup.32 q0, %3 \n" // x
"vdup.32 q1, %4 \n" // dx
"vld1.32 {q2}, [%5] \n" // 0 1 2 3
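The one behavioral change in this file is vshrn -> vrshrn in ScaleFilterCols_NEON. The interpolation term f * (b - a) is kept in 16.16 fixed point, and vshrn narrows it by simply dropping the low 16 bits, i.e. always rounding down; vrshrn adds the half-ulp before shifting, i.e. rounds to nearest, matching the BLENDER() formula quoted above more closely. A scalar model of the two narrowing steps (our illustration, not libyuv code):

  #include <stdint.h>

  static int16_t narrow_trunc(int32_t v) {  // vshrn.s32 #16
    return (int16_t)(v >> 16);              // arithmetic shift: rounds down
  }

  static int16_t narrow_round(int32_t v) {  // vrshrn.s32 #16
    return (int16_t)((v + 0x8000) >> 16);   // round to nearest
  }

The systematic round-down bias of the old code is consistent with the color issue when scaling that this update fixes; the same substitution appears in the aarch64 file below as shrn/shrn2 -> rshrn/rshrn2.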
34 third_party/libyuv/source/scale_neon64.cc vendored
@ -547,7 +547,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,

void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) {
const uint8* src_tmp = NULL;
const uint8* src_tmp;
asm volatile (
"1: \n"
"mov %0, %1 \n"
@ -567,12 +567,12 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %1, #16 \n"
"subs %w4, %w4, #16 \n" // 16 processed per loop
"b.gt 1b \n"
: "+r"(src_tmp), // %0
"+r"(src_ptr), // %1
"+r"(dst_ptr), // %2
"+r"(src_stride), // %3
"+r"(src_width), // %4
"+r"(src_height) // %5
: "=&r"(src_tmp), // %0
"+r"(src_ptr), // %1
"+r"(dst_ptr), // %2
"+r"(src_stride), // %3
"+r"(src_width), // %4
"+r"(src_height) // %5
:
: "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List
);
@ -626,8 +626,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
"ushll2 v6.4s, v6.8h, #0 \n"
"mul v16.4s, v16.4s, v7.4s \n"
"mul v17.4s, v17.4s, v6.4s \n"
"shrn v6.4h, v16.4s, #16 \n"
"shrn2 v6.8h, v17.4s, #16 \n"
"rshrn v6.4h, v16.4s, #16 \n"
"rshrn2 v6.8h, v17.4s, #16 \n"
"add v4.8h, v4.8h, v6.8h \n"
"xtn v4.8b, v4.8h \n"

@ -931,7 +931,7 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
int64 x64 = (int64) x;
int64 dx64 = (int64) dx;
int64 tmp64 = 0;
int64 tmp64;
asm volatile (
"1: \n"
LOAD1_DATA32_LANE(v0, 0)
@ -947,13 +947,13 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
"st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels
"subs %w2, %w2, #8 \n" // 8 processed per loop
"b.gt 1b \n"
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width64), // %2
"+r"(x64), // %3
"+r"(dx64), // %4
"+r"(tmp64), // %5
"+r"(src_tmp) // %6
: "+r"(dst_argb), // %0
"+r"(src_argb), // %1
"+r"(dst_width64), // %2
"+r"(x64), // %3
"+r"(dx64), // %4
"=&r"(tmp64), // %5
"+r"(src_tmp) // %6
:
: "memory", "cc", "v0", "v1"
);
284 third_party/libyuv/source/scale_win.cc vendored
@ -16,9 +16,8 @@ namespace libyuv {
extern "C" {
#endif

// This module is for Visual C x86.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
defined(_MSC_VER) && !defined(__clang__)
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

// Offsets for source bytes 0 to 9
static uvec8 kShuf0 =
@ -96,8 +95,8 @@ static uvec16 kScaleAb2 =

// Reads 32 pixels, throws half away and writes 16 pixels.
__declspec(naked)
void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
// src_stride ignored
@ -122,31 +121,28 @@ void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

// Blends 32x1 rectangle to 16x1.
__declspec(naked)
void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
// src_stride
mov edx, [esp + 12] // dst_ptr
mov ecx, [esp + 16] // dst_width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8

pcmpeqb xmm4, xmm4 // constant 0x0101
psrlw xmm4, 15
packuswb xmm4, xmm4
pxor xmm5, xmm5 // constant 0

wloop:
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
lea eax, [eax + 32]

movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
psrlw xmm0, 8
movdqa xmm3, xmm1
psrlw xmm1, 8
pand xmm2, xmm5
pand xmm3, xmm5
pavgw xmm0, xmm2
pavgw xmm1, xmm3
pmaddubsw xmm0, xmm4 // horizontal add
pmaddubsw xmm1, xmm4
pavgw xmm0, xmm5 // (x + 1) / 2
pavgw xmm1, xmm5
packuswb xmm0, xmm1

movdqu [edx], xmm0
lea edx, [edx + 16]
sub ecx, 16
@ -158,16 +154,19 @@ void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

// Blends 32x2 rectangle to 16x1.
__declspec(naked)
void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_ptr
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8

pcmpeqb xmm4, xmm4 // constant 0x0101
psrlw xmm4, 15
packuswb xmm4, xmm4
pxor xmm5, xmm5 // constant 0

wloop:
movdqu xmm0, [eax]
@ -175,19 +174,17 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
movdqu xmm2, [eax + esi]
movdqu xmm3, [eax + esi + 16]
lea eax, [eax + 32]
pavgb xmm0, xmm2 // average rows
pavgb xmm1, xmm3

movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
psrlw xmm0, 8
movdqa xmm3, xmm1
psrlw xmm1, 8
pand xmm2, xmm5
pand xmm3, xmm5
pavgw xmm0, xmm2
pavgw xmm1, xmm3
pmaddubsw xmm0, xmm4 // horizontal add
pmaddubsw xmm1, xmm4
pmaddubsw xmm2, xmm4
pmaddubsw xmm3, xmm4
paddw xmm0, xmm2 // vertical add
paddw xmm1, xmm3
psrlw xmm0, 1
psrlw xmm1, 1
pavgw xmm0, xmm5 // (x + 1) / 2
pavgw xmm1, xmm5
packuswb xmm0, xmm1

movdqu [edx], xmm0
lea edx, [edx + 16]
sub ecx, 16
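The SSE2 versions reduced columns with a mask/shift/pavgw sequence; the new SSSE3 versions instead load a 0x0101 byte constant and use pmaddubsw, which multiplies adjacent unsigned bytes by 1 and adds each pair into a 16-bit lane in one instruction, then pavgw against zero halves with rounding. A scalar model of the new 2x2 box filter (ours, not libyuv code; the AVX2 versions below follow the same recipe):

  #include <stdint.h>

  static uint8_t Box2x2(uint8_t a, uint8_t b,    // row 0, adjacent pixels
                        uint8_t c, uint8_t d) {  // row 1, adjacent pixels
    uint16_t row0 = (uint16_t)a + b;         // pmaddubsw xmm0, 0x0101
    uint16_t row1 = (uint16_t)c + d;         // pmaddubsw xmm2, 0x0101
    uint16_t sum = (uint16_t)(row0 + row1);  // paddw: vertical add, <= 1020
    sum >>= 1;                               // psrlw 1
    return (uint8_t)((sum + 1) >> 1);        // pavgw with 0: halve and round
  }

The net result is (a + b + c + d + 2) / 4, a correctly rounded average, where the old pavgb-then-average order rounded twice.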
@ -246,14 +243,12 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
lea eax, [eax + 64]

vpmaddubsw ymm0, ymm0, ymm4 // average horizontally
vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
vpmaddubsw ymm1, ymm1, ymm4
vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
vpavgw ymm1, ymm1, ymm5
vpackuswb ymm0, ymm0, ymm1
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb

vmovdqu [edx], ymm0
lea edx, [edx + 32]
sub ecx, 32
@ -264,6 +259,8 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
}
}

// For rounding, average = (sum + 2) / 4
// becomes average((sum >> 1), 0)
// Blends 64x2 rectangle to 32x1.
__declspec(naked)
void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
@ -281,19 +278,23 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
vpxor ymm5, ymm5, ymm5 // constant 0

wloop:
vmovdqu ymm0, [eax] // average rows
vmovdqu ymm0, [eax]
vmovdqu ymm1, [eax + 32]
vpavgb ymm0, ymm0, [eax + esi]
vpavgb ymm1, ymm1, [eax + esi + 32]
vmovdqu ymm2, [eax + esi]
vmovdqu ymm3, [eax + esi + 32]
lea eax, [eax + 64]

vpmaddubsw ymm0, ymm0, ymm4 // average horizontally
vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
vpmaddubsw ymm1, ymm1, ymm4
vpmaddubsw ymm2, ymm2, ymm4
vpmaddubsw ymm3, ymm3, ymm4
vpaddw ymm0, ymm0, ymm2 // vertical add
vpaddw ymm1, ymm1, ymm3
vpsrlw ymm0, ymm0, 1 // (x + 2) / 4 = (x / 2 + 1) / 2
vpsrlw ymm1, ymm1, 1
vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
vpavgw ymm1, ymm1, ymm5
vpackuswb ymm0, ymm0, ymm1
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb

vmovdqu [edx], ymm0
lea edx, [edx + 32]
sub ecx, 32
@ -308,7 +309,7 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,

// Point samples 32 pixels to 8 pixels.
__declspec(naked)
void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
mov eax, [esp + 4] // src_ptr
@ -339,7 +340,7 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

// Blends 32x4 rectangle to 8x1.
__declspec(naked)
void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
__asm {
push esi
@ -349,42 +350,40 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
mov edx, [esp + 8 + 12] // dst_ptr
mov ecx, [esp + 8 + 16] // dst_width
lea edi, [esi + esi * 2] // src_stride * 3
pcmpeqb xmm7, xmm7 // generate mask 0x00ff00ff
psrlw xmm7, 8
pcmpeqb xmm4, xmm4 // constant 0x0101
psrlw xmm4, 15
movdqa xmm5, xmm4
packuswb xmm4, xmm4
psllw xmm5, 3 // constant 0x0008

wloop:
movdqu xmm0, [eax] // average rows
movdqu xmm1, [eax + 16]
movdqu xmm2, [eax + esi]
movdqu xmm3, [eax + esi + 16]
pavgb xmm0, xmm2
pavgb xmm1, xmm3
pmaddubsw xmm0, xmm4 // horizontal add
pmaddubsw xmm1, xmm4
pmaddubsw xmm2, xmm4
pmaddubsw xmm3, xmm4
paddw xmm0, xmm2 // vertical add rows 0, 1
paddw xmm1, xmm3
movdqu xmm2, [eax + esi * 2]
movdqu xmm3, [eax + esi * 2 + 16]
movdqu xmm4, [eax + edi]
movdqu xmm5, [eax + edi + 16]
pmaddubsw xmm2, xmm4
pmaddubsw xmm3, xmm4
paddw xmm0, xmm2 // add row 2
paddw xmm1, xmm3
movdqu xmm2, [eax + edi]
movdqu xmm3, [eax + edi + 16]
lea eax, [eax + 32]
pavgb xmm2, xmm4
pavgb xmm3, xmm5
pavgb xmm0, xmm2
pavgb xmm1, xmm3

movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
psrlw xmm0, 8
movdqa xmm3, xmm1
psrlw xmm1, 8
pand xmm2, xmm7
pand xmm3, xmm7
pavgw xmm0, xmm2
pavgw xmm1, xmm3
packuswb xmm0, xmm1

movdqa xmm2, xmm0 // average columns (16 to 8 pixels)
psrlw xmm0, 8
pand xmm2, xmm7
pavgw xmm0, xmm2
pmaddubsw xmm2, xmm4
pmaddubsw xmm3, xmm4
paddw xmm0, xmm2 // add row 3
paddw xmm1, xmm3
phaddw xmm0, xmm1
paddw xmm0, xmm5 // + 8 for round
psrlw xmm0, 4 // /16 for average of 4 * 4
packuswb xmm0, xmm0

movq qword ptr [edx], xmm0
lea edx, [edx + 8]
sub ecx, 8
@ -443,37 +442,41 @@ void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
mov edx, [esp + 8 + 12] // dst_ptr
mov ecx, [esp + 8 + 16] // dst_width
lea edi, [esi + esi * 2] // src_stride * 3
vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0x00ff00ff
vpsrlw ymm7, ymm7, 8
vpcmpeqb ymm4, ymm4, ymm4 // constant 0x0101
vpsrlw ymm4, ymm4, 15
vpsllw ymm5, ymm4, 3 // constant 0x0008
vpackuswb ymm4, ymm4, ymm4

wloop:
vmovdqu ymm0, [eax] // average rows
vmovdqu ymm1, [eax + 32]
vpavgb ymm0, ymm0, [eax + esi]
vpavgb ymm1, ymm1, [eax + esi + 32]
vmovdqu ymm2, [eax + esi]
vmovdqu ymm3, [eax + esi + 32]
vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
vpmaddubsw ymm1, ymm1, ymm4
vpmaddubsw ymm2, ymm2, ymm4
vpmaddubsw ymm3, ymm3, ymm4
vpaddw ymm0, ymm0, ymm2 // vertical add rows 0, 1
vpaddw ymm1, ymm1, ymm3
vmovdqu ymm2, [eax + esi * 2]
vmovdqu ymm3, [eax + esi * 2 + 32]
vpavgb ymm2, ymm2, [eax + edi]
vpavgb ymm3, ymm3, [eax + edi + 32]
lea eax, [eax + 64]
vpavgb ymm0, ymm0, ymm2
vpavgb ymm1, ymm1, ymm3

vpand ymm2, ymm0, ymm7 // average columns (64 to 32 pixels)
vpand ymm3, ymm1, ymm7
vpsrlw ymm0, ymm0, 8
vpsrlw ymm1, ymm1, 8
vpavgw ymm0, ymm0, ymm2
vpavgw ymm1, ymm1, ymm3
vpackuswb ymm0, ymm0, ymm1
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb

vpand ymm2, ymm0, ymm7 // average columns (32 to 16 pixels)
vpsrlw ymm0, ymm0, 8
vpavgw ymm0, ymm0, ymm2
vpmaddubsw ymm2, ymm2, ymm4
vpmaddubsw ymm3, ymm3, ymm4
vpaddw ymm0, ymm0, ymm2 // add row 2
vpaddw ymm1, ymm1, ymm3
vmovdqu ymm2, [eax + edi]
vmovdqu ymm3, [eax + edi + 32]
lea eax, [eax + 64]
vpmaddubsw ymm2, ymm2, ymm4
vpmaddubsw ymm3, ymm3, ymm4
vpaddw ymm0, ymm0, ymm2 // add row 3
vpaddw ymm1, ymm1, ymm3
vphaddw ymm0, ymm0, ymm1 // mutates
vpermq ymm0, ymm0, 0xd8 // unmutate vphaddw
vpaddw ymm0, ymm0, ymm5 // + 8 for round
vpsrlw ymm0, ymm0, 4 // /32 for average of 4 * 4
vpackuswb ymm0, ymm0, ymm0
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb

vmovdqu [edx], xmm0
lea edx, [edx + 16]
sub ecx, 16
@ -499,9 +502,9 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
// src_stride ignored
mov edx, [esp + 12] // dst_ptr
mov ecx, [esp + 16] // dst_width
movdqa xmm3, kShuf0
movdqa xmm4, kShuf1
movdqa xmm5, kShuf2
movdqa xmm3, xmmword ptr kShuf0
movdqa xmm4, xmmword ptr kShuf1
movdqa xmm5, xmmword ptr kShuf2

wloop:
movdqu xmm0, [eax]
|
||||
mov esi, [esp + 4 + 8] // src_stride
|
||||
mov edx, [esp + 4 + 12] // dst_ptr
|
||||
mov ecx, [esp + 4 + 16] // dst_width
|
||||
movdqa xmm2, kShuf01
|
||||
movdqa xmm3, kShuf11
|
||||
movdqa xmm4, kShuf21
|
||||
movdqa xmm5, kMadd01
|
||||
movdqa xmm6, kMadd11
|
||||
movdqa xmm7, kRound34
|
||||
movdqa xmm2, xmmword ptr kShuf01
|
||||
movdqa xmm3, xmmword ptr kShuf11
|
||||
movdqa xmm4, xmmword ptr kShuf21
|
||||
movdqa xmm5, xmmword ptr kMadd01
|
||||
movdqa xmm6, xmmword ptr kMadd11
|
||||
movdqa xmm7, xmmword ptr kRound34
|
||||
|
||||
wloop:
|
||||
movdqu xmm0, [eax] // pixels 0..7
|
||||
@ -579,7 +582,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
||||
lea eax, [eax + 32]
|
||||
pavgb xmm0, xmm1
|
||||
pshufb xmm0, xmm4
|
||||
movdqa xmm1, kMadd21
|
||||
movdqa xmm1, xmmword ptr kMadd21
|
||||
pmaddubsw xmm0, xmm1
|
||||
paddsw xmm0, xmm7
|
||||
psrlw xmm0, 2
|
||||
@ -605,12 +608,12 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
mov esi, [esp + 4 + 8] // src_stride
|
||||
mov edx, [esp + 4 + 12] // dst_ptr
|
||||
mov ecx, [esp + 4 + 16] // dst_width
|
||||
movdqa xmm2, kShuf01
|
||||
movdqa xmm3, kShuf11
|
||||
movdqa xmm4, kShuf21
|
||||
movdqa xmm5, kMadd01
|
||||
movdqa xmm6, kMadd11
|
||||
movdqa xmm7, kRound34
|
||||
movdqa xmm2, xmmword ptr kShuf01
|
||||
movdqa xmm3, xmmword ptr kShuf11
|
||||
movdqa xmm4, xmmword ptr kShuf21
|
||||
movdqa xmm5, xmmword ptr kMadd01
|
||||
movdqa xmm6, xmmword ptr kMadd11
|
||||
movdqa xmm7, xmmword ptr kRound34
|
||||
|
||||
wloop:
|
||||
movdqu xmm0, [eax] // pixels 0..7
|
||||
@ -639,7 +642,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
pavgb xmm1, xmm0
|
||||
pavgb xmm0, xmm1
|
||||
pshufb xmm0, xmm4
|
||||
movdqa xmm1, kMadd21
|
||||
movdqa xmm1, xmmword ptr kMadd21
|
||||
pmaddubsw xmm0, xmm1
|
||||
paddsw xmm0, xmm7
|
||||
psrlw xmm0, 2
|
||||
@ -665,8 +668,8 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
// src_stride ignored
|
||||
mov edx, [esp + 12] // dst_ptr
|
||||
mov ecx, [esp + 16] // dst_width
|
||||
movdqa xmm4, kShuf38a
|
||||
movdqa xmm5, kShuf38b
|
||||
movdqa xmm4, xmmword ptr kShuf38a
|
||||
movdqa xmm5, xmmword ptr kShuf38b
|
||||
|
||||
xloop:
|
||||
movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
|
||||
@ -698,9 +701,9 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
||||
mov esi, [esp + 4 + 8] // src_stride
|
||||
mov edx, [esp + 4 + 12] // dst_ptr
|
||||
mov ecx, [esp + 4 + 16] // dst_width
|
||||
movdqa xmm2, kShufAc
|
||||
movdqa xmm3, kShufAc3
|
||||
movdqa xmm4, kScaleAc33
|
||||
movdqa xmm2, xmmword ptr kShufAc
|
||||
movdqa xmm3, xmmword ptr kShufAc3
|
||||
movdqa xmm4, xmmword ptr kScaleAc33
|
||||
pxor xmm5, xmm5
|
||||
|
||||
xloop:
|
||||
@ -763,10 +766,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||
mov esi, [esp + 4 + 8] // src_stride
|
||||
mov edx, [esp + 4 + 12] // dst_ptr
|
||||
mov ecx, [esp + 4 + 16] // dst_width
|
||||
movdqa xmm2, kShufAb0
|
||||
movdqa xmm3, kShufAb1
|
||||
movdqa xmm4, kShufAb2
|
||||
movdqa xmm5, kScaleAb2
|
||||
movdqa xmm2, xmmword ptr kShufAb0
|
||||
movdqa xmm3, xmmword ptr kShufAb1
|
||||
movdqa xmm4, xmmword ptr kShufAb2
|
||||
movdqa xmm5, xmmword ptr kScaleAb2
|
||||
|
||||
xloop:
|
||||
movdqu xmm0, [eax] // average 2 rows into xmm0
|
||||
@ -857,6 +860,16 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
}
|
||||
#endif // HAS_SCALEADDROW_AVX2
|
||||
|
||||
// Constant for making pixels signed to avoid pmaddubsw
|
||||
// saturation.
|
||||
static uvec8 kFsub80 =
|
||||
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
|
||||
|
||||
// Constant for making pixels unsigned and adding .5 for rounding.
|
||||
static uvec16 kFadd40 =
|
||||
{ 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 };
|
||||
|
||||
// Bilinear column filtering. SSSE3 version.
|
||||
__declspec(naked)
|
||||
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
@ -874,6 +887,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
movd xmm5, eax
|
||||
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
|
||||
psrlw xmm6, 9
|
||||
pcmpeqb xmm7, xmm7 // generate 0x0001
|
||||
psrlw xmm7, 15
|
||||
pextrw eax, xmm2, 1 // get x0 integer. preroll
|
||||
sub ecx, 2
|
||||
jl xloop29
|
||||
@ -896,20 +911,22 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
movd xmm4, ebx
|
||||
pshufb xmm1, xmm5 // 0011
|
||||
punpcklwd xmm0, xmm4
|
||||
psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
|
||||
pxor xmm1, xmm6 // 0..7f and 7f..0
|
||||
pmaddubsw xmm0, xmm1 // 16 bit, 2 pixels.
|
||||
paddusb xmm1, xmm7 // +1 so 0..7f and 80..1
|
||||
pmaddubsw xmm1, xmm0 // 16 bit, 2 pixels.
|
||||
pextrw eax, xmm2, 1 // get x0 integer. next iteration.
|
||||
pextrw edx, xmm2, 3 // get x1 integer. next iteration.
|
||||
psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
|
||||
packuswb xmm0, xmm0 // 8 bits, 2 pixels.
|
||||
movd ebx, xmm0
|
||||
paddw xmm1, xmmword ptr kFadd40 // make pixels unsigned and round.
|
||||
psrlw xmm1, 7 // 8.7 fixed point to low 8 bits.
|
||||
packuswb xmm1, xmm1 // 8 bits, 2 pixels.
|
||||
movd ebx, xmm1
|
||||
mov [edi], bx
|
||||
lea edi, [edi + 2]
|
||||
sub ecx, 2 // 2 pixels
|
||||
jge xloop2
|
||||
|
||||
xloop29:
|
||||
|
||||
add ecx, 2 - 1
|
||||
jl xloop99
|
||||
|
||||
@ -918,11 +935,14 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
movd xmm0, ebx
|
||||
psrlw xmm2, 9 // 7 bit fractions.
|
||||
pshufb xmm2, xmm5 // 0011
|
||||
psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
|
||||
pxor xmm2, xmm6 // 0..7f and 7f..0
|
||||
pmaddubsw xmm0, xmm2 // 16 bit
|
||||
psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
|
||||
packuswb xmm0, xmm0 // 8 bits
|
||||
movd ebx, xmm0
|
||||
paddusb xmm2, xmm7 // +1 so 0..7f and 80..1
|
||||
pmaddubsw xmm2, xmm0 // 16 bit
|
||||
paddw xmm2, xmmword ptr kFadd40 // make pixels unsigned and round.
|
||||
psrlw xmm2, 7 // 8.7 fixed point to low 8 bits.
|
||||
packuswb xmm2, xmm2 // 8 bits
|
||||
movd ebx, xmm2
|
||||
mov [edi], bl
|
||||
|
||||
xloop99:
|
||||
@ -1233,8 +1253,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
||||
mov ecx, [esp + 8 + 12] // dst_width
|
||||
movd xmm2, [esp + 8 + 16] // x
|
||||
movd xmm3, [esp + 8 + 20] // dx
|
||||
movdqa xmm4, kShuffleColARGB
|
||||
movdqa xmm5, kShuffleFractions
|
||||
movdqa xmm4, xmmword ptr kShuffleColARGB
|
||||
movdqa xmm5, xmmword ptr kShuffleFractions
|
||||
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
|
||||
psrlw xmm6, 9
|
||||
pextrw eax, xmm2, 1 // get x0 integer. preroll
|
||||
|
1 third_party/libyuv/source/video_common.cc vendored
@ -25,6 +25,7 @@ struct FourCCAliasEntry {

static const struct FourCCAliasEntry kFourCCAliases[] = {
{FOURCC_IYUV, FOURCC_I420},
{FOURCC_YU12, FOURCC_I420},
{FOURCC_YU16, FOURCC_I422},
{FOURCC_YU24, FOURCC_I444},
{FOURCC_YUYV, FOURCC_YUY2},
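The new {FOURCC_YU12, FOURCC_I420} entry teaches the alias table another spelling of planar YUV 4:2:0. The table backs libyuv's CanonicalFourCC() lookup, so callers probing by fourcc now fold YU12 into I420, e.g. (our illustration):

  #include "libyuv/video_common.h"

  // After this change the alias resolves to the canonical I420 fourcc.
  bool Yu12IsI420() {
    return libyuv::CanonicalFourCC(libyuv::FOURCC_YU12) ==
           (uint32)libyuv::FOURCC_I420;
  }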