libyuv: update to r1060
Picks up some lint and build fixes.
Change-Id: I0efb19385afa4ea3073a53e2b8334e57f245eea0
This commit is contained in:
parent
812506b80c
commit
b644eb9f44
2
third_party/libyuv/README.libvpx
vendored
2
third_party/libyuv/README.libvpx
vendored
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1041
|
||||
Version: 1060
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
@ -153,7 +153,6 @@ class LIBYUV_API MJpegDecoder {
|
||||
int* subsample_x, int* subsample_y, int number_of_components);
|
||||
|
||||
private:
|
||||
|
||||
void AllocOutputBuffers(int num_outbufs);
|
||||
void DestroyOutputBuffers();
|
||||
|
||||
|
90
third_party/libyuv/include/libyuv/row.h
vendored
90
third_party/libyuv/include/libyuv/row.h
vendored
@ -252,6 +252,94 @@ extern "C" {
|
||||
|
||||
// The following are available on arm64 platforms:
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
// #define HAS_I444TOARGBROW_NEON
|
||||
// #define HAS_I422TOARGBROW_NEON
|
||||
// #define HAS_I411TOARGBROW_NEON
|
||||
// #define HAS_I422TOBGRAROW_NEON
|
||||
// #define HAS_I422TOABGRROW_NEON
|
||||
// #define HAS_I422TORGBAROW_NEON
|
||||
// #define HAS_I422TORGB24ROW_NEON
|
||||
// #define HAS_I422TORAWROW_NEON
|
||||
// #define HAS_I422TORGB565ROW_NEON
|
||||
// #define HAS_I422TOARGB1555ROW_NEON
|
||||
// #define HAS_I422TOARGB4444ROW_NEON
|
||||
// #define HAS_YTOARGBROW_NEON
|
||||
// #define HAS_I400TOARGBROW_NEON
|
||||
// #define HAS_NV12TOARGBROW_NEON
|
||||
// #define HAS_NV21TOARGBROW_NEON
|
||||
// #define HAS_NV12TORGB565ROW_NEON
|
||||
// #define HAS_NV21TORGB565ROW_NEON
|
||||
// #define HAS_YUY2TOARGBROW_NEON
|
||||
// #define HAS_UYVYTOARGBROW_NEON
|
||||
#define HAS_SPLITUVROW_NEON
|
||||
#define HAS_MERGEUVROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
#define HAS_SETROW_NEON
|
||||
#define HAS_ARGBSETROWS_NEON
|
||||
#define HAS_MIRRORROW_NEON
|
||||
#define HAS_MIRRORUVROW_NEON
|
||||
#define HAS_ARGBMIRRORROW_NEON
|
||||
#define HAS_RGB24TOARGBROW_NEON
|
||||
#define HAS_RAWTOARGBROW_NEON
|
||||
// #define HAS_RGB565TOARGBROW_NEON
|
||||
// #define HAS_ARGB1555TOARGBROW_NEON
|
||||
// #define HAS_ARGB4444TOARGBROW_NEON
|
||||
#define HAS_ARGBTORGB24ROW_NEON
|
||||
#define HAS_ARGBTORAWROW_NEON
|
||||
#define HAS_YUY2TOYROW_NEON
|
||||
#define HAS_UYVYTOYROW_NEON
|
||||
#define HAS_YUY2TOUV422ROW_NEON
|
||||
#define HAS_UYVYTOUV422ROW_NEON
|
||||
#define HAS_YUY2TOUVROW_NEON
|
||||
#define HAS_UYVYTOUVROW_NEON
|
||||
#define HAS_HALFROW_NEON
|
||||
#define HAS_ARGBTOBAYERROW_NEON
|
||||
#define HAS_ARGBTOBAYERGGROW_NEON
|
||||
#define HAS_ARGBSHUFFLEROW_NEON
|
||||
#define HAS_I422TOYUY2ROW_NEON
|
||||
#define HAS_I422TOUYVYROW_NEON
|
||||
// #define HAS_ARGBTORGB565ROW_NEON
|
||||
// #define HAS_ARGBTOARGB1555ROW_NEON
|
||||
// #define HAS_ARGBTOARGB4444ROW_NEON
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_ARGBTOYJROW_NEON
|
||||
// #define HAS_ARGBTOUV444ROW_NEON
|
||||
// #define HAS_ARGBTOUV422ROW_NEON
|
||||
// #define HAS_ARGBTOUV411ROW_NEON
|
||||
// #define HAS_ARGBTOUVROW_NEON
|
||||
// #define HAS_ARGBTOUVJROW_NEON
|
||||
// #define HAS_BGRATOUVROW_NEON
|
||||
// #define HAS_ABGRTOUVROW_NEON
|
||||
// #define HAS_RGBATOUVROW_NEON
|
||||
// #define HAS_RGB24TOUVROW_NEON
|
||||
// #define HAS_RAWTOUVROW_NEON
|
||||
// #define HAS_RGB565TOUVROW_NEON
|
||||
// #define HAS_ARGB1555TOUVROW_NEON
|
||||
// #define HAS_ARGB4444TOUVROW_NEON
|
||||
// #define HAS_RGB565TOYROW_NEON
|
||||
// #define HAS_ARGB1555TOYROW_NEON
|
||||
// #define HAS_ARGB4444TOYROW_NEON
|
||||
// #define HAS_BGRATOYROW_NEON
|
||||
// #define HAS_ABGRTOYROW_NEON
|
||||
// #define HAS_RGBATOYROW_NEON
|
||||
// #define HAS_RGB24TOYROW_NEON
|
||||
// #define HAS_RAWTOYROW_NEON
|
||||
// #define HAS_INTERPOLATEROW_NEON
|
||||
// #define HAS_ARGBBLENDROW_NEON
|
||||
// #define HAS_ARGBATTENUATEROW_NEON
|
||||
// #define HAS_ARGBQUANTIZEROW_NEON
|
||||
// #define HAS_ARGBSHADEROW_NEON
|
||||
// #define HAS_ARGBGRAYROW_NEON
|
||||
// #define HAS_ARGBSEPIAROW_NEON
|
||||
// #define HAS_ARGBCOLORMATRIXROW_NEON
|
||||
#define HAS_ARGBMULTIPLYROW_NEON
|
||||
#define HAS_ARGBADDROW_NEON
|
||||
#define HAS_ARGBSUBTRACTROW_NEON
|
||||
#define HAS_SOBELROW_NEON
|
||||
#define HAS_SOBELTOPLANEROW_NEON
|
||||
#define HAS_SOBELXYROW_NEON
|
||||
#define HAS_SOBELXROW_NEON
|
||||
#define HAS_SOBELYROW_NEON
|
||||
#endif
|
||||
|
||||
// The following are available on Neon platforms:
|
||||
@ -465,7 +553,7 @@ typedef uint8 uvec8[16];
|
||||
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
|
||||
#endif // defined(__native_client__) && defined(__x86_64__)
|
||||
|
||||
#if defined(__arm__)
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
#undef MEMACCESS
|
||||
#if defined(__native_client__)
|
||||
#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
|
||||
|
@ -51,6 +51,14 @@ extern "C" {
|
||||
#define HAS_SCALEROWDOWN38_NEON
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_NEON
|
||||
#define HAS_SCALEARGBROWDOWN2_NEON
|
||||
#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
||||
(defined(__aarch64__) || defined(LIBYUV_NEON))
|
||||
/* #define HAS_SCALEROWDOWN2_NEON */
|
||||
/* #define HAS_SCALEROWDOWN4_NEON */
|
||||
/* #define HAS_SCALEROWDOWN34_NEON */
|
||||
/* #define HAS_SCALEROWDOWN38_NEON */
|
||||
/* #define HAS_SCALEARGBROWDOWNEVEN_NEON */
|
||||
/* #define HAS_SCALEARGBROWDOWN2_NEON */
|
||||
#endif
|
||||
|
||||
// The following are available on Mips platforms:
|
||||
|
2
third_party/libyuv/include/libyuv/version.h
vendored
2
third_party/libyuv/include/libyuv/version.h
vendored
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1041
|
||||
#define LIBYUV_VERSION 1059
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
2
third_party/libyuv/source/compare.cc
vendored
2
third_party/libyuv/source/compare.cc
vendored
@ -80,7 +80,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
|
||||
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
|
||||
#define HAS_SUMSQUAREERROR_NEON
|
||||
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
|
||||
#endif
|
||||
|
39
third_party/libyuv/source/compare_neon.cc
vendored
39
third_party/libyuv/source/compare_neon.cc
vendored
@ -56,6 +56,45 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
|
||||
return sse;
|
||||
}
|
||||
|
||||
#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
|
||||
// AArch64 NEON implementation of the sum of squared byte differences.
//
// Each loop iteration loads 16 bytes from src_a and 16 bytes from src_b,
// widens the per-byte differences to 16 bits, squares them and accumulates
// the squares into four 4x32-bit accumulators (v16-v19). After the loop the
// accumulators are added together and reduced to a single 32-bit value.
//
// src_a, src_b: the two byte buffers to compare.
// count:        number of bytes to compare. The loop subtracts 16 per
//               iteration and repeats while the result is > 0, so at least
//               one 16-byte vector is always read from each buffer.
//               NOTE(review): callers presumably pass a positive multiple of
//               16 - confirm against the call sites.
// Returns the accumulated sum of squared differences.
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
  // Plain local: the "=r" output constraint already tells the compiler the
  // asm writes it, so no volatile qualifier is needed.
  uint32 sse;
  asm volatile (
    // Clear the four accumulators.
    "eor        v16.16b, v16.16b, v16.16b      \n"
    "eor        v18.16b, v18.16b, v18.16b      \n"
    "eor        v17.16b, v17.16b, v17.16b      \n"
    "eor        v19.16b, v19.16b, v19.16b      \n"

    ".p2align  2                               \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.16b}, [%0], #16            \n"  // 16 bytes of src_a
    MEMACCESS(1)
    "ld1        {v1.16b}, [%1], #16            \n"  // 16 bytes of src_b
    // count is a 32-bit int: use the W view of its register so that bits
    // 63:32, which are not guaranteed for an int, cannot affect the flags
    // tested by bgt below.
    "subs       %w2, %w2, #16                  \n"
    "usubl      v2.8h, v0.8b, v1.8b            \n"  // low 8 differences
    "usubl2     v3.8h, v0.16b, v1.16b          \n"  // high 8 differences
    "smlal      v16.4s, v2.4h, v2.4h           \n"  // accumulate squares
    "smlal      v17.4s, v3.4h, v3.4h           \n"
    "smlal2     v18.4s, v2.8h, v2.8h           \n"
    "smlal2     v19.4s, v3.8h, v3.8h           \n"
    "bgt        1b                             \n"

    // Fold the four accumulators into one scalar.
    "add        v16.4s, v16.4s, v17.4s         \n"
    "add        v18.4s, v18.4s, v19.4s         \n"
    "add        v19.4s, v16.4s, v18.4s         \n"
    "addv       s0, v19.4s                     \n"
    "fmov       %w3, s0                        \n"
    : "+r"(src_a),
      "+r"(src_b),
      "+r"(count),
      "=r"(sse)
    :
    // "memory": the asm reads the source buffers through pointers that are
    // not described as memory inputs, so the compiler must not cache or
    // reorder accesses to them across this statement.
    : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
  return sse;
}
|
||||
|
||||
#endif // __ARM_NEON__
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
346
third_party/libyuv/source/convert.cc
vendored
346
third_party/libyuv/source/convert.cc
vendored
@ -401,7 +401,7 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
int y;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
int halfheight;
|
||||
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
|
||||
void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||
int pix) = YUY2ToUV422Row_C;
|
||||
@ -711,11 +711,13 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -963,9 +965,6 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1022,36 +1021,44 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
#endif // HAS_ARGBTOUVROW_SSSE3
|
||||
#endif // HAS_RGB24TOYROW_NEON
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_RGB24TOYROW_NEON)
|
||||
RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
|
||||
RGB24ToYRow(src_rgb24, dst_y, width);
|
||||
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
src_rgb24 += src_stride_rgb24 * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_RGB24TOYROW_NEON)
|
||||
RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
|
||||
RGB24ToYRow(src_rgb24, dst_y, width);
|
||||
#else
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
{
|
||||
#if !defined(HAS_RGB24TOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_RGB24TOYROW_NEON)
|
||||
RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
|
||||
RGB24ToYRow(src_rgb24, dst_y, width);
|
||||
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
src_rgb24 += src_stride_rgb24 * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_RGB24TOYROW_NEON)
|
||||
RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
|
||||
RGB24ToYRow(src_rgb24, dst_y, width);
|
||||
#else
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
#if !defined(HAS_RGB24TOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1075,9 +1082,6 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
if (!src_raw || !dst_y || !dst_u || !dst_v ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1134,36 +1138,42 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
#endif // HAS_ARGBTOUVROW_SSSE3
|
||||
#endif // HAS_RAWTOYROW_NEON
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_RAWTOYROW_NEON)
|
||||
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
|
||||
RAWToYRow(src_raw, dst_y, width);
|
||||
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
src_raw += src_stride_raw * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
{
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_RAWTOYROW_NEON)
|
||||
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
|
||||
RAWToYRow(src_raw, dst_y, width);
|
||||
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
src_raw += src_stride_raw * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_RAWTOYROW_NEON)
|
||||
RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
|
||||
RAWToYRow(src_raw, dst_y, width);
|
||||
#else
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
#if !defined(HAS_RAWTOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_RAWTOYROW_NEON)
|
||||
RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
|
||||
RAWToYRow(src_raw, dst_y, width);
|
||||
#else
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
#if !defined(HAS_RAWTOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1187,9 +1197,6 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1246,36 +1253,44 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
#endif // HAS_ARGBTOUVROW_SSSE3
|
||||
#endif // HAS_RGB565TOYROW_NEON
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_RGB565TOYROW_NEON)
|
||||
RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
|
||||
RGB565ToYRow(src_rgb565, dst_y, width);
|
||||
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RGB565ToARGBRow(src_rgb565, row, width);
|
||||
RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
src_rgb565 += src_stride_rgb565 * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_RGB565TOYROW_NEON)
|
||||
RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
|
||||
RGB565ToYRow(src_rgb565, dst_y, width);
|
||||
#else
|
||||
RGB565ToARGBRow(src_rgb565, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
{
|
||||
#if !defined(HAS_RGB565TOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_RGB565TOYROW_NEON)
|
||||
RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
|
||||
RGB565ToYRow(src_rgb565, dst_y, width);
|
||||
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RGB565ToARGBRow(src_rgb565, row, width);
|
||||
RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
src_rgb565 += src_stride_rgb565 * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_RGB565TOYROW_NEON)
|
||||
RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
|
||||
RGB565ToYRow(src_rgb565, dst_y, width);
|
||||
#else
|
||||
RGB565ToARGBRow(src_rgb565, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
#if !defined(HAS_RGB565TOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1299,9 +1314,6 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1358,38 +1370,45 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
#endif // HAS_ARGBTOUVROW_SSSE3
|
||||
#endif // HAS_ARGB1555TOYROW_NEON
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_ARGB1555TOYROW_NEON)
|
||||
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
|
||||
width);
|
||||
#else
|
||||
ARGB1555ToARGBRow(src_argb1555, row, width);
|
||||
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
|
||||
width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
{
|
||||
#if !defined(HAS_ARGB1555TOYROW_NEON)
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
src_argb1555 += src_stride_argb1555 * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_ARGB1555TOYROW_NEON)
|
||||
ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
|
||||
width);
|
||||
#else
|
||||
ARGB1555ToARGBRow(src_argb1555, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGB1555ToARGBRow(src_argb1555, row, width);
|
||||
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
|
||||
width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
}
|
||||
src_argb1555 += src_stride_argb1555 * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_ARGB1555TOYROW_NEON)
|
||||
ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
#else
|
||||
ARGB1555ToARGBRow(src_argb1555, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
#if !defined(HAS_ARGB1555TOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1413,9 +1432,6 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1472,38 +1488,46 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
#endif // HAS_ARGBTOUVROW_SSSE3
|
||||
#endif // HAS_ARGB4444TOYROW_NEON
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_ARGB4444TOYROW_NEON)
|
||||
ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
|
||||
ARGB4444ToYRow(src_argb4444, dst_y, width);
|
||||
ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
|
||||
width);
|
||||
#else
|
||||
ARGB4444ToARGBRow(src_argb4444, row, width);
|
||||
ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
|
||||
width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
src_argb4444 += src_stride_argb4444 * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_ARGB4444TOYROW_NEON)
|
||||
ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
|
||||
ARGB4444ToYRow(src_argb4444, dst_y, width);
|
||||
#else
|
||||
ARGB4444ToARGBRow(src_argb4444, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
{
|
||||
#if !defined(HAS_ARGB4444TOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if defined(HAS_ARGB4444TOYROW_NEON)
|
||||
ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
|
||||
ARGB4444ToYRow(src_argb4444, dst_y, width);
|
||||
ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
|
||||
width);
|
||||
#else
|
||||
ARGB4444ToARGBRow(src_argb4444, row, width);
|
||||
ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
|
||||
width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
src_argb4444 += src_stride_argb4444 * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if defined(HAS_ARGB4444TOYROW_NEON)
|
||||
ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
|
||||
ARGB4444ToYRow(src_argb4444, dst_y, width);
|
||||
#else
|
||||
ARGB4444ToARGBRow(src_argb4444, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
#if !defined(HAS_ARGB4444TOYROW_NEON)
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
168
third_party/libyuv/source/convert_from_argb.cc
vendored
168
third_party/libyuv/source/convert_from_argb.cc
vendored
@ -60,6 +60,13 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOUV444ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
@ -76,10 +83,8 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
#elif defined(HAS_ARGBTOYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
ARGBToUV444Row = ARGBToUV444Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -134,6 +139,13 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOUV422ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
@ -153,12 +165,6 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -228,11 +234,13 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 32) {
|
||||
ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToUV411Row = ARGBToUV411Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUV411ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 32) {
|
||||
ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToUV411Row = ARGBToUV411Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -261,9 +269,6 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow_C;
|
||||
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
int width) = MergeUVRow_C;
|
||||
// Allocate a rows of uv.
|
||||
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
|
||||
uint8* row_v = row_u + ((halfwidth + 15) & ~15);
|
||||
if (!src_argb ||
|
||||
!dst_y || !dst_uv ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -296,11 +301,13 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -331,22 +338,27 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
// Allocate a rows of uv.
|
||||
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
|
||||
uint8* row_v = row_u + ((halfwidth + 15) & ~15);
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
|
||||
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
|
||||
src_argb += src_stride_argb * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_uv += dst_stride_uv;
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
|
||||
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
|
||||
src_argb += src_stride_argb * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_uv += dst_stride_uv;
|
||||
}
|
||||
if (height & 1) {
|
||||
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
|
||||
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
}
|
||||
free_aligned_buffer_64(row_u);
|
||||
}
|
||||
if (height & 1) {
|
||||
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
|
||||
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
}
|
||||
free_aligned_buffer_64(row_u);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -364,9 +376,6 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow_C;
|
||||
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
int width) = MergeUVRow_C;
|
||||
// Allocate a rows of uv.
|
||||
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
|
||||
uint8* row_v = row_u + ((halfwidth + 15) & ~15);
|
||||
if (!src_argb ||
|
||||
!dst_y || !dst_uv ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -399,11 +408,13 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -434,22 +445,27 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
// Allocate a rows of uv.
|
||||
align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
|
||||
uint8* row_v = row_u + ((halfwidth + 15) & ~15);
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
|
||||
MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
|
||||
src_argb += src_stride_argb * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_uv += dst_stride_uv;
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
|
||||
MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
|
||||
src_argb += src_stride_argb * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_uv += dst_stride_uv;
|
||||
}
|
||||
if (height & 1) {
|
||||
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
|
||||
MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
}
|
||||
free_aligned_buffer_64(row_u);
|
||||
}
|
||||
if (height & 1) {
|
||||
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
|
||||
MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
}
|
||||
free_aligned_buffer_64(row_u);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -493,6 +509,13 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOUV422ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
@ -510,12 +533,6 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -594,6 +611,13 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOUV422ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
@ -611,12 +635,6 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1022,11 +1040,13 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYJRow = ARGBToYJRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVJRow = ARGBToUVJRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVJROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
|
||||
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVJRow = ARGBToUVJRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
11
third_party/libyuv/source/cpu_id.cc
vendored
11
third_party/libyuv/source/cpu_id.cc
vendored
@ -14,7 +14,7 @@
|
||||
#include <intrin.h> // For __cpuidex()
|
||||
#endif
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
|
||||
!defined(__native_client__) && defined(_M_X64) && \
|
||||
!defined(__native_client__) && \
|
||||
defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
|
||||
#include <immintrin.h> // For _xgetbv()
|
||||
#endif
|
||||
@ -97,7 +97,7 @@ int TestOsSaveYmm() {
|
||||
uint32 xcr0 = 0u;
|
||||
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
|
||||
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
|
||||
#elif defined(_M_IX86)
|
||||
#elif defined(_M_IX86) && defined(_MSC_VER)
|
||||
__asm {
|
||||
xor ecx, ecx // xcr 0
|
||||
_asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
|
||||
@ -256,12 +256,17 @@ int InitCpuFlags(void) {
|
||||
if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
|
||||
cpu_info_ &= ~kCpuHasMIPS_DSPR2;
|
||||
}
|
||||
#elif defined(__arm__)
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
// gcc -mfpu=neon defines __ARM_NEON__
|
||||
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
|
||||
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
|
||||
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
|
||||
cpu_info_ = kCpuHasNEON;
|
||||
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
|
||||
// flag in it.
|
||||
// So for aarch64, neon enabling is hard coded here.
|
||||
#elif defined(__aarch64__)
|
||||
cpu_info_ = kCpuHasNEON;
|
||||
#else
|
||||
// Linux arm parse text file for neon detect.
|
||||
cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
|
||||
|
12
third_party/libyuv/source/format_conversion.cc
vendored
12
third_party/libyuv/source/format_conversion.cc
vendored
@ -332,11 +332,13 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
10
third_party/libyuv/source/mjpeg_decoder.cc
vendored
10
third_party/libyuv/source/mjpeg_decoder.cc
vendored
@ -13,8 +13,8 @@
|
||||
#ifdef HAVE_JPEG
|
||||
#include <assert.h>
|
||||
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\
|
||||
!defined(TARGET_IPHONE_SIMULATOR)
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
|
||||
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||
// Must be included before jpeglib.
|
||||
#include <setjmp.h>
|
||||
#define HAVE_SETJMP
|
||||
@ -101,7 +101,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
|
||||
}
|
||||
|
||||
buf_.data = src;
|
||||
buf_.len = (int)(src_len);
|
||||
buf_.len = static_cast<int>(src_len);
|
||||
buf_vec_.pos = 0;
|
||||
decompress_struct_->client_data = &buf_vec_;
|
||||
#ifdef HAVE_SETJMP
|
||||
@ -411,7 +411,7 @@ void init_source(j_decompress_ptr cinfo) {
|
||||
}
|
||||
|
||||
boolean fill_input_buffer(j_decompress_ptr cinfo) {
|
||||
BufferVector* buf_vec = (BufferVector*)(cinfo->client_data);
|
||||
BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
|
||||
if (buf_vec->pos >= buf_vec->len) {
|
||||
assert(0 && "No more data");
|
||||
// ERROR: No more data
|
||||
@ -447,7 +447,7 @@ void ErrorHandler(j_common_ptr cinfo) {
|
||||
// ERROR: Error in jpeglib: buf
|
||||
#endif
|
||||
|
||||
SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err);
|
||||
SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
|
||||
// This rewinds the call stack to the point of the corresponding setjmp()
|
||||
// and causes it to return (for a second time) with value 1.
|
||||
longjmp(mgr->setjmp_buffer, 1);
|
||||
|
26
third_party/libyuv/source/row_any.cc
vendored
26
third_party/libyuv/source/row_any.cc
vendored
@ -79,9 +79,13 @@ YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C,
|
||||
YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C,
|
||||
1, 2, 7)
|
||||
YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
|
||||
YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
|
||||
YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
|
||||
#endif // HAS_I422TOARGBROW_NEON
|
||||
#ifdef HAS_I422TOYUY2ROW_NEON
|
||||
YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
|
||||
#endif // HAS_I422TOYUY2ROW_NEON
|
||||
#ifdef HAS_I422TOUYVYROW_NEON
|
||||
YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
|
||||
#endif // HAS_I422TOUYVYROW_NEON
|
||||
#undef YANY
|
||||
|
||||
// Wrappers to handle odd width
|
||||
@ -250,12 +254,26 @@ YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8)
|
||||
YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
|
||||
YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
|
||||
YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOYROW_NEON
|
||||
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
|
||||
#endif
|
||||
#ifdef HAS_UYVYTOYROW_NEON
|
||||
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOARGBROW_NEON
|
||||
YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOARGBROW_NEON
|
||||
YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOARGBROW_NEON
|
||||
YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
|
||||
#endif
|
||||
#ifdef HAS_ARGB1555TOARGBROW_NEON
|
||||
YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
|
||||
#endif
|
||||
#ifdef HAS_ARGB4444TOARGBROW_NEON
|
||||
YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
|
||||
#endif
|
||||
#undef YANY
|
||||
@ -333,7 +351,11 @@ UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15)
|
||||
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
|
||||
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
|
||||
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUVROW_NEON
|
||||
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_UYVYTOUVROW_NEON
|
||||
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
|
||||
#endif
|
||||
#undef UVANY
|
||||
|
437
third_party/libyuv/source/row_neon64.cc
vendored
437
third_party/libyuv/source/row_neon64.cc
vendored
@ -824,19 +824,19 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
|
||||
"ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pairs of UV
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q0}, [%1]! \n" // store U
|
||||
"st1 {v0.16b}, [%1], #16 \n" // store U
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q1}, [%2]! \n" // store V
|
||||
"st1 {v1.16b}, [%2], #16 \n" // store V
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+r"(width) // %3 // Output registers
|
||||
: // Input registers
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_SPLITUVROW_NEON
|
||||
@ -849,12 +849,12 @@ void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load U
|
||||
"ld1 {v0.16b}, [%0], #16 \n" // load U
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n" // load V
|
||||
"ld1 {v1.16b}, [%1], #16 \n" // load V
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(2)
|
||||
"vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV
|
||||
"st2 {v0.16b, v1.16b}, [%2], #32 \n" // store 16 pairs of UV
|
||||
"bgt 1b \n"
|
||||
:
|
||||
"+r"(src_u), // %0
|
||||
@ -862,7 +862,7 @@ void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
"+r"(dst_uv), // %2
|
||||
"+r"(width) // %3 // Output registers
|
||||
: // Input registers
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_MERGEUVROW_NEON
|
||||
@ -874,16 +874,16 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
|
||||
"ld1 {v0.8b-v3.8b}, [%0], #32 \n" // load 32
|
||||
"subs %2, %2, #32 \n" // 32 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32
|
||||
"st1 {v0.8b-v3.8b}, [%1], #32 \n" // store 32
|
||||
"bgt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(count) // %2 // Output registers
|
||||
: // Input registers
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_COPYROW_NEON
|
||||
@ -892,16 +892,16 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
|
||||
#ifdef HAS_SETROW_NEON
|
||||
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
|
||||
asm volatile (
|
||||
"vdup.u32 q0, %2 \n" // duplicate 4 ints
|
||||
"dup v0.4s, %w2 \n" // duplicate 4 ints
|
||||
"1: \n"
|
||||
"subs %1, %1, #16 \n" // 16 bytes per loop
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {q0}, [%0]! \n" // store
|
||||
"st1 {v0.16b}, [%0], #16 \n" // store
|
||||
"bgt 1b \n"
|
||||
: "+r"(dst), // %0
|
||||
"+r"(count) // %1
|
||||
: "r"(v32) // %2
|
||||
: "cc", "memory", "q0"
|
||||
: "cc", "memory", "v0"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SETROW_NEON
|
||||
@ -922,26 +922,25 @@ void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
|
||||
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
||||
asm volatile (
|
||||
// Start at end of source row.
|
||||
"mov r3, #-16 \n"
|
||||
"add %0, %0, %2 \n"
|
||||
"sub %0, #16 \n"
|
||||
"sub %0, %0, #16 \n"
|
||||
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
|
||||
"subs %2, #16 \n" // 16 pixels per loop.
|
||||
"vrev64.8 q0, q0 \n"
|
||||
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
|
||||
"subs %2, %2, #16 \n" // 16 pixels per loop.
|
||||
"rev64 v0.16b, v0.16b \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d1}, [%1]! \n" // dst += 16
|
||||
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n"
|
||||
"st1 {v0.D}[0], [%1], #8 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "r3", "q0"
|
||||
: "r"((ptrdiff_t)-16) // %3
|
||||
: "cc", "memory", "v0"
|
||||
);
|
||||
}
|
||||
#endif // HAS_MIRRORROW_NEON
|
||||
@ -951,27 +950,27 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// Start at end of source row.
|
||||
"mov r12, #-16 \n"
|
||||
"add %0, %0, %3, lsl #1 \n"
|
||||
"sub %0, #16 \n"
|
||||
"sub %0, %0, #16 \n"
|
||||
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16
|
||||
"subs %3, #8 \n" // 8 pixels per loop.
|
||||
"vrev64.8 q0, q0 \n"
|
||||
"ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
|
||||
"subs %3, %3, #8 \n" // 8 pixels per loop.
|
||||
"rev64 v0.8b, v0.8b \n"
|
||||
"rev64 v1.8b, v1.8b \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n" // dst += 8
|
||||
"st1 {v0.8b}, [%1], #8 \n" // dst += 8
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d1}, [%2]! \n"
|
||||
"st1 {v1.8b}, [%2], #8 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "r12", "q0"
|
||||
: "r"((ptrdiff_t)-16) // %4
|
||||
: "cc", "memory", "v0", "v1"
|
||||
);
|
||||
}
|
||||
#endif // HAS_MIRRORUVROW_NEON
|
||||
@ -980,26 +979,25 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
||||
asm volatile (
|
||||
// Start at end of source row.
|
||||
"mov r3, #-16 \n"
|
||||
"add %0, %0, %2, lsl #2 \n"
|
||||
"sub %0, #16 \n"
|
||||
"sub %0, %0, #16 \n"
|
||||
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
|
||||
"subs %2, #4 \n" // 4 pixels per loop.
|
||||
"vrev64.32 q0, q0 \n"
|
||||
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
|
||||
"subs %2, %2, #4 \n" // 4 pixels per loop.
|
||||
"rev64 v0.4s, v0.4s \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d1}, [%1]! \n" // dst += 16
|
||||
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n"
|
||||
"st1 {v0.D}[0], [%1], #8 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "r3", "q0"
|
||||
: "r"((ptrdiff_t)-16) // %3
|
||||
: "cc", "memory", "v0"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBMIRRORROW_NEON
|
||||
@ -1007,20 +1005,20 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
||||
#ifdef HAS_RGB24TOARGBROW_NEON
|
||||
void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
|
||||
asm volatile (
|
||||
"vmov.u8 d4, #255 \n" // Alpha
|
||||
"movi v4.8b, #255 \n" // Alpha
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24.
|
||||
"ld3 {v1.8b-v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
MEMACCESS(1)
|
||||
"vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
|
||||
"st4 {v1.8b-v4.8b}, [%1], #32 \n" // store 8 pixels of ARGB.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_rgb24), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
|
||||
: "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_RGB24TOARGBROW_NEON
|
||||
@ -1028,21 +1026,22 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
|
||||
#ifdef HAS_RAWTOARGBROW_NEON
|
||||
void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
|
||||
asm volatile (
|
||||
"vmov.u8 d4, #255 \n" // Alpha
|
||||
"movi v5.8b, #255 \n" // Alpha
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
|
||||
"ld3 {v0.8b-v2.8b}, [%0], #24 \n" // read r g b
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vswp.u8 d1, d3 \n" // swap R, B
|
||||
"mov v3.8b, v1.8b \n" // move g
|
||||
"mov v4.8b, v0.8b \n" // move r
|
||||
MEMACCESS(1)
|
||||
"vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
|
||||
"st4 {v2.8b-v5.8b}, [%1], #32 \n" // store b g r a
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_raw), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_RAWTOARGBROW_NEON
|
||||
@ -1170,16 +1169,16 @@ void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
|
||||
"ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load 8 pixels of ARGB.
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
MEMACCESS(1)
|
||||
"vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24.
|
||||
"st3 {v1.8b-v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_rgb24), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
|
||||
: "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBTORGB24ROW_NEON
|
||||
@ -1190,17 +1189,18 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
|
||||
"ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load b g r a
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vswp.u8 d1, d3 \n" // swap R, B
|
||||
"mov v4.8b, v2.8b \n" // mov g
|
||||
"mov v5.8b, v1.8b \n" // mov b
|
||||
MEMACCESS(1)
|
||||
"vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW.
|
||||
"st3 {v3.8b-v5.8b}, [%1], #24 \n" // store r g b
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_raw), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
|
||||
: "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBTORAWROW_NEON
|
||||
@ -1211,16 +1211,16 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
|
||||
"ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2.
|
||||
"subs %2, %2, #16 \n" // 16 processed per loop.
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y.
|
||||
"st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_yuy2), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_YUY2TOYROW_NEON
|
||||
@ -1231,16 +1231,16 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
|
||||
"ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
|
||||
"subs %2, %2, #16 \n" // 16 processed per loop.
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y.
|
||||
"st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uyvy), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_UYVYTOYROW_NEON
|
||||
@ -1252,19 +1252,19 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2.
|
||||
"subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d1}, [%1]! \n" // store 8 U.
|
||||
"st1 {v1.8b}, [%1], #8 \n" // store 8 U.
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d3}, [%2]! \n" // store 8 V.
|
||||
"st1 {v3.8b}, [%2], #8 \n" // store 8 V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_yuy2), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+r"(pix) // %3
|
||||
:
|
||||
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_YUY2TOUV422ROW_NEON
|
||||
@ -1276,19 +1276,19 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY.
|
||||
"subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n" // store 8 U.
|
||||
"st1 {v0.8b}, [%1], #8 \n" // store 8 U.
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d2}, [%2]! \n" // store 8 V.
|
||||
"st1 {v2.8b}, [%2], #8 \n" // store 8 V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uyvy), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+r"(pix) // %3
|
||||
:
|
||||
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_UYVYTOUV422ROW_NEON
|
||||
@ -1297,20 +1297,20 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // stride + src_yuy2
|
||||
"add %x1, %x0, %w1, sxtw \n" // stride + src_yuy2
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2.
|
||||
"subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
|
||||
MEMACCESS(1)
|
||||
"vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2.
|
||||
"vrhadd.u8 d1, d1, d5 \n" // average rows of U
|
||||
"vrhadd.u8 d3, d3, d7 \n" // average rows of V
|
||||
"ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row YUY2.
|
||||
"urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U
|
||||
"urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d1}, [%2]! \n" // store 8 U.
|
||||
"st1 {v1.8b}, [%2], #8 \n" // store 8 U.
|
||||
MEMACCESS(3)
|
||||
"vst1.8 {d3}, [%3]! \n" // store 8 V.
|
||||
"st1 {v3.8b}, [%3], #8 \n" // store 8 V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_yuy2), // %0
|
||||
"+r"(stride_yuy2), // %1
|
||||
@ -1318,7 +1318,7 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_YUY2TOUVROW_NEON
|
||||
@ -1327,20 +1327,20 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
|
||||
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // stride + src_uyvy
|
||||
"add %x1, %x0, %w1, sxtw \n" // stride + src_uyvy
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY.
|
||||
"subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
|
||||
MEMACCESS(1)
|
||||
"vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY.
|
||||
"vrhadd.u8 d0, d0, d4 \n" // average rows of U
|
||||
"vrhadd.u8 d2, d2, d6 \n" // average rows of V
|
||||
"ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row UYVY.
|
||||
"urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U
|
||||
"urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 U.
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 U.
|
||||
MEMACCESS(3)
|
||||
"vst1.8 {d2}, [%3]! \n" // store 8 V.
|
||||
"st1 {v2.8b}, [%3], #8 \n" // store 8 V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uyvy), // %0
|
||||
"+r"(stride_uyvy), // %1
|
||||
@ -1348,7 +1348,7 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_UYVYTOUVROW_NEON
|
||||
@ -1358,23 +1358,23 @@ void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix) {
|
||||
asm volatile (
|
||||
// change the stride to row 2 pointer
|
||||
"add %1, %0 \n"
|
||||
"add %x1, %x0, %w1, sxtw \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels.
|
||||
"ld1 {v0.16b}, [%0], #16 \n" // load row 1 16 pixels.
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels.
|
||||
"vrhadd.u8 q0, q1 \n" // average row 1 and 2
|
||||
"ld1 {v1.16b}, [%1], #16 \n" // load row 2 16 pixels.
|
||||
"urhadd v0.16b, v0.16b, v1.16b \n" // average row 1 and 2
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q0}, [%2]! \n"
|
||||
"st1 {v0.16b}, [%2], #16 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(src_uv_stride), // %1
|
||||
"+r"(dst_uv), // %2
|
||||
"+r"(pix) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_HALFROW_NEON
|
||||
@ -1384,22 +1384,22 @@ void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
|
||||
void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix) {
|
||||
asm volatile (
|
||||
"vmov.u32 d6[0], %3 \n" // selector
|
||||
"mov v2.s[0], %w3 \n" // selector
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0, q1}, [%0]! \n" // load row 8 pixels.
|
||||
"ld1 {v0.16b, v1.16b}, [%0], 32 \n" // load row 8 pixels.
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop
|
||||
"vtbl.8 d4, {d0, d1}, d6 \n" // look up 4 pixels
|
||||
"vtbl.8 d5, {d2, d3}, d6 \n" // look up 4 pixels
|
||||
"vtrn.u32 d4, d5 \n" // combine 8 pixels
|
||||
"tbl v4.8b, {v0.16b}, v2.8b \n" // look up 4 pixels
|
||||
"tbl v5.8b, {v1.16b}, v2.8b \n" // look up 4 pixels
|
||||
"trn1 v4.4s, v4.4s, v5.4s \n" // combine 8 pixels
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d4}, [%1]! \n" // store 8.
|
||||
"st1 {v4.8b}, [%1], #8 \n" // store 8.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_bayer), // %1
|
||||
"+r"(pix) // %2
|
||||
: "r"(selector) // %3
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v4", "v5" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBTOBAYERROW_NEON
|
||||
@ -1411,16 +1411,16 @@ void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load row 8 pixels.
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d1}, [%1]! \n" // store 8 G's.
|
||||
"st1 {v1.8b}, [%1], #8 \n" // store 8 G's.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_bayer), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBTOBAYERGGROW_NEON
|
||||
@ -1431,21 +1431,20 @@ void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
const uint8* shuffler, int pix) {
|
||||
asm volatile (
|
||||
MEMACCESS(3)
|
||||
"vld1.8 {q2}, [%3] \n" // shuffler
|
||||
"ld1 {v2.16b}, [%3] \n" // shuffler
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load 4 pixels.
|
||||
"ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels.
|
||||
"subs %2, %2, #4 \n" // 4 processed per loop
|
||||
"vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels
|
||||
"vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels
|
||||
"tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q1}, [%1]! \n" // store 4.
|
||||
"st1 {v1.16b}, [%1], #16 \n" // store 4.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(pix) // %2
|
||||
: "r"(shuffler) // %3
|
||||
: "cc", "memory", "q0", "q1", "q2" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBSHUFFLEROW_NEON
|
||||
@ -1459,14 +1458,15 @@ void I422ToYUY2Row_NEON(const uint8* src_y,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys
|
||||
"ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
|
||||
"mov v2.8b, v1.8b \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d1}, [%1]! \n" // load 8 Us
|
||||
"ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {d3}, [%2]! \n" // load 8 Vs
|
||||
"ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs
|
||||
"subs %4, %4, #16 \n" // 16 pixels
|
||||
MEMACCESS(3)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%3], #32 \n" // Store 8 YUY2/16 pixels.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
@ -1474,7 +1474,7 @@ void I422ToYUY2Row_NEON(const uint8* src_y,
|
||||
"+r"(dst_yuy2), // %3
|
||||
"+r"(width) // %4
|
||||
:
|
||||
: "cc", "memory", "d0", "d1", "d2", "d3"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I422TOYUY2ROW_NEON
|
||||
@ -1488,14 +1488,15 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys
|
||||
"ld2 {v1.8b, v2.8b}, [%0], #16 \n" // load 16 Ys
|
||||
"mov v3.8b, v2.8b \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0}, [%1]! \n" // load 8 Us
|
||||
"ld1 {v0.8b}, [%1], #8 \n" // load 8 Us
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {d2}, [%2]! \n" // load 8 Vs
|
||||
"ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs
|
||||
"subs %4, %4, #16 \n" // 16 pixels
|
||||
MEMACCESS(3)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%3], #32 \n" // Store 8 UYVY/16 pixels.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
@ -1503,7 +1504,7 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
|
||||
"+r"(dst_uyvy), // %3
|
||||
"+r"(width) // %4
|
||||
:
|
||||
: "cc", "memory", "d0", "d1", "d2", "d3"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I422TOUYVYROW_NEON
|
||||
@ -1577,28 +1578,28 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
|
||||
#ifdef HAS_ARGBTOYROW_NEON
|
||||
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
asm volatile (
|
||||
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
|
||||
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
|
||||
"vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
|
||||
"vmov.u8 d27, #16 \n" // Add 16 constant
|
||||
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
|
||||
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
|
||||
"movi v6.8b, #33 \n" // R * 0.2578 coefficient
|
||||
"movi v7.8b, #16 \n" // Add 16 constant
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vmull.u8 q2, d0, d24 \n" // B
|
||||
"vmlal.u8 q2, d1, d25 \n" // G
|
||||
"vmlal.u8 q2, d2, d26 \n" // R
|
||||
"vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
|
||||
"vqadd.u8 d0, d27 \n"
|
||||
"umull v3.8h, v0.8b, v4.8b \n" // B
|
||||
"umlal v3.8h, v1.8b, v5.8b \n" // G
|
||||
"umlal v3.8h, v2.8b, v6.8b \n" // R
|
||||
"sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
|
||||
"uqadd v0.8b, v0.8b, v7.8b \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
|
||||
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q12", "q13"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBTOYROW_NEON
|
||||
@ -1606,26 +1607,26 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
#ifdef HAS_ARGBTOYJROW_NEON
|
||||
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
asm volatile (
|
||||
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
|
||||
"vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
|
||||
"vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
|
||||
"movi v4.8b, #15 \n" // B * 0.11400 coefficient
|
||||
"movi v5.8b, #75 \n" // G * 0.58700 coefficient
|
||||
"movi v6.8b, #38 \n" // R * 0.29900 coefficient
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vmull.u8 q2, d0, d24 \n" // B
|
||||
"vmlal.u8 q2, d1, d25 \n" // G
|
||||
"vmlal.u8 q2, d2, d26 \n" // R
|
||||
"vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y
|
||||
"umull v3.8h, v0.8b, v4.8b \n" // B
|
||||
"umlal v3.8h, v1.8b, v5.8b \n" // G
|
||||
"umlal v3.8h, v2.8b, v6.8b \n" // R
|
||||
"sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
|
||||
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q12", "q13"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBTOYJROW_NEON
|
||||
@ -3048,20 +3049,20 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
|
||||
MEMACCESS(1)
|
||||
"vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB pixels.
|
||||
"ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vmull.u8 q0, d0, d1 \n" // multiply B
|
||||
"vmull.u8 q1, d2, d3 \n" // multiply G
|
||||
"vmull.u8 q2, d4, d5 \n" // multiply R
|
||||
"vmull.u8 q3, d6, d7 \n" // multiply A
|
||||
"vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B
|
||||
"vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G
|
||||
"vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R
|
||||
"vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
|
||||
"umull v0.8h, v0.8b, v4.8b \n" // multiply B
|
||||
"umull v1.8h, v1.8b, v5.8b \n" // multiply G
|
||||
"umull v2.8h, v2.8b, v6.8b \n" // multiply R
|
||||
"umull v3.8h, v3.8b, v7.8b \n" // multiply A
|
||||
"rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B
|
||||
"rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G
|
||||
"rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R
|
||||
"rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
|
||||
"bgt 1b \n"
|
||||
|
||||
: "+r"(src_argb0), // %0
|
||||
@ -3069,7 +3070,7 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBMULTIPLYROW_NEON
|
||||
@ -3083,14 +3084,16 @@ void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
|
||||
MEMACCESS(1)
|
||||
"vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
|
||||
"ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vqadd.u8 q0, q0, q2 \n" // add B, G
|
||||
"vqadd.u8 q1, q1, q3 \n" // add R, A
|
||||
"uqadd v0.8b, v0.8b, v4.8b \n"
|
||||
"uqadd v1.8b, v1.8b, v5.8b \n"
|
||||
"uqadd v2.8b, v2.8b, v6.8b \n"
|
||||
"uqadd v3.8b, v3.8b, v7.8b \n"
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
|
||||
"bgt 1b \n"
|
||||
|
||||
: "+r"(src_argb0), // %0
|
||||
@ -3098,7 +3101,7 @@ void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBADDROW_NEON
|
||||
@ -3112,14 +3115,16 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
|
||||
"ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
|
||||
MEMACCESS(1)
|
||||
"vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
|
||||
"ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vqsub.u8 q0, q0, q2 \n" // subtract B, G
|
||||
"vqsub.u8 q1, q1, q3 \n" // subtract R, A
|
||||
"uqsub v0.8b, v0.8b, v4.8b \n"
|
||||
"uqsub v1.8b, v1.8b, v5.8b \n"
|
||||
"uqsub v2.8b, v2.8b, v6.8b \n"
|
||||
"uqsub v3.8b, v3.8b, v7.8b \n"
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
|
||||
"bgt 1b \n"
|
||||
|
||||
: "+r"(src_argb0), // %0
|
||||
@ -3127,7 +3132,7 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBSUBTRACTROW_NEON
|
||||
@ -3141,27 +3146,27 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // alpha
|
||||
"movi v3.8b, #255 \n" // alpha
|
||||
// 8 pixel loop.
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0]! \n" // load 8 sobelx.
|
||||
"ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx.
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d1}, [%1]! \n" // load 8 sobely.
|
||||
"ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vqadd.u8 d0, d0, d1 \n" // add
|
||||
"vmov.u8 d1, d0 \n"
|
||||
"vmov.u8 d2, d0 \n"
|
||||
"uqadd v0.8b, v0.8b, v1.8b \n" // add
|
||||
"mov v1.8b, v0.8b \n"
|
||||
"mov v2.8b, v0.8b \n"
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_sobelx), // %0
|
||||
"+r"(src_sobely), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELROW_NEON
|
||||
@ -3175,20 +3180,20 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load 16 sobelx.
|
||||
"ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx.
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n" // load 16 sobely.
|
||||
"ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely.
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop.
|
||||
"vqadd.u8 q0, q0, q1 \n" // add
|
||||
"uqadd v0.16b, v0.16b, v1.16b \n" // add
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q0}, [%2]! \n" // store 16 pixels.
|
||||
"st1 {v0.16b}, [%2], #16 \n" // store 16 pixels.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_sobelx), // %0
|
||||
"+r"(src_sobely), // %1
|
||||
"+r"(dst_y), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1"
|
||||
: "cc", "memory", "v0", "v1"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELTOPLANEROW_NEON
|
||||
@ -3202,25 +3207,25 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // alpha
|
||||
"movi v3.8b, #255 \n" // alpha
|
||||
// 8 pixel loop.
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0]! \n" // load 8 sobelx.
|
||||
"ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx.
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0}, [%1]! \n" // load 8 sobely.
|
||||
"ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vqadd.u8 d1, d0, d2 \n" // add
|
||||
"uqadd v1.8b, v0.8b, v2.8b \n" // add
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_sobelx), // %0
|
||||
"+r"(src_sobely), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELXYROW_NEON
|
||||
@ -3236,28 +3241,28 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0],%5 \n" // top
|
||||
"ld1 {v0.8b}, [%0],%5 \n" // top
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d1}, [%0],%6 \n"
|
||||
"vsubl.u8 q0, d0, d1 \n"
|
||||
"ld1 {v1.8b}, [%0],%6 \n"
|
||||
"usubl v0.8h, v0.8b, v1.8b \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d2}, [%1],%5 \n" // center * 2
|
||||
"ld1 {v2.8b}, [%1],%5 \n" // center * 2
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d3}, [%1],%6 \n"
|
||||
"vsubl.u8 q1, d2, d3 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"ld1 {v3.8b}, [%1],%6 \n"
|
||||
"usubl v1.8h, v2.8b, v3.8b \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {d2}, [%2],%5 \n" // bottom
|
||||
"ld1 {v2.8b}, [%2],%5 \n" // bottom
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {d3}, [%2],%6 \n"
|
||||
"ld1 {v3.8b}, [%2],%6 \n"
|
||||
"subs %4, %4, #8 \n" // 8 pixels
|
||||
"vsubl.u8 q1, d2, d3 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"vabs.s16 q0, q0 \n"
|
||||
"vqmovn.u16 d0, q0 \n"
|
||||
"usubl v1.8h, v2.8b, v3.8b \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
"abs v0.8h, v0.8h \n"
|
||||
"uqxtn v0.8b, v0.8h \n"
|
||||
MEMACCESS(3)
|
||||
"vst1.8 {d0}, [%3]! \n" // store 8 sobelx
|
||||
"st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y0), // %0
|
||||
"+r"(src_y1), // %1
|
||||
@ -3266,7 +3271,7 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
"+r"(width) // %4
|
||||
: "r"(2), // %5
|
||||
"r"(6) // %6
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELXROW_NEON
|
||||
@ -3282,28 +3287,28 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0],%4 \n" // left
|
||||
"ld1 {v0.8b}, [%0],%4 \n" // left
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d1}, [%1],%4 \n"
|
||||
"vsubl.u8 q0, d0, d1 \n"
|
||||
"ld1 {v1.8b}, [%1],%4 \n"
|
||||
"usubl v0.8h, v0.8b, v1.8b \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0],%4 \n" // center * 2
|
||||
"ld1 {v2.8b}, [%0],%4 \n" // center * 2
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d3}, [%1],%4 \n"
|
||||
"vsubl.u8 q1, d2, d3 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"ld1 {v3.8b}, [%1],%4 \n"
|
||||
"usubl v1.8h, v2.8b, v3.8b \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0],%5 \n" // right
|
||||
"ld1 {v2.8b}, [%0],%5 \n" // right
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d3}, [%1],%5 \n"
|
||||
"ld1 {v3.8b}, [%1],%5 \n"
|
||||
"subs %3, %3, #8 \n" // 8 pixels
|
||||
"vsubl.u8 q1, d2, d3 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"vabs.s16 q0, q0 \n"
|
||||
"vqmovn.u16 d0, q0 \n"
|
||||
"usubl v1.8h, v2.8b, v3.8b \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
"abs v0.8h, v0.8h \n"
|
||||
"uqxtn v0.8b, v0.8h \n"
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 sobely
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 sobely
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y0), // %0
|
||||
"+r"(src_y1), // %1
|
||||
@ -3311,7 +3316,7 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
"+r"(width) // %3
|
||||
: "r"(1), // %4
|
||||
"r"(6) // %5
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELYROW_NEON
|
||||
|
4
third_party/libyuv/source/row_win.cc
vendored
4
third_party/libyuv/source/row_win.cc
vendored
@ -10,7 +10,7 @@
|
||||
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#if defined (_M_X64)
|
||||
#if defined (_M_X64) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h> // For _mm_maddubs_epi16
|
||||
#endif
|
||||
@ -78,7 +78,6 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
|
||||
__m128i xmm0, xmm1, xmm2, xmm3;
|
||||
const __m128i xmm5 = _mm_set1_epi8(-1);
|
||||
const __m128i xmm4 = _mm_setzero_si128();
|
||||
@ -132,7 +131,6 @@ void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
|
||||
__m128i xmm0, xmm1, xmm2, xmm3;
|
||||
const __m128i xmm5 = _mm_set1_epi8(-1);
|
||||
const __m128i xmm4 = _mm_setzero_si128();
|
||||
|
Loading…
Reference in New Issue
Block a user