Align image buffer in multiple-resolution encoder

Aligned the image buffer and stride to 32 bytes. This enables
calling the optimized scaler functions in libyuv and improves
performance.

Tested the libyuv scaler (x86 optimization) on Linux and Windows,
including: Linux 32/64-bit, Visual Studio 32/64-bit, Cygwin, and
MinGW32.

Also fixed an incorrect pointer in vpx_codec_encode().

Change-Id: Ibe97d7a0a745f82c43852fa4ed719be5a4db6abc
This commit is contained in:
Yunqing Wang
2011-12-08 12:31:01 -05:00
parent 254889cdfc
commit 153eec46e0
9 changed files with 348 additions and 295 deletions

View File

@@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 90 Version: 102
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@@ -13,21 +13,12 @@
#include <stddef.h> // for NULL, size_t #include <stddef.h> // for NULL, size_t
#ifndef WIN32 #if !(defined(_MSC_VER) && (_MSC_VER < 1600))
#include <stdint.h> // for uintptr_t #include <stdint.h> // for uintptr_t
#endif #endif
#ifndef INT_TYPES_DEFINED #ifndef INT_TYPES_DEFINED
#define INT_TYPES_DEFINED #define INT_TYPES_DEFINED
#ifdef COMPILER_MSVC
typedef __int64 int64;
#else
typedef long long int64;
#endif /* COMPILER_MSVC */
typedef int int32;
typedef short int16;
typedef char int8;
#ifdef COMPILER_MSVC #ifdef COMPILER_MSVC
typedef unsigned __int64 uint64; typedef unsigned __int64 uint64;
typedef __int64 int64; typedef __int64 int64;
@@ -38,9 +29,20 @@ typedef __int64 int64;
#define UINT64_C(x) x ## UI64 #define UINT64_C(x) x ## UI64
#endif #endif
#define INT64_F "I64" #define INT64_F "I64"
#else #else // COMPILER_MSVC
#ifdef __LP64__
typedef unsigned long uint64;
typedef long int64;
#ifndef INT64_C
#define INT64_C(x) x ## L
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UL
#endif
#define INT64_F "l"
#else // __LP64__
typedef unsigned long long uint64; typedef unsigned long long uint64;
//typedef long long int64; typedef long long int64;
#ifndef INT64_C #ifndef INT64_C
#define INT64_C(x) x ## LL #define INT64_C(x) x ## LL
#endif #endif
@@ -48,10 +50,14 @@ typedef unsigned long long uint64;
#define UINT64_C(x) x ## ULL #define UINT64_C(x) x ## ULL
#endif #endif
#define INT64_F "ll" #define INT64_F "ll"
#endif /* COMPILER_MSVC */ #endif // __LP64__
#endif // COMPILER_MSVC
typedef unsigned int uint32; typedef unsigned int uint32;
typedef int int32;
typedef unsigned short uint16; typedef unsigned short uint16;
typedef short int16;
typedef unsigned char uint8; typedef unsigned char uint8;
typedef char int8;
#endif // INT_TYPES_DEFINED #endif // INT_TYPES_DEFINED
// Detect compiler is for x86 or x64. // Detect compiler is for x86 or x64.
@@ -60,7 +66,6 @@ typedef unsigned char uint8;
#define CPU_X86 1 #define CPU_X86 1
#endif #endif
#define IS_ALIGNED(p, a) (0==((uintptr_t)(p) & ((a)-1)))
#define ALIGNP(p, t) \ #define ALIGNP(p, t) \
((uint8*)((((uintptr_t)(p) + \ ((uint8*)((((uintptr_t)(p) + \
((t)-1)) & ~((t)-1)))) ((t)-1)) & ~((t)-1))))

View File

@@ -11,21 +11,39 @@
#ifndef INCLUDE_LIBYUV_CPU_ID_H_ #ifndef INCLUDE_LIBYUV_CPU_ID_H_
#define INCLUDE_LIBYUV_CPU_ID_H_ #define INCLUDE_LIBYUV_CPU_ID_H_
//namespace libyuv { #ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// These flags are only valid on x86 processors // These flags are only valid on x86 processors
static const int kCpuHasSSE2 = 1; static const int kCpuHasSSE2 = 1;
static const int kCpuHasSSSE3 = 2; static const int kCpuHasSSSE3 = 2;
// SIMD support on ARM processors // These flags are only valid on ARM processors
static const int kCpuHasNEON = 4; static const int kCpuHasNEON = 4;
// Internal flag to indicate cpuid is initialized.
static const int kCpuInitialized = 8;
// Detect CPU has SSE2 etc. // Detect CPU has SSE2 etc.
int TestCpuFlag(int flag); // test_flag parameter should be one of kCpuHas constants above
// returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
extern int cpu_info_;
extern int InitCpuFlags();
return (cpu_info_ ? cpu_info_ : InitCpuFlags()) & test_flag;
}
// For testing, allow CPU flags to be disabled. // For testing, allow CPU flags to be disabled.
void MaskCpuFlagsForTest(int enable_flags); // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// -1 to enable all cpu specific optimizations.
// 0 to disable all cpu specific optimizations.
void MaskCpuFlags(int enable_flags);
//} // namespace libyuv #ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CPU_ID_H_ #endif // INCLUDE_LIBYUV_CPU_ID_H_

View File

@@ -13,7 +13,10 @@
#include "third_party/libyuv/include/libyuv/basic_types.h" #include "third_party/libyuv/include/libyuv/basic_types.h"
//namespace libyuv { #ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Supported filtering // Supported filtering
typedef enum { typedef enum {
@@ -42,16 +45,8 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_width, int dst_height, int dst_width, int dst_height,
FilterMode filtering); FilterMode filtering);
// Legacy API // Legacy API. Deprecated
// If dst_height_offset is non-zero, the image is offset by that many pixels int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
// and stretched to (dst_height - dst_height_offset * 2) pixels high,
// instead of dst_height.
int Scale_1(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_height_offset,
int interpolate);
// Same, but specified src terms of each plane location and stride.
int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int src_stride_y, int src_stride_u, int src_stride_v, int src_stride_y, int src_stride_u, int src_stride_v,
int src_width, int src_height, int src_width, int src_height,
uint8* dst_y, uint8* dst_u, uint8* dst_v, uint8* dst_y, uint8* dst_u, uint8* dst_v,
@@ -59,9 +54,17 @@ int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int dst_width, int dst_height, int dst_width, int dst_height,
int interpolate); int interpolate);
// Legacy API. Deprecated
int ScaleOffset(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_yoffset,
int interpolate);
// For testing, allow disabling of optimizations. // For testing, allow disabling of optimizations.
void SetUseReferenceImpl(int use); void SetUseReferenceImpl(int use);
//} // namespace libyuv #ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_H_ #endif // INCLUDE_LIBYUV_SCALE_H_

View File

@@ -9,66 +9,73 @@
*/ */
#include "third_party/libyuv/include/libyuv/cpu_id.h" #include "third_party/libyuv/include/libyuv/cpu_id.h"
#include "third_party/libyuv/include/libyuv/basic_types.h" // for CPU_X86
#ifdef _MSC_VER #ifdef _MSC_VER
#include <intrin.h> #include <intrin.h>
#endif #endif
#ifdef __ANDROID__
#include <cpu-features.h>
#endif
#include "third_party/libyuv/include/libyuv/basic_types.h" // for CPU_X86
// TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux. // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__) #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
static inline void __cpuid(int cpu_info[4], int info_type) { static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile ( asm volatile (
"mov %%ebx, %%edi\n" "mov %%ebx, %%edi \n"
"cpuid\n" "cpuid \n"
"xchg %%edi, %%ebx\n" "xchg %%edi, %%ebx \n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type) : "a"(info_type)
); );
} }
#elif defined(__i386__) || defined(__x86_64__) #elif defined(__i386__) || defined(__x86_64__)
static inline void __cpuid(int cpu_info[4], int info_type) { static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile ( asm volatile (
"cpuid\n" "cpuid \n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type) : "a"(info_type)
); );
} }
#endif #endif
//namespace libyuv { #ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// CPU detect function for SIMD instruction sets. // CPU detect function for SIMD instruction sets.
static int cpu_info_initialized_ = 0; int cpu_info_ = 0;
static int cpu_info_ = 0;
// Global lock for cpu initialization. int InitCpuFlags() {
static void InitCpuFlags() {
#ifdef CPU_X86 #ifdef CPU_X86
int cpu_info[4]; int cpu_info[4];
__cpuid(cpu_info, 1); __cpuid(cpu_info, 1);
cpu_info_ = (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) | cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) |
(cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0); (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
kCpuInitialized;
#elif defined(__ANDROID__) && defined(__ARM_NEON__)
uint64_t features = android_getCpuFeatures();
cpu_info_ = ((features & ANDROID_CPU_ARM_FEATURE_NEON) ? kCpuHasNEON : 0) |
kCpuInitialized;
#elif defined(__ARM_NEON__) #elif defined(__ARM_NEON__)
// gcc -mfpu=neon defines __ARM_NEON__ // gcc -mfpu=neon defines __ARM_NEON__
// if code is specifically built for Neon-only, enable the flag. // Enable Neon if you want support for Neon and Arm, and use MaskCpuFlags
cpu_info_ |= kCpuHasNEON; // to disable Neon on devices that do not have it.
cpu_info_ = kCpuHasNEON | kCpuInitialized;
#else #else
cpu_info_ = 0; cpu_info_ = kCpuInitialized;
#endif #endif
cpu_info_initialized_ = 1; return cpu_info_;
} }
void MaskCpuFlagsForTest(int enable_flags) { void MaskCpuFlags(int enable_flags) {
InitCpuFlags(); InitCpuFlags();
cpu_info_ &= enable_flags; cpu_info_ = (cpu_info_ & enable_flags) | kCpuInitialized;
} }
int TestCpuFlag(int flag) { #ifdef __cplusplus
if (!cpu_info_initialized_) { } // extern "C"
InitCpuFlags(); } // namespace libyuv
} #endif
return cpu_info_ & flag ? 1 : 0;
}
//} // namespace libyuv

View File

@@ -14,7 +14,7 @@
#include "third_party/libyuv/include/libyuv/basic_types.h" #include "third_party/libyuv/include/libyuv/basic_types.h"
#define kMaxStride (2048 * 4) #define kMaxStride (2048 * 4)
//#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
#if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR) #if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR)
#define YUV_DISABLE_ASM #define YUV_DISABLE_ASM
@@ -72,7 +72,10 @@ void FastConvertYUVToABGRRow_NEON(const uint8* y_buf,
#define HAS_REVERSE_ROW_NEON #define HAS_REVERSE_ROW_NEON
#endif #endif
//extern "C" { #ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
@@ -253,6 +256,9 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
#endif #endif
//} // extern "C" #ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // LIBYUV_SOURCE_ROW_H_ #endif // LIBYUV_SOURCE_ROW_H_

View File

@@ -15,6 +15,17 @@
#include "third_party/libyuv/include/libyuv/cpu_id.h" #include "third_party/libyuv/include/libyuv/cpu_id.h"
#include "third_party/libyuv/source/row.h" #include "third_party/libyuv/source/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
/*
* Note: Defining YUV_DISABLE_ASM allows to use c version.
*/
//#define YUV_DISABLE_ASM
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define ALIGN16(var) __declspec(align(16)) var #define ALIGN16(var) __declspec(align(16)) var
#else #else
@@ -26,8 +37,6 @@
// Note: Some SSE2 reference manuals // Note: Some SSE2 reference manuals
// cpuvol1.pdf agner_instruction_tables.pdf 253666.pdf 253667.pdf // cpuvol1.pdf agner_instruction_tables.pdf 253666.pdf 253667.pdf
//namespace libyuv {
// Set the following flag to true to revert to only // Set the following flag to true to revert to only
// using the reference implementation ScalePlaneBox(), and // using the reference implementation ScalePlaneBox(), and
// NOT the optimized versions. Useful for debugging and // NOT the optimized versions. Useful for debugging and
@@ -40,9 +49,7 @@ void SetUseReferenceImpl(int use) {
use_reference_impl_ = use; use_reference_impl_ = use;
} }
// TODO: The preprocessor definitions for Win64 are not right in build system. // ScaleRowDown2Int also used by planar functions
// Disable optimized code for now.
#define YUV_DISABLE_ASM
/** /**
* NEON downscalers with interpolation. * NEON downscalers with interpolation.
@@ -511,83 +518,116 @@ static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride,
!defined(YUV_DISABLE_ASM) !defined(YUV_DISABLE_ASM)
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define TALIGN16(t, var) __declspec(align(16)) t _ ## var #define TALIGN16(t, var) __declspec(align(16)) t _ ## var
#elif defined(OSX) && defined(__i386__) #elif (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__)
#define TALIGN16(t, var) t var __attribute__((aligned(16))) #define TALIGN16(t, var) t var __attribute__((aligned(16)))
#else #else
#define TALIGN16(t, var) t _ ## var __attribute__((aligned(16))) #define TALIGN16(t, var) t _ ## var __attribute__((aligned(16)))
#endif #endif
#if (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) && \
defined(__i386__)
#define DECLARE_FUNCTION(name) \
".text \n" \
".globl _" #name " \n" \
"_" #name ": \n"
#else
#define DECLARE_FUNCTION(name) \
".text \n" \
".global " #name " \n" \
#name ": \n"
#endif
// Offsets for source bytes 0 to 9 // Offsets for source bytes 0 to 9
//extern "C"
TALIGN16(const uint8, shuf0[16]) = TALIGN16(const uint8, shuf0[16]) =
{ 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 }; { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12. // Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
//extern "C"
TALIGN16(const uint8, shuf1[16]) = TALIGN16(const uint8, shuf1[16]) =
{ 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 }; { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
//extern "C"
TALIGN16(const uint8, shuf2[16]) = TALIGN16(const uint8, shuf2[16]) =
{ 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 }; { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
// Offsets for source bytes 0 to 10 // Offsets for source bytes 0 to 10
//extern "C"
TALIGN16(const uint8, shuf01[16]) = TALIGN16(const uint8, shuf01[16]) =
{ 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 }; { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. // Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
//extern "C"
TALIGN16(const uint8, shuf11[16]) = TALIGN16(const uint8, shuf11[16]) =
{ 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 }; { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
//extern "C"
TALIGN16(const uint8, shuf21[16]) = TALIGN16(const uint8, shuf21[16]) =
{ 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 }; { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
// Coefficients for source bytes 0 to 10 // Coefficients for source bytes 0 to 10
//extern "C"
TALIGN16(const uint8, madd01[16]) = TALIGN16(const uint8, madd01[16]) =
{ 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 }; { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
// Coefficients for source bytes 10 to 21 // Coefficients for source bytes 10 to 21
//extern "C"
TALIGN16(const uint8, madd11[16]) = TALIGN16(const uint8, madd11[16]) =
{ 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 }; { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
// Coefficients for source bytes 21 to 31 // Coefficients for source bytes 21 to 31
//extern "C"
TALIGN16(const uint8, madd21[16]) = TALIGN16(const uint8, madd21[16]) =
{ 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 }; { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
// Coefficients for source bytes 21 to 31 // Coefficients for source bytes 21 to 31
//extern "C"
TALIGN16(const int16, round34[8]) = TALIGN16(const int16, round34[8]) =
{ 2, 2, 2, 2, 2, 2, 2, 2 }; { 2, 2, 2, 2, 2, 2, 2, 2 };
//extern "C"
TALIGN16(const uint8, shuf38a[16]) = TALIGN16(const uint8, shuf38a[16]) =
{ 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
//extern "C"
TALIGN16(const uint8, shuf38b[16]) = TALIGN16(const uint8, shuf38b[16]) =
{ 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 }; { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
// Arrange words 0,3,6 into 0,1,2 // Arrange words 0,3,6 into 0,1,2
//extern "C"
TALIGN16(const uint8, shufac0[16]) = TALIGN16(const uint8, shufac0[16]) =
{ 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
// Arrange words 0,3,6 into 3,4,5 // Arrange words 0,3,6 into 3,4,5
//extern "C"
TALIGN16(const uint8, shufac3[16]) = TALIGN16(const uint8, shufac3[16]) =
{ 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 }; { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
// Scaling values for boxes of 3x3 and 2x3 // Scaling values for boxes of 3x3 and 2x3
//extern "C"
TALIGN16(const uint16, scaleac3[8]) = TALIGN16(const uint16, scaleac3[8]) =
{ 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 }; { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
// Arrange first value for pixels 0,1,2,3,4,5 // Arrange first value for pixels 0,1,2,3,4,5
//extern "C"
TALIGN16(const uint8, shufab0[16]) = TALIGN16(const uint8, shufab0[16]) =
{ 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 }; { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
// Arrange second value for pixels 0,1,2,3,4,5 // Arrange second value for pixels 0,1,2,3,4,5
//extern "C"
TALIGN16(const uint8, shufab1[16]) = TALIGN16(const uint8, shufab1[16]) =
{ 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 }; { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
// Arrange third value for pixels 0,1,2,3,4,5 // Arrange third value for pixels 0,1,2,3,4,5
//extern "C"
TALIGN16(const uint8, shufab2[16]) = TALIGN16(const uint8, shufab2[16]) =
{ 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 }; { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
// Scaling values for boxes of 3x2 and 2x2 // Scaling values for boxes of 3x2 and 2x2
//extern "C"
TALIGN16(const uint16, scaleab2[8]) = TALIGN16(const uint16, scaleab2[8]) =
{ 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 }; { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
#endif #endif
@@ -1620,14 +1660,7 @@ static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride,
void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride, void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleRowDown8Int_SSE2)
#if defined(OSX)
".globl _ScaleRowDown8Int_SSE2 \n"
"_ScaleRowDown8Int_SSE2: \n"
#else
".global ScaleRowDown8Int_SSE2 \n"
"ScaleRowDown8Int_SSE2: \n"
#endif
"pusha \n" "pusha \n"
"mov 0x24(%esp),%esi \n" "mov 0x24(%esp),%esi \n"
"mov 0x28(%esp),%ebx \n" "mov 0x28(%esp),%ebx \n"
@@ -1691,14 +1724,7 @@ void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride, void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleRowDown34_SSSE3)
#if defined(OSX)
".globl _ScaleRowDown34_SSSE3 \n"
"_ScaleRowDown34_SSSE3: \n"
#else
".global ScaleRowDown34_SSSE3 \n"
"ScaleRowDown34_SSSE3: \n"
#endif
"pusha \n" "pusha \n"
"mov 0x24(%esp),%esi \n" "mov 0x24(%esp),%esi \n"
"mov 0x2c(%esp),%edi \n" "mov 0x2c(%esp),%edi \n"
@@ -1729,14 +1755,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride, void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleRowDown34_1_Int_SSSE3)
#if defined(OSX)
".globl _ScaleRowDown34_1_Int_SSSE3 \n"
"_ScaleRowDown34_1_Int_SSSE3: \n"
#else
".global ScaleRowDown34_1_Int_SSSE3 \n"
"ScaleRowDown34_1_Int_SSSE3: \n"
#endif
"pusha \n" "pusha \n"
"mov 0x24(%esp),%esi \n" "mov 0x24(%esp),%esi \n"
"mov 0x28(%esp),%ebp \n" "mov 0x28(%esp),%ebp \n"
@@ -1790,14 +1809,7 @@ void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride, void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleRowDown34_0_Int_SSSE3)
#if defined(OSX)
".globl _ScaleRowDown34_0_Int_SSSE3 \n"
"_ScaleRowDown34_0_Int_SSSE3: \n"
#else
".global ScaleRowDown34_0_Int_SSSE3 \n"
"ScaleRowDown34_0_Int_SSSE3: \n"
#endif
"pusha \n" "pusha \n"
"mov 0x24(%esp),%esi \n" "mov 0x24(%esp),%esi \n"
"mov 0x28(%esp),%ebp \n" "mov 0x28(%esp),%ebp \n"
@@ -1854,14 +1866,7 @@ void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride, void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleRowDown38_SSSE3)
#if defined(OSX)
".globl _ScaleRowDown38_SSSE3 \n"
"_ScaleRowDown38_SSSE3: \n"
#else
".global ScaleRowDown38_SSSE3 \n"
"ScaleRowDown38_SSSE3: \n"
#endif
"pusha \n" "pusha \n"
"mov 0x24(%esp),%esi \n" "mov 0x24(%esp),%esi \n"
"mov 0x28(%esp),%edx \n" "mov 0x28(%esp),%edx \n"
@@ -1890,14 +1895,7 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride, void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleRowDown38_3_Int_SSSE3)
#if defined(OSX)
".globl _ScaleRowDown38_3_Int_SSSE3 \n"
"_ScaleRowDown38_3_Int_SSSE3: \n"
#else
".global ScaleRowDown38_3_Int_SSSE3 \n"
"ScaleRowDown38_3_Int_SSSE3: \n"
#endif
"pusha \n" "pusha \n"
"mov 0x24(%esp),%esi \n" "mov 0x24(%esp),%esi \n"
"mov 0x28(%esp),%edx \n" "mov 0x28(%esp),%edx \n"
@@ -1954,14 +1952,7 @@ void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride, void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleRowDown38_2_Int_SSSE3)
#if defined(OSX)
".globl _ScaleRowDown38_2_Int_SSSE3 \n"
"_ScaleRowDown38_2_Int_SSSE3: \n"
#else
".global ScaleRowDown38_2_Int_SSSE3 \n"
"ScaleRowDown38_2_Int_SSSE3: \n"
#endif
"pusha \n" "pusha \n"
"mov 0x24(%esp),%esi \n" "mov 0x24(%esp),%esi \n"
"mov 0x28(%esp),%edx \n" "mov 0x28(%esp),%edx \n"
@@ -2001,14 +1992,7 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
uint16* dst_ptr, int src_width, uint16* dst_ptr, int src_width,
int src_height); int src_height);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleAddRows_SSE2)
#if defined(OSX)
".globl _ScaleAddRows_SSE2 \n"
"_ScaleAddRows_SSE2: \n"
#else
".global ScaleAddRows_SSE2 \n"
"ScaleAddRows_SSE2: \n"
#endif
"pusha \n" "pusha \n"
"mov 0x24(%esp),%esi \n" "mov 0x24(%esp),%esi \n"
"mov 0x28(%esp),%edx \n" "mov 0x28(%esp),%edx \n"
@@ -2052,14 +2036,7 @@ void ScaleFilterRows_SSE2(uint8* dst_ptr,
const uint8* src_ptr, int src_stride, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction); int dst_width, int source_y_fraction);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleFilterRows_SSE2)
#if defined(OSX)
".globl _ScaleFilterRows_SSE2 \n"
"_ScaleFilterRows_SSE2: \n"
#else
".global ScaleFilterRows_SSE2 \n"
"ScaleFilterRows_SSE2: \n"
#endif
"push %esi \n" "push %esi \n"
"push %edi \n" "push %edi \n"
"mov 0xc(%esp),%edi \n" "mov 0xc(%esp),%edi \n"
@@ -2147,14 +2124,7 @@ void ScaleFilterRows_SSSE3(uint8* dst_ptr,
const uint8* src_ptr, int src_stride, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction); int dst_width, int source_y_fraction);
asm( asm(
".text \n" DECLARE_FUNCTION(ScaleFilterRows_SSSE3)
#if defined(OSX)
".globl _ScaleFilterRows_SSSE3 \n"
"_ScaleFilterRows_SSSE3: \n"
#else
".global ScaleFilterRows_SSSE3 \n"
"ScaleFilterRows_SSSE3: \n"
#endif
"push %esi \n" "push %esi \n"
"push %edi \n" "push %edi \n"
"mov 0xc(%esp),%edi \n" "mov 0xc(%esp),%edi \n"
@@ -2318,7 +2288,7 @@ static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
asm volatile( asm volatile (
"movdqa (%4),%%xmm2 \n" // _shuf01 "movdqa (%4),%%xmm2 \n" // _shuf01
"movdqa (%5),%%xmm3 \n" // _shuf11 "movdqa (%5),%%xmm3 \n" // _shuf11
"movdqa (%6),%%xmm4 \n" // _shuf21 "movdqa (%6),%%xmm4 \n" // _shuf21
@@ -2436,7 +2406,7 @@ static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
#define HAS_SCALEROWDOWN38_SSSE3 #define HAS_SCALEROWDOWN38_SSSE3
static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
asm volatile( asm volatile (
"movdqa (%3),%%xmm4 \n" "movdqa (%3),%%xmm4 \n"
"movdqa (%4),%%xmm5 \n" "movdqa (%4),%%xmm5 \n"
"1:" "1:"
@@ -2560,7 +2530,7 @@ static void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride, static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
uint16* dst_ptr, int src_width, uint16* dst_ptr, int src_width,
int src_height) { int src_height) {
asm volatile( asm volatile (
"pxor %%xmm5,%%xmm5 \n" "pxor %%xmm5,%%xmm5 \n"
"1:" "1:"
"movdqa (%0),%%xmm2 \n" "movdqa (%0),%%xmm2 \n"
@@ -2602,7 +2572,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
const uint8* src_ptr, int src_stride, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction) { int dst_width, int source_y_fraction) {
if (source_y_fraction == 0) { if (source_y_fraction == 0) {
asm volatile( asm volatile (
"1:" "1:"
"movdqa (%1),%%xmm0 \n" "movdqa (%1),%%xmm0 \n"
"lea 0x10(%1),%1 \n" "lea 0x10(%1),%1 \n"
@@ -2620,7 +2590,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
); );
return; return;
} else if (source_y_fraction == 128) { } else if (source_y_fraction == 128) {
asm volatile( asm volatile (
"1:" "1:"
"movdqa (%1),%%xmm0 \n" "movdqa (%1),%%xmm0 \n"
"movdqa (%1,%3,1),%%xmm2 \n" "movdqa (%1,%3,1),%%xmm2 \n"
@@ -2640,7 +2610,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
); );
return; return;
} else { } else {
asm volatile( asm volatile (
"mov %3,%%eax \n" "mov %3,%%eax \n"
"movd %%eax,%%xmm6 \n" "movd %%eax,%%xmm6 \n"
"punpcklwd %%xmm6,%%xmm6 \n" "punpcklwd %%xmm6,%%xmm6 \n"
@@ -2693,7 +2663,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
const uint8* src_ptr, int src_stride, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction) { int dst_width, int source_y_fraction) {
if (source_y_fraction == 0) { if (source_y_fraction == 0) {
asm volatile( asm volatile (
"1:" "1:"
"movdqa (%1),%%xmm0 \n" "movdqa (%1),%%xmm0 \n"
"lea 0x10(%1),%1 \n" "lea 0x10(%1),%1 \n"
@@ -2711,7 +2681,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
); );
return; return;
} else if (source_y_fraction == 128) { } else if (source_y_fraction == 128) {
asm volatile( asm volatile (
"1:" "1:"
"movdqa (%1),%%xmm0 \n" "movdqa (%1),%%xmm0 \n"
"movdqa (%1,%3,1),%%xmm2 \n" "movdqa (%1,%3,1),%%xmm2 \n"
@@ -2731,7 +2701,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
); );
return; return;
} else { } else {
asm volatile( asm volatile (
"mov %3,%%eax \n" "mov %3,%%eax \n"
"shr %%eax \n" "shr %%eax \n"
"mov %%al,%%ah \n" "mov %%al,%%ah \n"
@@ -3095,10 +3065,7 @@ static void ScalePlaneDown2(int src_width, int src_height,
ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON; ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON;
} else } else
#endif #endif
/* TODO: Force to call C version all the time in ordert to get matching results #if defined(HAS_SCALEROWDOWN2_SSE2)
* in multi-resolution encoder example.
*/
#if 0 //defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 16) && IS_ALIGNED(dst_width, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
@@ -3292,7 +3259,7 @@ static void ScalePlaneDown34(int src_width, int src_height,
src_row = 0; src_row = 0;
} }
} }
} }
} }
/** /**
@@ -3369,7 +3336,7 @@ static void ScalePlaneDown38(int src_width, int src_height,
} }
dst_ptr += dst_stride; dst_ptr += dst_stride;
} }
} }
} }
__inline static uint32 SumBox(int iboxwidth, int iboxheight, __inline static uint32 SumBox(int iboxwidth, int iboxheight,
@@ -3630,7 +3597,7 @@ static void ScalePlaneBilinear(int src_width, int src_height,
} }
} }
} }
} }
} }
/** /**
@@ -3818,36 +3785,32 @@ int I420Scale(const uint8* src_y, int src_stride_y,
src_stride_v = -src_stride_v; src_stride_v = -src_stride_v;
} }
{ {
int halfsrc_width = (src_width + 1) >> 1; int src_halfwidth = (src_width + 1) >> 1;
int halfsrc_height = (src_height + 1) >> 1; int src_halfheight = (src_height + 1) >> 1;
int halfdst_width = (dst_width + 1) >> 1; int dst_halfwidth = (dst_width + 1) >> 1;
int halfoheight = (dst_height + 1) >> 1; int dst_halfheight = (dst_height + 1) >> 1;
ScalePlane(src_y, src_stride_y, src_width, src_height, ScalePlane(src_y, src_stride_y, src_width, src_height,
dst_y, dst_stride_y, dst_width, dst_height, dst_y, dst_stride_y, dst_width, dst_height,
filtering, use_reference_impl_); filtering, use_reference_impl_);
ScalePlane(src_u, src_stride_u, halfsrc_width, halfsrc_height, ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
dst_u, dst_stride_u, halfdst_width, halfoheight, dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
filtering, use_reference_impl_); filtering, use_reference_impl_);
ScalePlane(src_v, src_stride_v, halfsrc_width, halfsrc_height, ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
dst_v, dst_stride_v, halfdst_width, halfoheight, dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
filtering, use_reference_impl_); filtering, use_reference_impl_);
} }
return 0; return 0;
} }
int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v, // Deprecated api
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int src_stride_y, int src_stride_u, int src_stride_v, int src_stride_y, int src_stride_u, int src_stride_v,
int src_width, int src_height, int src_width, int src_height,
uint8* dst_y, uint8* dst_u, uint8* dst_v, uint8* dst_y, uint8* dst_u, uint8* dst_v,
int dst_stride_y, int dst_stride_u, int dst_stride_v, int dst_stride_y, int dst_stride_u, int dst_stride_v,
int dst_width, int dst_height, int dst_width, int dst_height,
int interpolate) { int interpolate) {
int halfsrc_width;
int halfsrc_height;
int halfdst_width;
int halfoheight;
FilterMode filtering;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1; return -1;
@@ -3864,51 +3827,58 @@ int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
src_stride_u = -src_stride_u; src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v; src_stride_v = -src_stride_v;
} }
halfsrc_width = (src_width + 1) >> 1; {
halfsrc_height = (src_height + 1) >> 1; int src_halfwidth = (src_width + 1) >> 1;
halfdst_width = (dst_width + 1) >> 1; int src_halfheight = (src_height + 1) >> 1;
halfoheight = (dst_height + 1) >> 1; int dst_halfwidth = (dst_width + 1) >> 1;
filtering = interpolate ? kFilterBox : kFilterNone; int dst_halfheight = (dst_height + 1) >> 1;
FilterMode filtering = interpolate ? kFilterBox : kFilterNone;
ScalePlane(src_y, src_stride_y, src_width, src_height, ScalePlane(src_y, src_stride_y, src_width, src_height,
dst_y, dst_stride_y, dst_width, dst_height, dst_y, dst_stride_y, dst_width, dst_height,
filtering, use_reference_impl_); filtering, use_reference_impl_);
ScalePlane(src_u, src_stride_u, halfsrc_width, halfsrc_height, ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
dst_u, dst_stride_u, halfdst_width, halfoheight, dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
filtering, use_reference_impl_); filtering, use_reference_impl_);
ScalePlane(src_v, src_stride_v, halfsrc_width, halfsrc_height, ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
dst_v, dst_stride_v, halfdst_width, halfoheight, dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
filtering, use_reference_impl_); filtering, use_reference_impl_);
}
return 0; return 0;
} }
int Scale_1(const uint8* src, int src_width, int src_height, // Deprecated api
uint8* dst, int dst_width, int dst_height, int ooffset, int ScaleOffset(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_yoffset,
int interpolate) { int interpolate) {
if (!src || src_width <= 0 || src_height <= 0 || if (!src || src_width <= 0 || src_height <= 0 ||
!dst || dst_width <= 0 || dst_height <= 0 || ooffset < 0 || !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset < 0 ||
ooffset >= dst_height) { dst_yoffset >= dst_height) {
return -1; return -1;
} }
ooffset = ooffset & ~1; // chroma requires offset to multiple of 2. dst_yoffset = dst_yoffset & ~1; // chroma requires offset to multiple of 2.
{ {
int halfsrc_width = (src_width + 1) >> 1; int src_halfwidth = (src_width + 1) >> 1;
int halfsrc_height = (src_height + 1) >> 1; int src_halfheight = (src_height + 1) >> 1;
int halfdst_width = (dst_width + 1) >> 1; int dst_halfwidth = (dst_width + 1) >> 1;
int halfoheight = (dst_height + 1) >> 1; int dst_halfheight = (dst_height + 1) >> 1;
int aheight = dst_height - ooffset * 2; // actual output height int aheight = dst_height - dst_yoffset * 2; // actual output height
const uint8* const iyptr = src; const uint8* const src_y = src;
uint8* oyptr = dst + ooffset * dst_width; const uint8* const src_u = src + src_width * src_height;
const uint8* const iuptr = src + src_width * src_height; const uint8* const src_v = src + src_width * src_height +
uint8* ouptr = dst + dst_width * dst_height + (ooffset >> 1) * halfdst_width; src_halfwidth * src_halfheight;
const uint8* const ivptr = src + src_width * src_height + uint8* dst_y = dst + dst_yoffset * dst_width;
halfsrc_width * halfsrc_height; uint8* dst_u = dst + dst_width * dst_height +
uint8* ovptr = dst + dst_width * dst_height + halfdst_width * halfoheight + (dst_yoffset >> 1) * dst_halfwidth;
(ooffset >> 1) * halfdst_width; uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
return Scale_2(iyptr, iuptr, ivptr, src_width, halfsrc_width, halfsrc_width, (dst_yoffset >> 1) * dst_halfwidth;
src_width, src_height, oyptr, ouptr, ovptr, dst_width, return Scale(src_y, src_u, src_v, src_width, src_halfwidth, src_halfwidth,
halfdst_width, halfdst_width, dst_width, aheight, interpolate); src_width, src_height, dst_y, dst_u, dst_v, dst_width,
dst_halfwidth, dst_halfwidth, dst_width, aheight, interpolate);
} }
} }
//} // namespace libyuv #ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@@ -78,6 +78,8 @@ static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
/* Frame-reader selected at runtime in main(): plain read_frame() when the
 * allocated Y stride equals the display width (one contiguous fread per
 * plane suffices), read_frame_by_row() otherwise (stride-aligned buffer
 * requires a per-row copy). */
int (*read_frame_p)(FILE *f, vpx_image_t *img);
static int read_frame(FILE *f, vpx_image_t *img) { static int read_frame(FILE *f, vpx_image_t *img) {
size_t nbytes, to_read; size_t nbytes, to_read;
int res = 1; int res = 1;
@@ -92,6 +94,55 @@ static int read_frame(FILE *f, vpx_image_t *img) {
return res; return res;
} }
/* Read one raw I420/YV12 frame from |f| into |img|, copying row by row.
 * This path is needed when the image buffer's stride is wider than the
 * display width (e.g. when vpx_img_alloc() over-allocates for alignment),
 * so the plane data in the file is not contiguous in the buffer.
 * Returns 1 on success, 0 on EOF or a short read (a partial read prints a
 * warning). */
static int read_frame_by_row(FILE *f, vpx_image_t *img) {
  size_t nbytes, to_read;
  int res = 1;
  int plane;

  for (plane = 0; plane < 3; plane++) {
    unsigned char *buf;
    int width = (plane ? (1 + img->d_w) / 2 : img->d_w);
    int height = (plane ? (1 + img->d_h) / 2 : img->d_h);
    int row;

    /* The file always stores planes in Y,U,V order, but a YV12 image
     * swaps the U and V plane pointers, so map the loop index to the
     * correct plane for the image format. */
    switch (plane) {
      case 1:
        buf = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V
                                                       : VPX_PLANE_U];
        break;
      case 2:
        buf = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U
                                                       : VPX_PLANE_V];
        break;
      default:
        buf = img->planes[plane];
    }

    for (row = 0; row < height; row++) {
      to_read = width;
      nbytes = fread(buf, 1, to_read, f);
      if (nbytes != to_read) {
        res = 0;
        if (nbytes > 0)
          printf("Warning: Read partial frame. Check your width & height!\n");
        break;
      }
      /* Advance by the buffer stride, which may exceed the row width. */
      buf += img->stride[plane];
    }
    if (!res)
      break;
  }
  return res;
}
static void write_ivf_file_header(FILE *outfile, static void write_ivf_file_header(FILE *outfile,
const vpx_codec_enc_cfg_t *cfg, const vpx_codec_enc_cfg_t *cfg,
int frame_cnt) { int frame_cnt) {
@@ -262,9 +313,14 @@ int main(int argc, char **argv)
/* Allocate image for each encoder */ /* Allocate image for each encoder */
for (i=0; i< NUM_ENCODERS; i++) for (i=0; i< NUM_ENCODERS; i++)
if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 1)) if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h); die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
read_frame_p = read_frame;
else
read_frame_p = read_frame_by_row;
for (i=0; i< NUM_ENCODERS; i++) for (i=0; i< NUM_ENCODERS; i++)
write_ivf_file_header(outfile[i], &cfg[i], 0); write_ivf_file_header(outfile[i], &cfg[i], 0);
@@ -305,35 +361,22 @@ int main(int argc, char **argv)
const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS]; const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
flags = 0; flags = 0;
frame_avail = read_frame(infile, &raw[0]); frame_avail = read_frame_p(infile, &raw[0]);
for ( i=1; i<NUM_ENCODERS; i++)
{
if(frame_avail) if(frame_avail)
{
for ( i=1; i<NUM_ENCODERS; i++)
{ {
/*Scale the image down a number of times by downsampling factor*/ /*Scale the image down a number of times by downsampling factor*/
int src_uvwidth = (raw[i-1].d_w + 1) >> 1;
int src_uvheight = (raw[i-1].d_h + 1) >> 1;
const unsigned char* src_y = raw[i-1].planes[VPX_PLANE_Y];
const unsigned char* src_u = raw[i-1].planes[VPX_PLANE_Y]
+ raw[i-1].d_w*raw[i-1].d_h;
const unsigned char* src_v = raw[i-1].planes[VPX_PLANE_Y]
+ raw[i-1].d_w*raw[i-1].d_h
+ src_uvwidth*src_uvheight;
int dst_uvwidth = (raw[i].d_w + 1) >> 1;
int dst_uvheight = (raw[i].d_h + 1) >> 1;
unsigned char* dst_y = raw[i].planes[VPX_PLANE_Y];
unsigned char* dst_u = raw[i].planes[VPX_PLANE_Y]
+ raw[i].d_w*raw[i].d_h;
unsigned char* dst_v = raw[i].planes[VPX_PLANE_Y]
+ raw[i].d_w*raw[i].d_h
+ dst_uvwidth*dst_uvheight;
/* FilterMode 1 or 2 give better psnr than FilterMode 0. */ /* FilterMode 1 or 2 give better psnr than FilterMode 0. */
I420Scale(src_y, raw[i-1].d_w, src_u, src_uvwidth, src_v, I420Scale(raw[i-1].planes[VPX_PLANE_Y], raw[i-1].stride[VPX_PLANE_Y],
src_uvwidth, raw[i-1].d_w, raw[i-1].d_h, raw[i-1].planes[VPX_PLANE_U], raw[i-1].stride[VPX_PLANE_U],
dst_y, raw[i].d_w, dst_u, dst_uvwidth, raw[i-1].planes[VPX_PLANE_V], raw[i-1].stride[VPX_PLANE_V],
dst_v, dst_uvwidth, raw[i].d_w, raw[i].d_h, 1); raw[i-1].d_w, raw[i-1].d_h,
raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
raw[i].d_w, raw[i].d_h, 1);
} }
} }

View File

@@ -243,6 +243,7 @@ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx,
ctx--; ctx--;
if (img) img--; if (img) img--;
} }
ctx++;
} }
FLOATING_POINT_RESTORE(); FLOATING_POINT_RESTORE();