Merge "vpx_scale: sync from experimental"

This commit is contained in:
John Koleszar 2012-11-02 09:16:41 -07:00 committed by Gerrit Code Review
commit 3b783d2217
12 changed files with 2640 additions and 2822 deletions

View File

@ -8,15 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_rtcd.h"
#include "./vpx_rtcd.h"
extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc,
struct yv12_buffer_config *dst_ybc);
void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc,
struct yv12_buffer_config *dst_ybc)
{
vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
struct yv12_buffer_config *dst_ybc) {
vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
vp8_yv12_extend_frame_borders_neon(dst_ybc);
vp8_yv12_extend_frame_borders_neon(dst_ybc);
}

View File

@ -46,557 +46,524 @@ static float a = -0.6;
// 3 2
// C0 = a*t - a*t
//
static short c0_fixed(unsigned int t)
{
// put t in Q16 notation
unsigned short v1, v2;
static short c0_fixed(unsigned int t) {
// put t in Q16 notation
unsigned short v1, v2;
// Q16
v1 = (a_i * t) >> 16;
v1 = (v1 * t) >> 16;
// Q16
v1 = (a_i * t) >> 16;
v1 = (v1 * t) >> 16;
// Q16
v2 = (a_i * t) >> 16;
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
// Q16
v2 = (a_i * t) >> 16;
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
// Q12
return -((v1 - v2) >> 4);
// Q12
return -((v1 - v2) >> 4);
}
// 2 3
// C1 = a*t + (3-2*a)*t - (2-a)*t
//
static short c1_fixed(unsigned int t)
{
unsigned short v1, v2, v3;
unsigned short two, three;
static short c1_fixed(unsigned int t) {
unsigned short v1, v2, v3;
unsigned short two, three;
// Q16
v1 = (a_i * t) >> 16;
// Q16
v1 = (a_i * t) >> 16;
// Q13
two = 2 << 13;
v2 = two - (a_i >> 3);
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
// Q13
two = 2 << 13;
v2 = two - (a_i >> 3);
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
// Q13
three = 3 << 13;
v3 = three - (2 * (a_i >> 3));
v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16;
// Q13
three = 3 << 13;
v3 = three - (2 * (a_i >> 3));
v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16;
// Q12
return (((v1 >> 3) - v2 + v3) >> 1);
// Q12
return (((v1 >> 3) - v2 + v3) >> 1);
}
// 2 3
// C2 = 1 - (3-a)*t + (2-a)*t
//
static short c2_fixed(unsigned int t)
{
unsigned short v1, v2, v3;
unsigned short two, three;
static short c2_fixed(unsigned int t) {
unsigned short v1, v2, v3;
unsigned short two, three;
// Q13
v1 = 1 << 13;
// Q13
v1 = 1 << 13;
// Q13
three = 3 << 13;
v2 = three - (a_i >> 3);
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
// Q13
three = 3 << 13;
v2 = three - (a_i >> 3);
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
// Q13
two = 2 << 13;
v3 = two - (a_i >> 3);
v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16;
// Q13
two = 2 << 13;
v3 = two - (a_i >> 3);
v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16;
// Q12
return (v1 - v2 + v3) >> 1;
// Q12
return (v1 - v2 + v3) >> 1;
}
// 2 3
// C3 = a*t - 2*a*t + a*t
//
static short c3_fixed(unsigned int t)
{
int v1, v2, v3;
static short c3_fixed(unsigned int t) {
int v1, v2, v3;
// Q16
v1 = (a_i * t) >> 16;
// Q16
v1 = (a_i * t) >> 16;
// Q15
v2 = 2 * (a_i >> 1);
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
// Q15
v2 = 2 * (a_i >> 1);
v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16;
// Q16
v3 = (a_i * t) >> 16;
v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16;
// Q16
v3 = (a_i * t) >> 16;
v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16;
// Q12
return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
// Q12
return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
}
#else
// 3 2
// C0 = -a*t + a*t
//
float C0(float t)
{
return -a * t * t * t + a * t * t;
float C0(float t) {
return -a * t * t * t + a * t * t;
}
// 2 3
// C1 = -a*t + (2*a+3)*t - (a+2)*t
//
float C1(float t)
{
return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
float C1(float t) {
return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
}
// 2 3
// C2 = 1 - (a+3)*t + (a+2)*t
//
float C2(float t)
{
return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
float C2(float t) {
return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
}
// 2 3
// C3 = a*t - 2*a*t + a*t
//
float C3(float t)
{
return a * t * t * t - 2.0f * a * t * t + a * t;
float C3(float t) {
return a * t * t * t - 2.0f * a * t * t + a * t;
}
#endif
#if 0
int compare_real_fixed()
{
int i, errors = 0;
float mult = 1.0 / 10000.0;
unsigned int fixed_mult = mult * 4294967296;//65536;
unsigned int phase_offset_int;
float phase_offset_real;
int compare_real_fixed() {
int i, errors = 0;
float mult = 1.0 / 10000.0;
unsigned int fixed_mult = mult * 4294967296;// 65536;
unsigned int phase_offset_int;
float phase_offset_real;
for (i = 0; i < 10000; i++)
{
int fixed0, fixed1, fixed2, fixed3, fixed_total;
int real0, real1, real2, real3, real_total;
for (i = 0; i < 10000; i++) {
int fixed0, fixed1, fixed2, fixed3, fixed_total;
int real0, real1, real2, real3, real_total;
phase_offset_real = (float)i * mult;
phase_offset_int = (fixed_mult * i) >> 16;
phase_offset_real = (float)i * mult;
phase_offset_int = (fixed_mult * i) >> 16;
// phase_offset_int = phase_offset_real * 65536;
fixed0 = c0_fixed(phase_offset_int);
real0 = C0(phase_offset_real) * 4096.0;
fixed0 = c0_fixed(phase_offset_int);
real0 = C0(phase_offset_real) * 4096.0;
if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
errors++;
if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
errors++;
fixed1 = c1_fixed(phase_offset_int);
real1 = C1(phase_offset_real) * 4096.0;
fixed1 = c1_fixed(phase_offset_int);
real1 = C1(phase_offset_real) * 4096.0;
if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
errors++;
if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
errors++;
fixed2 = c2_fixed(phase_offset_int);
real2 = C2(phase_offset_real) * 4096.0;
fixed2 = c2_fixed(phase_offset_int);
real2 = C2(phase_offset_real) * 4096.0;
if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
errors++;
if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
errors++;
fixed3 = c3_fixed(phase_offset_int);
real3 = C3(phase_offset_real) * 4096.0;
fixed3 = c3_fixed(phase_offset_int);
real3 = C3(phase_offset_real) * 4096.0;
if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
errors++;
if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
errors++;
fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
real_total = real0 + real1 + real2 + real3;
fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
real_total = real0 + real1 + real2 + real3;
if ((fixed_total > 4097) || (fixed_total < 4094))
errors ++;
if ((fixed_total > 4097) || (fixed_total < 4094))
errors++;
if ((real_total > 4097) || (real_total < 4095))
errors ++;
}
if ((real_total > 4097) || (real_total < 4095))
errors++;
}
return errors;
return errors;
}
#endif
// Find the greatest common divisor of two integers. The method used here is
// slow compared to Euclid's algorithm, but does not require any division.
int gcd(int a, int b)
{
// Problem with this algorithm is that if a or b = 0 this function
// will never exit. Don't want to return 0 because any computation
// that was based on a common denominator and tried to reduce by
// dividing by 0 would fail. Best solution that could be thought of
// would be to fail by returning a 1;
if (a <= 0 || b <= 0)
return 1;
int gcd(int a, int b) {
// Problem with this algorithm is that if a or b = 0 this function
// will never exit. Don't want to return 0 because any computation
// that was based on a common denominator and tried to reduce by
// dividing by 0 would fail. Best solution that could be thought of
// would be to fail by returning a 1;
if (a <= 0 || b <= 0)
return 1;
while (a != b)
{
if (b > a)
b = b - a;
else
{
int tmp = a;//swap large and
a = b; //small
b = tmp;
}
while (a != b) {
if (b > a)
b = b - a;
else {
int tmp = a;// swap large and
a = b; // small
b = tmp;
}
}
return b;
return b;
}
void bicubic_coefficient_init()
{
vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
g_first_time = 0;
void bicubic_coefficient_init() {
vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
g_first_time = 0;
}
// Release the tables owned by the global scaler state (g_b_scaler) and
// reset that state to all zeros. Nothing is done while g_first_time is
// still non-zero, i.e. before bicubic_coefficient_init() has cleared it.
void bicubic_coefficient_destroy()
{
if (!g_first_time)
{
// output-to-input index tables (width, height, and the _uv height variant)
vpx_free(g_b_scaler.l_w);
vpx_free(g_b_scaler.l_h);
vpx_free(g_b_scaler.l_h_uv);
// per-phase filter coefficient tables (width, height, and the _uv height variant)
vpx_free(g_b_scaler.c_w);
vpx_free(g_b_scaler.c_h);
vpx_free(g_b_scaler.c_h_uv);
// zero the whole struct so none of the pointers freed above is left behind
vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
}
}
// Create the coefficients that will be used for the cubic interpolation.
// Because scaling does not have to be equal in the vertical and horizontal
// regimes the phase offsets will be different. There are 4 coefficients
// for each point, two on each side. The layout is that there are the
// 4 coefficients for each phase in the array and then the next phase.
int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height)
{
int i;
#ifdef FIXED_POINT
int phase_offset_int;
unsigned int fixed_mult;
int product_val = 0;
#else
float phase_offset;
#endif
int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
if (g_first_time)
bicubic_coefficient_init();
// check to see if the coefficents have already been set up correctly
if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
&& (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
return 0;
g_b_scaler.in_width = in_width;
g_b_scaler.in_height = in_height;
g_b_scaler.out_width = out_width;
g_b_scaler.out_height = out_height;
// Don't want to allow crazy scaling, just try and prevent a catastrophic
// failure here. Want to fail after setting the member functions so if
// if the scaler is called the member functions will not scale.
if (out_width <= 0 || out_height <= 0)
return -1;
// reduce in/out width and height ratios using the gcd
gcd_w = gcd(out_width, in_width);
gcd_h = gcd(out_height, in_height);
gcd_h_uv = gcd(out_height, in_height / 2);
// the numerator width and height are to be saved in
// globals so they can be used during the scaling process
// without having to be recalculated.
g_b_scaler.nw = out_width / gcd_w;
d_w = in_width / gcd_w;
g_b_scaler.nh = out_height / gcd_h;
d_h = in_height / gcd_h;
g_b_scaler.nh_uv = out_height / gcd_h_uv;
d_h_uv = (in_height / 2) / gcd_h_uv;
// allocate memory for the coefficents
void bicubic_coefficient_destroy() {
if (!g_first_time) {
vpx_free(g_b_scaler.l_w);
vpx_free(g_b_scaler.l_h);
vpx_free(g_b_scaler.l_h_uv);
g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
vpx_free(g_b_scaler.c_w);
vpx_free(g_b_scaler.c_h);
vpx_free(g_b_scaler.c_h_uv);
g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
}
}
g_b_scaler.hbuf = g_hbuf;
g_b_scaler.hbuf_uv = g_hbuf_uv;
// Set up polyphase filter taps. This needs to be done before
// the scaling because of the floating point math required. The
// coefficients are multiplied by 2^12 so that fixed point math
// can be used in the main scaling loop.
// Create the coefficients that will be used for the cubic interpolation.
// Because scaling does not have to be equal in the vertical and horizontal
// regimes the phase offsets will be different. There are 4 coefficients
// for each point, two on each side. The layout is that there are the
// 4 coefficients for each phase in the array and then the next phase.
int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) {
int i;
#ifdef FIXED_POINT
fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
int phase_offset_int;
unsigned int fixed_mult;
int product_val = 0;
#else
float phase_offset;
#endif
int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
product_val = 0;
for (i = 0; i < g_b_scaler.nw; i++)
{
if (product_val > g_b_scaler.nw)
product_val -= g_b_scaler.nw;
phase_offset_int = (fixed_mult * product_val) >> 16;
g_b_scaler.c_w[i*4] = c3_fixed(phase_offset_int);
g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int);
g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int);
g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int);
product_val += d_w;
}
if (g_first_time)
bicubic_coefficient_init();
fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
// check to see if the coefficents have already been set up correctly
if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
&& (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
return 0;
product_val = 0;
g_b_scaler.in_width = in_width;
g_b_scaler.in_height = in_height;
g_b_scaler.out_width = out_width;
g_b_scaler.out_height = out_height;
for (i = 0; i < g_b_scaler.nh; i++)
{
if (product_val > g_b_scaler.nh)
product_val -= g_b_scaler.nh;
// Don't want to allow crazy scaling, just try and prevent a catastrophic
// failure here. Want to fail after setting the member functions so if
// if the scaler is called the member functions will not scale.
if (out_width <= 0 || out_height <= 0)
return -1;
phase_offset_int = (fixed_mult * product_val) >> 16;
// reduce in/out width and height ratios using the gcd
gcd_w = gcd(out_width, in_width);
gcd_h = gcd(out_height, in_height);
gcd_h_uv = gcd(out_height, in_height / 2);
g_b_scaler.c_h[i*4] = c0_fixed(phase_offset_int);
g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int);
g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int);
g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int);
// the numerator width and height are to be saved in
// globals so they can be used during the scaling process
// without having to be recalculated.
g_b_scaler.nw = out_width / gcd_w;
d_w = in_width / gcd_w;
product_val += d_h;
}
g_b_scaler.nh = out_height / gcd_h;
d_h = in_height / gcd_h;
fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
g_b_scaler.nh_uv = out_height / gcd_h_uv;
d_h_uv = (in_height / 2) / gcd_h_uv;
product_val = 0;
// allocate memory for the coefficents
vpx_free(g_b_scaler.l_w);
for (i = 0; i < g_b_scaler.nh_uv; i++)
{
if (product_val > g_b_scaler.nh_uv)
product_val -= g_b_scaler.nh_uv;
vpx_free(g_b_scaler.l_h);
phase_offset_int = (fixed_mult * product_val) >> 16;
vpx_free(g_b_scaler.l_h_uv);
g_b_scaler.c_h_uv[i*4] = c0_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int);
g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
product_val += d_h_uv;
}
vpx_free(g_b_scaler.c_w);
vpx_free(g_b_scaler.c_h);
vpx_free(g_b_scaler.c_h_uv);
g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
g_b_scaler.hbuf = g_hbuf;
g_b_scaler.hbuf_uv = g_hbuf_uv;
// Set up polyphase filter taps. This needs to be done before
// the scaling because of the floating point math required. The
// coefficients are multiplied by 2^12 so that fixed point math
// can be used in the main scaling loop.
#ifdef FIXED_POINT
fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
product_val = 0;
for (i = 0; i < g_b_scaler.nw; i++) {
if (product_val > g_b_scaler.nw)
product_val -= g_b_scaler.nw;
phase_offset_int = (fixed_mult * product_val) >> 16;
g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int);
g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int);
g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int);
g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int);
product_val += d_w;
}
fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
product_val = 0;
for (i = 0; i < g_b_scaler.nh; i++) {
if (product_val > g_b_scaler.nh)
product_val -= g_b_scaler.nh;
phase_offset_int = (fixed_mult * product_val) >> 16;
g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int);
g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int);
g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int);
g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int);
product_val += d_h;
}
fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
product_val = 0;
for (i = 0; i < g_b_scaler.nh_uv; i++) {
if (product_val > g_b_scaler.nh_uv)
product_val -= g_b_scaler.nh_uv;
phase_offset_int = (fixed_mult * product_val) >> 16;
g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int);
product_val += d_h_uv;
}
#else
for (i = 0; i < g_nw; i++)
{
phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
g_c_w[i*4] = (C3(phase_offset) * 4096.0);
g_c_w[i*4+1] = (C2(phase_offset) * 4096.0);
g_c_w[i*4+2] = (C1(phase_offset) * 4096.0);
g_c_w[i*4+3] = (C0(phase_offset) * 4096.0);
}
for (i = 0; i < g_nw; i++) {
phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
g_c_w[i * 4] = (C3(phase_offset) * 4096.0);
g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0);
g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0);
g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0);
}
for (i = 0; i < g_nh; i++)
{
phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
g_c_h[i*4] = (C0(phase_offset) * 4096.0);
g_c_h[i*4+1] = (C1(phase_offset) * 4096.0);
g_c_h[i*4+2] = (C2(phase_offset) * 4096.0);
g_c_h[i*4+3] = (C3(phase_offset) * 4096.0);
}
for (i = 0; i < g_nh; i++) {
phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
g_c_h[i * 4] = (C0(phase_offset) * 4096.0);
g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0);
g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0);
g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0);
}
for (i = 0; i < g_nh_uv; i++)
{
phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
g_c_h_uv[i*4] = (C0(phase_offset) * 4096.0);
g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0);
g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0);
g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0);
}
for (i = 0; i < g_nh_uv; i++) {
phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0);
g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0);
g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0);
g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0);
}
#endif
// Create an array that corresponds input lines to output lines.
// This doesn't require floating point math, but it does require
// a division and because hardware division is not present that
// is a call.
for (i = 0; i < out_width; i++)
{
g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
// Create an array that corresponds input lines to output lines.
// This doesn't require floating point math, but it does require
// a division and because hardware division is not present that
// is a call.
for (i = 0; i < out_width; i++) {
g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
if ((g_b_scaler.l_w[i] + 2) <= in_width)
g_b_scaler.max_usable_out_width = i;
if ((g_b_scaler.l_w[i] + 2) <= in_width)
g_b_scaler.max_usable_out_width = i;
}
}
for (i = 0; i < out_height + 1; i++)
{
g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
}
for (i = 0; i < out_height + 1; i++) {
g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
}
return 0;
return 0;
}
int bicubic_scale(int in_width, int in_height, int in_stride,
int out_width, int out_height, int out_stride,
unsigned char *input_image, unsigned char *output_image)
{
short *RESTRICT l_w, * RESTRICT l_h;
short *RESTRICT c_w, * RESTRICT c_h;
unsigned char *RESTRICT ip, * RESTRICT op;
unsigned char *RESTRICT hbuf;
int h, w, lw, lh;
int temp_sum;
int phase_offset_w, phase_offset_h;
unsigned char *input_image, unsigned char *output_image) {
short *RESTRICT l_w, * RESTRICT l_h;
short *RESTRICT c_w, * RESTRICT c_h;
unsigned char *RESTRICT ip, * RESTRICT op;
unsigned char *RESTRICT hbuf;
int h, w, lw, lh;
int temp_sum;
int phase_offset_w, phase_offset_h;
c_w = g_b_scaler.c_w;
c_h = g_b_scaler.c_h;
c_w = g_b_scaler.c_w;
c_h = g_b_scaler.c_h;
op = output_image;
op = output_image;
l_w = g_b_scaler.l_w;
l_h = g_b_scaler.l_h;
l_w = g_b_scaler.l_w;
l_h = g_b_scaler.l_h;
phase_offset_h = 0;
phase_offset_h = 0;
for (h = 0; h < out_height; h++)
{
// select the row to work on
lh = l_h[h];
ip = input_image + (in_stride * lh);
for (h = 0; h < out_height; h++) {
// select the row to work on
lh = l_h[h];
ip = input_image + (in_stride * lh);
// vp8_filter the row vertically into a temporary buffer.
// If the phase offset == 0 then all the multiplication
// is going to result in the output equalling the input.
// So instead point the temporary buffer to the input.
// Also handle the boundary condition of not being able to
// filter the last lines.
if (phase_offset_h && (lh < in_height - 2))
{
hbuf = g_b_scaler.hbuf;
// vp8_filter the row vertically into a temporary buffer.
// If the phase offset == 0 then all the multiplication
// is going to result in the output equalling the input.
// So instead point the temporary buffer to the input.
// Also handle the boundary condition of not being able to
// filter the last lines.
if (phase_offset_h && (lh < in_height - 2)) {
hbuf = g_b_scaler.hbuf;
for (w = 0; w < in_width; w++)
{
temp_sum = c_h[phase_offset_h*4+3] * ip[w - in_stride];
temp_sum += c_h[phase_offset_h*4+2] * ip[w];
temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride];
temp_sum += c_h[phase_offset_h*4] * ip[w + 2*in_stride];
for (w = 0; w < in_width; w++) {
temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride];
temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w];
temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride];
temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride];
hbuf[w] = temp_sum >> 12;
}
}
else
hbuf = ip;
hbuf[w] = temp_sum >> 12;
}
} else
hbuf = ip;
// increase the phase offset for the next time around.
if (++phase_offset_h >= g_b_scaler.nh)
phase_offset_h = 0;
// increase the phase offset for the next time around.
if (++phase_offset_h >= g_b_scaler.nh)
phase_offset_h = 0;
// now filter and expand it horizontally into the final
// output buffer
// now filter and expand it horizontally into the final
// output buffer
phase_offset_w = 0;
for (w = 0; w < out_width; w++) {
// get the index to use to expand the image
lw = l_w[w];
temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1];
temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw];
temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1];
temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2];
temp_sum = temp_sum >> 12;
if (++phase_offset_w >= g_b_scaler.nw)
phase_offset_w = 0;
for (w = 0; w < out_width; w++)
{
// get the index to use to expand the image
lw = l_w[w];
// boundry conditions
if ((lw + 2) >= in_width)
temp_sum = hbuf[lw];
temp_sum = c_w[phase_offset_w*4] * hbuf[lw - 1];
temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw];
temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1];
temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2];
temp_sum = temp_sum >> 12;
if (lw == 0)
temp_sum = hbuf[0];
if (++phase_offset_w >= g_b_scaler.nw)
phase_offset_w = 0;
// boundry conditions
if ((lw + 2) >= in_width)
temp_sum = hbuf[lw];
if (lw == 0)
temp_sum = hbuf[0];
op[w] = temp_sum;
}
op += out_stride;
op[w] = temp_sum;
}
return 0;
op += out_stride;
}
return 0;
}
void bicubic_scale_frame_reset()
{
g_b_scaler.out_width = 0;
g_b_scaler.out_height = 0;
void bicubic_scale_frame_reset() {
g_b_scaler.out_width = 0;
g_b_scaler.out_height = 0;
}
void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
int new_width, int new_height)
{
int new_width, int new_height) {
dst->y_width = new_width;
dst->y_height = new_height;
dst->uv_width = new_width / 2;
dst->uv_height = new_height / 2;
dst->y_width = new_width;
dst->y_height = new_height;
dst->uv_width = new_width / 2;
dst->uv_height = new_height / 2;
dst->y_stride = dst->y_width;
dst->uv_stride = dst->uv_width;
dst->y_stride = dst->y_width;
dst->uv_stride = dst->uv_width;
bicubic_scale(src->y_width, src->y_height, src->y_stride,
new_width, new_height, dst->y_stride,
src->y_buffer, dst->y_buffer);
bicubic_scale(src->y_width, src->y_height, src->y_stride,
new_width, new_height, dst->y_stride,
src->y_buffer, dst->y_buffer);
bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
new_width / 2, new_height / 2, dst->uv_stride,
src->u_buffer, dst->u_buffer);
bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
new_width / 2, new_height / 2, dst->uv_stride,
src->u_buffer, dst->u_buffer);
bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
new_width / 2, new_height / 2, dst->uv_stride,
src->v_buffer, dst->v_buffer);
bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
new_width / 2, new_height / 2, dst->uv_stride,
src->v_buffer, dst->v_buffer);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -20,81 +20,73 @@
*
****************************************************************************/
int
vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf)
{
if (ybf)
{
vpx_free(ybf->buffer_alloc);
vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
if (ybf) {
vpx_free(ybf->buffer_alloc);
/* buffer_alloc isn't accessed by most functions. Rather y_buffer,
u_buffer and v_buffer point to buffer_alloc and are used. Clear out
all of this so that a freed pointer isn't inadvertently used */
vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG));
}
else
{
return -1;
}
/* buffer_alloc isn't accessed by most functions. Rather y_buffer,
u_buffer and v_buffer point to buffer_alloc and are used. Clear out
all of this so that a freed pointer isn't inadvertently used */
vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG));
} else {
return -1;
}
return 0;
return 0;
}
/****************************************************************************
*
****************************************************************************/
int
vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border)
{
/*NOTE:*/
vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) {
/*NOTE:*/
if (ybf)
{
int y_stride = ((width + 2 * border) + 31) & ~31;
int yplane_size = (height + 2 * border) * y_stride;
int uv_width = width >> 1;
int uv_height = height >> 1;
/** There is currently a bunch of code which assumes
* uv_stride == y_stride/2, so enforce this here. */
int uv_stride = y_stride >> 1;
int uvplane_size = (uv_height + border) * uv_stride;
if (ybf) {
int y_stride = ((width + 2 * border) + 31) & ~31;
int yplane_size = (height + 2 * border) * y_stride;
int uv_width = width >> 1;
int uv_height = height >> 1;
/** There is currently a bunch of code which assumes
* uv_stride == y_stride/2, so enforce this here. */
int uv_stride = y_stride >> 1;
int uvplane_size = (uv_height + border) * uv_stride;
vp8_yv12_de_alloc_frame_buffer(ybf);
vp8_yv12_de_alloc_frame_buffer(ybf);
/** Only support allocating buffers that have a height and width that
* are multiples of 16, and a border that's a multiple of 32.
* The border restriction is required to get 16-byte alignment of the
* start of the chroma rows without introducing an arbitrary gap
* between planes, which would break the semantics of things like
* vpx_img_set_rect(). */
if ((width & 0xf) | (height & 0xf) | (border & 0x1f))
return -3;
/** Only support allocating buffers that have a height and width that
* are multiples of 16, and a border that's a multiple of 32.
* The border restriction is required to get 16-byte alignment of the
* start of the chroma rows without introducing an arbitrary gap
* between planes, which would break the semantics of things like
* vpx_img_set_rect(). */
if ((width & 0xf) | (height & 0xf) | (border & 0x1f))
return -3;
ybf->y_width = width;
ybf->y_height = height;
ybf->y_stride = y_stride;
ybf->y_width = width;
ybf->y_height = height;
ybf->y_stride = y_stride;
ybf->uv_width = uv_width;
ybf->uv_height = uv_height;
ybf->uv_stride = uv_stride;
ybf->uv_width = uv_width;
ybf->uv_height = uv_height;
ybf->uv_stride = uv_stride;
ybf->border = border;
ybf->frame_size = yplane_size + 2 * uvplane_size;
ybf->border = border;
ybf->frame_size = yplane_size + 2 * uvplane_size;
ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
if (ybf->buffer_alloc == NULL)
return -1;
if (ybf->buffer_alloc == NULL)
return -1;
ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2;
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2;
ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2;
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2;
ybf->corrupted = 0; /* assume not currupted by errors */
}
else
{
return -2;
}
ybf->corrupted = 0; /* assume not currupted by errors */
} else {
return -2;
}
return 0;
return 0;
}

View File

@ -21,184 +21,174 @@
*
****************************************************************************/
void
vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf)
{
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
unsigned int Border;
int plane_stride;
int plane_height;
int plane_width;
unsigned int Border;
int plane_stride;
int plane_height;
int plane_width;
/***********/
/* Y Plane */
/***********/
Border = ybf->border;
plane_stride = ybf->y_stride;
plane_height = ybf->y_height;
plane_width = ybf->y_width;
/***********/
/* Y Plane */
/***********/
Border = ybf->border;
plane_stride = ybf->y_stride;
plane_height = ybf->y_height;
plane_width = ybf->y_width;
/* copy the left and right most columns out */
src_ptr1 = ybf->y_buffer;
src_ptr2 = src_ptr1 + plane_width - 1;
dest_ptr1 = src_ptr1 - Border;
dest_ptr2 = src_ptr2 + 1;
/* copy the left and right most columns out */
src_ptr1 = ybf->y_buffer;
src_ptr2 = src_ptr1 + plane_width - 1;
dest_ptr1 = src_ptr1 - Border;
dest_ptr2 = src_ptr2 + 1;
for (i = 0; i < plane_height; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
for (i = 0; i < plane_height; i++) {
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = ybf->y_buffer - Border;
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
dest_ptr1 = src_ptr1 - (Border * plane_stride);
dest_ptr2 = src_ptr2 + plane_stride;
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = ybf->y_buffer - Border;
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
dest_ptr1 = src_ptr1 - (Border * plane_stride);
dest_ptr2 = src_ptr2 + plane_stride;
for (i = 0; i < (int)Border; i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
for (i = 0; i < (int)Border; i++) {
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
/***********/
/* U Plane */
/***********/
plane_stride = ybf->uv_stride;
plane_height = ybf->uv_height;
plane_width = ybf->uv_width;
Border /= 2;
/***********/
/* U Plane */
/***********/
plane_stride = ybf->uv_stride;
plane_height = ybf->uv_height;
plane_width = ybf->uv_width;
Border /= 2;
/* copy the left and right most columns out */
src_ptr1 = ybf->u_buffer;
src_ptr2 = src_ptr1 + plane_width - 1;
dest_ptr1 = src_ptr1 - Border;
dest_ptr2 = src_ptr2 + 1;
/* copy the left and right most columns out */
src_ptr1 = ybf->u_buffer;
src_ptr2 = src_ptr1 + plane_width - 1;
dest_ptr1 = src_ptr1 - Border;
dest_ptr2 = src_ptr2 + 1;
for (i = 0; i < plane_height; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
for (i = 0; i < plane_height; i++) {
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = ybf->u_buffer - Border;
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
dest_ptr1 = src_ptr1 - (Border * plane_stride);
dest_ptr2 = src_ptr2 + plane_stride;
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = ybf->u_buffer - Border;
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
dest_ptr1 = src_ptr1 - (Border * plane_stride);
dest_ptr2 = src_ptr2 + plane_stride;
for (i = 0; i < (int)(Border); i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
for (i = 0; i < (int)(Border); i++) {
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
/***********/
/* V Plane */
/***********/
/***********/
/* V Plane */
/***********/
/* copy the left and right most columns out */
src_ptr1 = ybf->v_buffer;
src_ptr2 = src_ptr1 + plane_width - 1;
dest_ptr1 = src_ptr1 - Border;
dest_ptr2 = src_ptr2 + 1;
/* copy the left and right most columns out */
src_ptr1 = ybf->v_buffer;
src_ptr2 = src_ptr1 + plane_width - 1;
dest_ptr1 = src_ptr1 - Border;
dest_ptr2 = src_ptr2 + 1;
for (i = 0; i < plane_height; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
for (i = 0; i < plane_height; i++) {
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = ybf->v_buffer - Border;
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
dest_ptr1 = src_ptr1 - (Border * plane_stride);
dest_ptr2 = src_ptr2 + plane_stride;
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = ybf->v_buffer - Border;
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
dest_ptr1 = src_ptr1 - (Border * plane_stride);
dest_ptr2 = src_ptr2 + plane_stride;
for (i = 0; i < (int)(Border); i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
for (i = 0; i < (int)(Border); i++) {
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
}
static void
extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf)
{
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) {
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
unsigned int Border;
int plane_stride;
int plane_height;
int plane_width;
unsigned int Border;
int plane_stride;
int plane_height;
int plane_width;
/***********/
/* Y Plane */
/***********/
Border = ybf->border;
plane_stride = ybf->y_stride;
plane_height = ybf->y_height;
plane_width = ybf->y_width;
/***********/
/* Y Plane */
/***********/
Border = ybf->border;
plane_stride = ybf->y_stride;
plane_height = ybf->y_height;
plane_width = ybf->y_width;
/* copy the left and right most columns out */
src_ptr1 = ybf->y_buffer;
src_ptr2 = src_ptr1 + plane_width - 1;
dest_ptr1 = src_ptr1 - Border;
dest_ptr2 = src_ptr2 + 1;
/* copy the left and right most columns out */
src_ptr1 = ybf->y_buffer;
src_ptr2 = src_ptr1 + plane_width - 1;
dest_ptr1 = src_ptr1 - Border;
dest_ptr2 = src_ptr2 + 1;
for (i = 0; i < plane_height; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
for (i = 0; i < plane_height; i++) {
vpx_memset(dest_ptr1, src_ptr1[0], Border);
vpx_memset(dest_ptr2, src_ptr2[0], Border);
src_ptr1 += plane_stride;
src_ptr2 += plane_stride;
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = ybf->y_buffer - Border;
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
dest_ptr1 = src_ptr1 - (Border * plane_stride);
dest_ptr2 = src_ptr2 + plane_stride;
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = ybf->y_buffer - Border;
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
dest_ptr1 = src_ptr1 - (Border * plane_stride);
dest_ptr2 = src_ptr2 + plane_stride;
for (i = 0; i < (int)Border; i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
for (i = 0; i < (int)Border; i++) {
vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
dest_ptr1 += plane_stride;
dest_ptr2 += plane_stride;
}
plane_stride /= 2;
plane_height /= 2;
plane_width /= 2;
Border /= 2;
plane_stride /= 2;
plane_height /= 2;
plane_width /= 2;
Border /= 2;
}
@ -221,57 +211,53 @@ extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf)
*
****************************************************************************/
void
vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
{
int row;
unsigned char *source, *dest;
vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc,
YV12_BUFFER_CONFIG *dst_ybc) {
int row;
unsigned char *source, *dest;
source = src_ybc->y_buffer;
dest = dst_ybc->y_buffer;
source = src_ybc->y_buffer;
dest = dst_ybc->y_buffer;
for (row = 0; row < src_ybc->y_height; row++)
{
vpx_memcpy(dest, source, src_ybc->y_width);
source += src_ybc->y_stride;
dest += dst_ybc->y_stride;
}
for (row = 0; row < src_ybc->y_height; row++) {
vpx_memcpy(dest, source, src_ybc->y_width);
source += src_ybc->y_stride;
dest += dst_ybc->y_stride;
}
source = src_ybc->u_buffer;
dest = dst_ybc->u_buffer;
source = src_ybc->u_buffer;
dest = dst_ybc->u_buffer;
for (row = 0; row < src_ybc->uv_height; row++)
{
vpx_memcpy(dest, source, src_ybc->uv_width);
source += src_ybc->uv_stride;
dest += dst_ybc->uv_stride;
}
for (row = 0; row < src_ybc->uv_height; row++) {
vpx_memcpy(dest, source, src_ybc->uv_width);
source += src_ybc->uv_stride;
dest += dst_ybc->uv_stride;
}
source = src_ybc->v_buffer;
dest = dst_ybc->v_buffer;
source = src_ybc->v_buffer;
dest = dst_ybc->v_buffer;
for (row = 0; row < src_ybc->uv_height; row++)
{
vpx_memcpy(dest, source, src_ybc->uv_width);
source += src_ybc->uv_stride;
dest += dst_ybc->uv_stride;
}
for (row = 0; row < src_ybc->uv_height; row++) {
vpx_memcpy(dest, source, src_ybc->uv_width);
source += src_ybc->uv_stride;
dest += dst_ybc->uv_stride;
}
vp8_yv12_extend_frame_borders_c(dst_ybc);
vp8_yv12_extend_frame_borders_c(dst_ybc);
}
void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
{
int row;
unsigned char *source, *dest;
void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc,
YV12_BUFFER_CONFIG *dst_ybc) {
int row;
unsigned char *source, *dest;
source = src_ybc->y_buffer;
dest = dst_ybc->y_buffer;
source = src_ybc->y_buffer;
dest = dst_ybc->y_buffer;
for (row = 0; row < src_ybc->y_height; row++)
{
vpx_memcpy(dest, source, src_ybc->y_width);
source += src_ybc->y_stride;
dest += dst_ybc->y_stride;
}
for (row = 0; row < src_ybc->y_height; row++) {
vpx_memcpy(dest, source, src_ybc->y_width);
source += src_ybc->y_stride;
dest += dst_ybc->y_stride;
}
}

View File

@ -14,33 +14,32 @@
#include "vpx_scale/yv12config.h"
typedef struct
{
int in_width;
int in_height;
typedef struct {
int in_width;
int in_height;
int out_width;
int out_height;
int max_usable_out_width;
int out_width;
int out_height;
int max_usable_out_width;
// numerator for the width and height
int nw;
int nh;
int nh_uv;
// numerator for the width and height
int nw;
int nh;
int nh_uv;
// output to input correspondance array
short *l_w;
short *l_h;
short *l_h_uv;
// output to input correspondance array
short *l_w;
short *l_h;
short *l_h_uv;
// polyphase coefficients
short *c_w;
short *c_h;
short *c_h_uv;
// polyphase coefficients
short *c_w;
short *c_h;
short *c_h_uv;
// buffer for horizontal filtering.
unsigned char *hbuf;
unsigned char *hbuf_uv;
// buffer for horizontal filtering.
unsigned char *hbuf;
unsigned char *hbuf_uv;
} BICUBIC_SCALER_STRUCT;
int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height);

View File

@ -17,12 +17,11 @@
#ifndef SCALE_MODE_H
#define SCALE_MODE_H
typedef enum
{
MAINTAIN_ASPECT_RATIO = 0x0,
SCALE_TO_FIT = 0x1,
CENTER = 0x2,
OTHER = 0x3
typedef enum {
MAINTAIN_ASPECT_RATIO = 0x0,
SCALE_TO_FIT = 0x1,
CENTER = 0x2,
OTHER = 0x3
} SCALE_MODE;

View File

@ -14,29 +14,24 @@
#include "vpx_scale/yv12config.h"
extern void vp8_yv12_scale_or_center
(
YV12_BUFFER_CONFIG *src_yuv_config,
YV12_BUFFER_CONFIG *dst_yuv_config,
int expanded_frame_width,
int expanded_frame_height,
int scaling_mode,
int HScale,
int HRatio,
int VScale,
int VRatio
);
extern void vp8_scale_frame
(
YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
unsigned char *temp_area,
unsigned char temp_height,
unsigned int hscale,
unsigned int hratio,
unsigned int vscale,
unsigned int vratio,
unsigned int interlaced
);
extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config,
YV12_BUFFER_CONFIG *dst_yuv_config,
int expanded_frame_width,
int expanded_frame_height,
int scaling_mode,
int HScale,
int HRatio,
int VScale,
int VRatio);
extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
unsigned char *temp_area,
unsigned char temp_height,
unsigned int hscale,
unsigned int hratio,
unsigned int vscale,
unsigned int vratio,
unsigned int interlaced);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -46,46 +46,42 @@ extern void register_mmxscalers(void);
*
****************************************************************************/
void
vp8_scale_machine_specific_config(void)
{
// If MMX supported then set to use MMX versions of functions else
// use original 'C' versions.
int mmx_enabled;
int xmm_enabled;
int wmt_enabled;
vp8_scale_machine_specific_config(void) {
// If MMX supported then set to use MMX versions of functions else
// use original 'C' versions.
int mmx_enabled;
int xmm_enabled;
int wmt_enabled;
vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
if (mmx_enabled || xmm_enabled || wmt_enabled)
{
register_mmxscalers();
}
else
{
vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c;
vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c;
vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c;
vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c;
vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c;
vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c;
vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c;
vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c;
vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c;
if (mmx_enabled || xmm_enabled || wmt_enabled) {
register_mmxscalers();
} else {
vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c;
vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c;
vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c;
vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c;
vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c;
vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c;
vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c;
vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c;
vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c;
vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c;
vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c;
vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c;
vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c;
vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c;
vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c;
vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c;
vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c;
vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c;
vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c;
vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c;
vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c;
vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c;
vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c;
}
}
}

View File

@ -16,54 +16,54 @@ extern "C"
{
#endif
#define VP7BORDERINPIXELS 48
#define VP8BORDERINPIXELS 32
#define VP9BORDERINPIXELS 64
#define VP9_INTERP_EXTEND 4
/*************************************
For INT_YUV:
/*************************************
For INT_YUV:
Y = (R+G*2+B)/4;
U = (R-B)/2;
V = (G*2 - R - B)/4;
And
R = Y+U-V;
G = Y+V;
B = Y-U-V;
************************************/
typedef enum
{
REG_YUV = 0, /* Regular yuv */
INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */
}
YUV_TYPE;
Y = (R+G*2+B)/4;
U = (R-B)/2;
V = (G*2 - R - B)/4;
And
R = Y+U-V;
G = Y+V;
B = Y-U-V;
************************************/
typedef enum
{
REG_YUV = 0, /* Regular yuv */
INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */
}
YUV_TYPE;
typedef struct yv12_buffer_config
{
int y_width;
int y_height;
int y_stride;
/* int yinternal_width; */
typedef struct yv12_buffer_config {
int y_width;
int y_height;
int y_stride;
/* int yinternal_width; */
int uv_width;
int uv_height;
int uv_stride;
/* int uvinternal_width; */
int uv_width;
int uv_height;
int uv_stride;
/* int uvinternal_width; */
unsigned char *y_buffer;
unsigned char *u_buffer;
unsigned char *v_buffer;
unsigned char *y_buffer;
unsigned char *u_buffer;
unsigned char *v_buffer;
unsigned char *buffer_alloc;
int border;
int frame_size;
YUV_TYPE clrtype;
unsigned char *buffer_alloc;
int border;
int frame_size;
YUV_TYPE clrtype;
int corrupted;
int flags;
} YV12_BUFFER_CONFIG;
int corrupted;
int flags;
} YV12_BUFFER_CONFIG;
int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
#ifdef __cplusplus
}