Merge "vpx_scale: sync from experimental"

This commit is contained in:
John Koleszar 2012-11-02 09:16:41 -07:00 committed by Gerrit Code Review
commit 3b783d2217
12 changed files with 2640 additions and 2822 deletions

View File

@ -8,15 +8,14 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "vpx_rtcd.h" #include "./vpx_rtcd.h"
extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc, extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc,
struct yv12_buffer_config *dst_ybc); struct yv12_buffer_config *dst_ybc);
void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc, void vp8_yv12_copy_frame_neon(struct yv12_buffer_config *src_ybc,
struct yv12_buffer_config *dst_ybc) struct yv12_buffer_config *dst_ybc) {
{ vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
vp8_yv12_extend_frame_borders_neon(dst_ybc); vp8_yv12_extend_frame_borders_neon(dst_ybc);
} }

View File

@ -46,557 +46,524 @@ static float a = -0.6;
// 3 2 // 3 2
// C0 = a*t - a*t // C0 = a*t - a*t
// //
static short c0_fixed(unsigned int t) static short c0_fixed(unsigned int t) {
{ // put t in Q16 notation
// put t in Q16 notation unsigned short v1, v2;
unsigned short v1, v2;
// Q16 // Q16
v1 = (a_i * t) >> 16; v1 = (a_i * t) >> 16;
v1 = (v1 * t) >> 16; v1 = (v1 * t) >> 16;
// Q16 // Q16
v2 = (a_i * t) >> 16; v2 = (a_i * t) >> 16;
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
// Q12 // Q12
return -((v1 - v2) >> 4); return -((v1 - v2) >> 4);
} }
// 2 3 // 2 3
// C1 = a*t + (3-2*a)*t - (2-a)*t // C1 = a*t + (3-2*a)*t - (2-a)*t
// //
static short c1_fixed(unsigned int t) static short c1_fixed(unsigned int t) {
{ unsigned short v1, v2, v3;
unsigned short v1, v2, v3; unsigned short two, three;
unsigned short two, three;
// Q16 // Q16
v1 = (a_i * t) >> 16; v1 = (a_i * t) >> 16;
// Q13 // Q13
two = 2 << 13; two = 2 << 13;
v2 = two - (a_i >> 3); v2 = two - (a_i >> 3);
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
// Q13 // Q13
three = 3 << 13; three = 3 << 13;
v3 = three - (2 * (a_i >> 3)); v3 = three - (2 * (a_i >> 3));
v3 = (v3 * t) >> 16; v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16; v3 = (v3 * t) >> 16;
// Q12 // Q12
return (((v1 >> 3) - v2 + v3) >> 1); return (((v1 >> 3) - v2 + v3) >> 1);
} }
// 2 3 // 2 3
// C2 = 1 - (3-a)*t + (2-a)*t // C2 = 1 - (3-a)*t + (2-a)*t
// //
static short c2_fixed(unsigned int t) static short c2_fixed(unsigned int t) {
{ unsigned short v1, v2, v3;
unsigned short v1, v2, v3; unsigned short two, three;
unsigned short two, three;
// Q13 // Q13
v1 = 1 << 13; v1 = 1 << 13;
// Q13 // Q13
three = 3 << 13; three = 3 << 13;
v2 = three - (a_i >> 3); v2 = three - (a_i >> 3);
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
// Q13 // Q13
two = 2 << 13; two = 2 << 13;
v3 = two - (a_i >> 3); v3 = two - (a_i >> 3);
v3 = (v3 * t) >> 16; v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16; v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16; v3 = (v3 * t) >> 16;
// Q12 // Q12
return (v1 - v2 + v3) >> 1; return (v1 - v2 + v3) >> 1;
} }
// 2 3 // 2 3
// C3 = a*t - 2*a*t + a*t // C3 = a*t - 2*a*t + a*t
// //
static short c3_fixed(unsigned int t) static short c3_fixed(unsigned int t) {
{ int v1, v2, v3;
int v1, v2, v3;
// Q16 // Q16
v1 = (a_i * t) >> 16; v1 = (a_i * t) >> 16;
// Q15 // Q15
v2 = 2 * (a_i >> 1); v2 = 2 * (a_i >> 1);
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
v2 = (v2 * t) >> 16; v2 = (v2 * t) >> 16;
// Q16 // Q16
v3 = (a_i * t) >> 16; v3 = (a_i * t) >> 16;
v3 = (v3 * t) >> 16; v3 = (v3 * t) >> 16;
v3 = (v3 * t) >> 16; v3 = (v3 * t) >> 16;
// Q12 // Q12
return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
} }
#else #else
// 3 2 // 3 2
// C0 = -a*t + a*t // C0 = -a*t + a*t
// //
float C0(float t) float C0(float t) {
{ return -a * t * t * t + a * t * t;
return -a * t * t * t + a * t * t;
} }
// 2 3 // 2 3
// C1 = -a*t + (2*a+3)*t - (a+2)*t // C1 = -a*t + (2*a+3)*t - (a+2)*t
// //
float C1(float t) float C1(float t) {
{ return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
} }
// 2 3 // 2 3
// C2 = 1 - (a+3)*t + (a+2)*t // C2 = 1 - (a+3)*t + (a+2)*t
// //
float C2(float t) float C2(float t) {
{ return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
} }
// 2 3 // 2 3
// C3 = a*t - 2*a*t + a*t // C3 = a*t - 2*a*t + a*t
// //
float C3(float t) float C3(float t) {
{ return a * t * t * t - 2.0f * a * t * t + a * t;
return a * t * t * t - 2.0f * a * t * t + a * t;
} }
#endif #endif
#if 0 #if 0
int compare_real_fixed() int compare_real_fixed() {
{ int i, errors = 0;
int i, errors = 0; float mult = 1.0 / 10000.0;
float mult = 1.0 / 10000.0; unsigned int fixed_mult = mult * 4294967296;// 65536;
unsigned int fixed_mult = mult * 4294967296;//65536; unsigned int phase_offset_int;
unsigned int phase_offset_int; float phase_offset_real;
float phase_offset_real;
for (i = 0; i < 10000; i++) for (i = 0; i < 10000; i++) {
{ int fixed0, fixed1, fixed2, fixed3, fixed_total;
int fixed0, fixed1, fixed2, fixed3, fixed_total; int real0, real1, real2, real3, real_total;
int real0, real1, real2, real3, real_total;
phase_offset_real = (float)i * mult; phase_offset_real = (float)i * mult;
phase_offset_int = (fixed_mult * i) >> 16; phase_offset_int = (fixed_mult * i) >> 16;
// phase_offset_int = phase_offset_real * 65536; // phase_offset_int = phase_offset_real * 65536;
fixed0 = c0_fixed(phase_offset_int); fixed0 = c0_fixed(phase_offset_int);
real0 = C0(phase_offset_real) * 4096.0; real0 = C0(phase_offset_real) * 4096.0;
if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
errors++; errors++;
fixed1 = c1_fixed(phase_offset_int); fixed1 = c1_fixed(phase_offset_int);
real1 = C1(phase_offset_real) * 4096.0; real1 = C1(phase_offset_real) * 4096.0;
if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
errors++; errors++;
fixed2 = c2_fixed(phase_offset_int); fixed2 = c2_fixed(phase_offset_int);
real2 = C2(phase_offset_real) * 4096.0; real2 = C2(phase_offset_real) * 4096.0;
if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
errors++; errors++;
fixed3 = c3_fixed(phase_offset_int); fixed3 = c3_fixed(phase_offset_int);
real3 = C3(phase_offset_real) * 4096.0; real3 = C3(phase_offset_real) * 4096.0;
if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
errors++; errors++;
fixed_total = fixed0 + fixed1 + fixed2 + fixed3; fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
real_total = real0 + real1 + real2 + real3; real_total = real0 + real1 + real2 + real3;
if ((fixed_total > 4097) || (fixed_total < 4094)) if ((fixed_total > 4097) || (fixed_total < 4094))
errors ++; errors++;
if ((real_total > 4097) || (real_total < 4095)) if ((real_total > 4097) || (real_total < 4095))
errors ++; errors++;
} }
return errors; return errors;
} }
#endif #endif
// Find greatest common denominator between two integers. Method used here is // Find greatest common denominator between two integers. Method used here is
// slow compared to Euclid's algorithm, but does not require any division. // slow compared to Euclid's algorithm, but does not require any division.
int gcd(int a, int b) int gcd(int a, int b) {
{ // Problem with this algorithm is that if a or b = 0 this function
// Problem with this algorithm is that if a or b = 0 this function // will never exit. Don't want to return 0 because any computation
// will never exit. Don't want to return 0 because any computation // that was based on a common denoninator and tried to reduce by
// that was based on a common denoninator and tried to reduce by // dividing by 0 would fail. Best solution that could be thought of
// dividing by 0 would fail. Best solution that could be thought of // would to be fail by returing a 1;
// would to be fail by returing a 1; if (a <= 0 || b <= 0)
if (a <= 0 || b <= 0) return 1;
return 1;
while (a != b) while (a != b) {
{ if (b > a)
if (b > a) b = b - a;
b = b - a; else {
else int tmp = a;// swap large and
{ a = b; // small
int tmp = a;//swap large and b = tmp;
a = b; //small
b = tmp;
}
} }
}
return b; return b;
} }
void bicubic_coefficient_init() void bicubic_coefficient_init() {
{ vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); g_first_time = 0;
g_first_time = 0;
} }
void bicubic_coefficient_destroy() void bicubic_coefficient_destroy() {
{ if (!g_first_time) {
if (!g_first_time)
{
vpx_free(g_b_scaler.l_w);
vpx_free(g_b_scaler.l_h);
vpx_free(g_b_scaler.l_h_uv);
vpx_free(g_b_scaler.c_w);
vpx_free(g_b_scaler.c_h);
vpx_free(g_b_scaler.c_h_uv);
vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
}
}
// Create the coeffients that will be used for the cubic interpolation.
// Because scaling does not have to be equal in the vertical and horizontal
// regimes the phase offsets will be different. There are 4 coefficents
// for each point, two on each side. The layout is that there are the
// 4 coefficents for each phase in the array and then the next phase.
int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height)
{
int i;
#ifdef FIXED_POINT
int phase_offset_int;
unsigned int fixed_mult;
int product_val = 0;
#else
float phase_offset;
#endif
int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
if (g_first_time)
bicubic_coefficient_init();
// check to see if the coefficents have already been set up correctly
if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
&& (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
return 0;
g_b_scaler.in_width = in_width;
g_b_scaler.in_height = in_height;
g_b_scaler.out_width = out_width;
g_b_scaler.out_height = out_height;
// Don't want to allow crazy scaling, just try and prevent a catastrophic
// failure here. Want to fail after setting the member functions so if
// if the scaler is called the member functions will not scale.
if (out_width <= 0 || out_height <= 0)
return -1;
// reduce in/out width and height ratios using the gcd
gcd_w = gcd(out_width, in_width);
gcd_h = gcd(out_height, in_height);
gcd_h_uv = gcd(out_height, in_height / 2);
// the numerator width and height are to be saved in
// globals so they can be used during the scaling process
// without having to be recalculated.
g_b_scaler.nw = out_width / gcd_w;
d_w = in_width / gcd_w;
g_b_scaler.nh = out_height / gcd_h;
d_h = in_height / gcd_h;
g_b_scaler.nh_uv = out_height / gcd_h_uv;
d_h_uv = (in_height / 2) / gcd_h_uv;
// allocate memory for the coefficents
vpx_free(g_b_scaler.l_w); vpx_free(g_b_scaler.l_w);
vpx_free(g_b_scaler.l_h); vpx_free(g_b_scaler.l_h);
vpx_free(g_b_scaler.l_h_uv); vpx_free(g_b_scaler.l_h_uv);
g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
vpx_free(g_b_scaler.c_w); vpx_free(g_b_scaler.c_w);
vpx_free(g_b_scaler.c_h); vpx_free(g_b_scaler.c_h);
vpx_free(g_b_scaler.c_h_uv); vpx_free(g_b_scaler.c_h_uv);
g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); }
g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); }
g_b_scaler.hbuf = g_hbuf; // Create the coeffients that will be used for the cubic interpolation.
g_b_scaler.hbuf_uv = g_hbuf_uv; // Because scaling does not have to be equal in the vertical and horizontal
// regimes the phase offsets will be different. There are 4 coefficents
// Set up polyphase filter taps. This needs to be done before // for each point, two on each side. The layout is that there are the
// the scaling because of the floating point math required. The // 4 coefficents for each phase in the array and then the next phase.
// coefficients are multiplied by 2^12 so that fixed point math int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) {
// can be used in the main scaling loop. int i;
#ifdef FIXED_POINT #ifdef FIXED_POINT
fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; int phase_offset_int;
unsigned int fixed_mult;
int product_val = 0;
#else
float phase_offset;
#endif
int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
product_val = 0; if (g_first_time)
bicubic_coefficient_init();
for (i = 0; i < g_b_scaler.nw; i++)
{
if (product_val > g_b_scaler.nw)
product_val -= g_b_scaler.nw;
phase_offset_int = (fixed_mult * product_val) >> 16;
g_b_scaler.c_w[i*4] = c3_fixed(phase_offset_int);
g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int);
g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int);
g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int);
product_val += d_w;
}
fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; // check to see if the coefficents have already been set up correctly
if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
&& (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
return 0;
product_val = 0; g_b_scaler.in_width = in_width;
g_b_scaler.in_height = in_height;
g_b_scaler.out_width = out_width;
g_b_scaler.out_height = out_height;
for (i = 0; i < g_b_scaler.nh; i++) // Don't want to allow crazy scaling, just try and prevent a catastrophic
{ // failure here. Want to fail after setting the member functions so if
if (product_val > g_b_scaler.nh) // if the scaler is called the member functions will not scale.
product_val -= g_b_scaler.nh; if (out_width <= 0 || out_height <= 0)
return -1;
phase_offset_int = (fixed_mult * product_val) >> 16; // reduce in/out width and height ratios using the gcd
gcd_w = gcd(out_width, in_width);
gcd_h = gcd(out_height, in_height);
gcd_h_uv = gcd(out_height, in_height / 2);
g_b_scaler.c_h[i*4] = c0_fixed(phase_offset_int); // the numerator width and height are to be saved in
g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int); // globals so they can be used during the scaling process
g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int); // without having to be recalculated.
g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int); g_b_scaler.nw = out_width / gcd_w;
d_w = in_width / gcd_w;
product_val += d_h; g_b_scaler.nh = out_height / gcd_h;
} d_h = in_height / gcd_h;
fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; g_b_scaler.nh_uv = out_height / gcd_h_uv;
d_h_uv = (in_height / 2) / gcd_h_uv;
product_val = 0; // allocate memory for the coefficents
vpx_free(g_b_scaler.l_w);
for (i = 0; i < g_b_scaler.nh_uv; i++) vpx_free(g_b_scaler.l_h);
{
if (product_val > g_b_scaler.nh_uv)
product_val -= g_b_scaler.nh_uv;
phase_offset_int = (fixed_mult * product_val) >> 16; vpx_free(g_b_scaler.l_h_uv);
g_b_scaler.c_h_uv[i*4] = c0_fixed(phase_offset_int); g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int); g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int); g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int);
product_val += d_h_uv; vpx_free(g_b_scaler.c_w);
}
vpx_free(g_b_scaler.c_h);
vpx_free(g_b_scaler.c_h_uv);
g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
g_b_scaler.hbuf = g_hbuf;
g_b_scaler.hbuf_uv = g_hbuf_uv;
// Set up polyphase filter taps. This needs to be done before
// the scaling because of the floating point math required. The
// coefficients are multiplied by 2^12 so that fixed point math
// can be used in the main scaling loop.
#ifdef FIXED_POINT
fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
product_val = 0;
for (i = 0; i < g_b_scaler.nw; i++) {
if (product_val > g_b_scaler.nw)
product_val -= g_b_scaler.nw;
phase_offset_int = (fixed_mult * product_val) >> 16;
g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int);
g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int);
g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int);
g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int);
product_val += d_w;
}
fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
product_val = 0;
for (i = 0; i < g_b_scaler.nh; i++) {
if (product_val > g_b_scaler.nh)
product_val -= g_b_scaler.nh;
phase_offset_int = (fixed_mult * product_val) >> 16;
g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int);
g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int);
g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int);
g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int);
product_val += d_h;
}
fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
product_val = 0;
for (i = 0; i < g_b_scaler.nh_uv; i++) {
if (product_val > g_b_scaler.nh_uv)
product_val -= g_b_scaler.nh_uv;
phase_offset_int = (fixed_mult * product_val) >> 16;
g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int);
g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int);
product_val += d_h_uv;
}
#else #else
for (i = 0; i < g_nw; i++) for (i = 0; i < g_nw; i++) {
{ phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; g_c_w[i * 4] = (C3(phase_offset) * 4096.0);
g_c_w[i*4] = (C3(phase_offset) * 4096.0); g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0);
g_c_w[i*4+1] = (C2(phase_offset) * 4096.0); g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0);
g_c_w[i*4+2] = (C1(phase_offset) * 4096.0); g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0);
g_c_w[i*4+3] = (C0(phase_offset) * 4096.0); }
}
for (i = 0; i < g_nh; i++) for (i = 0; i < g_nh; i++) {
{ phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; g_c_h[i * 4] = (C0(phase_offset) * 4096.0);
g_c_h[i*4] = (C0(phase_offset) * 4096.0); g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0);
g_c_h[i*4+1] = (C1(phase_offset) * 4096.0); g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0);
g_c_h[i*4+2] = (C2(phase_offset) * 4096.0); g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0);
g_c_h[i*4+3] = (C3(phase_offset) * 4096.0); }
}
for (i = 0; i < g_nh_uv; i++) for (i = 0; i < g_nh_uv; i++) {
{ phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0);
g_c_h_uv[i*4] = (C0(phase_offset) * 4096.0); g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0);
g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0); g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0);
g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0); g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0);
g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0); }
}
#endif #endif
// Create an array that corresponds input lines to output lines. // Create an array that corresponds input lines to output lines.
// This doesn't require floating point math, but it does require // This doesn't require floating point math, but it does require
// a division and because hardware division is not present that // a division and because hardware division is not present that
// is a call. // is a call.
for (i = 0; i < out_width; i++) for (i = 0; i < out_width; i++) {
{ g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
if ((g_b_scaler.l_w[i] + 2) <= in_width) if ((g_b_scaler.l_w[i] + 2) <= in_width)
g_b_scaler.max_usable_out_width = i; g_b_scaler.max_usable_out_width = i;
} }
for (i = 0; i < out_height + 1; i++) for (i = 0; i < out_height + 1; i++) {
{ g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; }
}
return 0; return 0;
} }
int bicubic_scale(int in_width, int in_height, int in_stride, int bicubic_scale(int in_width, int in_height, int in_stride,
int out_width, int out_height, int out_stride, int out_width, int out_height, int out_stride,
unsigned char *input_image, unsigned char *output_image) unsigned char *input_image, unsigned char *output_image) {
{ short *RESTRICT l_w, * RESTRICT l_h;
short *RESTRICT l_w, * RESTRICT l_h; short *RESTRICT c_w, * RESTRICT c_h;
short *RESTRICT c_w, * RESTRICT c_h; unsigned char *RESTRICT ip, * RESTRICT op;
unsigned char *RESTRICT ip, * RESTRICT op; unsigned char *RESTRICT hbuf;
unsigned char *RESTRICT hbuf; int h, w, lw, lh;
int h, w, lw, lh; int temp_sum;
int temp_sum; int phase_offset_w, phase_offset_h;
int phase_offset_w, phase_offset_h;
c_w = g_b_scaler.c_w; c_w = g_b_scaler.c_w;
c_h = g_b_scaler.c_h; c_h = g_b_scaler.c_h;
op = output_image; op = output_image;
l_w = g_b_scaler.l_w; l_w = g_b_scaler.l_w;
l_h = g_b_scaler.l_h; l_h = g_b_scaler.l_h;
phase_offset_h = 0; phase_offset_h = 0;
for (h = 0; h < out_height; h++) for (h = 0; h < out_height; h++) {
{ // select the row to work on
// select the row to work on lh = l_h[h];
lh = l_h[h]; ip = input_image + (in_stride * lh);
ip = input_image + (in_stride * lh);
// vp8_filter the row vertically into an temporary buffer. // vp8_filter the row vertically into an temporary buffer.
// If the phase offset == 0 then all the multiplication // If the phase offset == 0 then all the multiplication
// is going to result in the output equalling the input. // is going to result in the output equalling the input.
// So instead point the temporary buffer to the input. // So instead point the temporary buffer to the input.
// Also handle the boundry condition of not being able to // Also handle the boundry condition of not being able to
// filter that last lines. // filter that last lines.
if (phase_offset_h && (lh < in_height - 2)) if (phase_offset_h && (lh < in_height - 2)) {
{ hbuf = g_b_scaler.hbuf;
hbuf = g_b_scaler.hbuf;
for (w = 0; w < in_width; w++) for (w = 0; w < in_width; w++) {
{ temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride];
temp_sum = c_h[phase_offset_h*4+3] * ip[w - in_stride]; temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w];
temp_sum += c_h[phase_offset_h*4+2] * ip[w]; temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride];
temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride]; temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride];
temp_sum += c_h[phase_offset_h*4] * ip[w + 2*in_stride];
hbuf[w] = temp_sum >> 12; hbuf[w] = temp_sum >> 12;
} }
} } else
else hbuf = ip;
hbuf = ip;
// increase the phase offset for the next time around. // increase the phase offset for the next time around.
if (++phase_offset_h >= g_b_scaler.nh) if (++phase_offset_h >= g_b_scaler.nh)
phase_offset_h = 0; phase_offset_h = 0;
// now filter and expand it horizontally into the final // now filter and expand it horizontally into the final
// output buffer // output buffer
phase_offset_w = 0;
for (w = 0; w < out_width; w++) {
// get the index to use to expand the image
lw = l_w[w];
temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1];
temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw];
temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1];
temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2];
temp_sum = temp_sum >> 12;
if (++phase_offset_w >= g_b_scaler.nw)
phase_offset_w = 0; phase_offset_w = 0;
for (w = 0; w < out_width; w++) // boundry conditions
{ if ((lw + 2) >= in_width)
// get the index to use to expand the image temp_sum = hbuf[lw];
lw = l_w[w];
temp_sum = c_w[phase_offset_w*4] * hbuf[lw - 1]; if (lw == 0)
temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw]; temp_sum = hbuf[0];
temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1];
temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2];
temp_sum = temp_sum >> 12;
if (++phase_offset_w >= g_b_scaler.nw) op[w] = temp_sum;
phase_offset_w = 0;
// boundry conditions
if ((lw + 2) >= in_width)
temp_sum = hbuf[lw];
if (lw == 0)
temp_sum = hbuf[0];
op[w] = temp_sum;
}
op += out_stride;
} }
return 0; op += out_stride;
}
return 0;
} }
void bicubic_scale_frame_reset() void bicubic_scale_frame_reset() {
{ g_b_scaler.out_width = 0;
g_b_scaler.out_width = 0; g_b_scaler.out_height = 0;
g_b_scaler.out_height = 0;
} }
void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
int new_width, int new_height) int new_width, int new_height) {
{
dst->y_width = new_width; dst->y_width = new_width;
dst->y_height = new_height; dst->y_height = new_height;
dst->uv_width = new_width / 2; dst->uv_width = new_width / 2;
dst->uv_height = new_height / 2; dst->uv_height = new_height / 2;
dst->y_stride = dst->y_width; dst->y_stride = dst->y_width;
dst->uv_stride = dst->uv_width; dst->uv_stride = dst->uv_width;
bicubic_scale(src->y_width, src->y_height, src->y_stride, bicubic_scale(src->y_width, src->y_height, src->y_stride,
new_width, new_height, dst->y_stride, new_width, new_height, dst->y_stride,
src->y_buffer, dst->y_buffer); src->y_buffer, dst->y_buffer);
bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
new_width / 2, new_height / 2, dst->uv_stride, new_width / 2, new_height / 2, dst->uv_stride,
src->u_buffer, dst->u_buffer); src->u_buffer, dst->u_buffer);
bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
new_width / 2, new_height / 2, dst->uv_stride, new_width / 2, new_height / 2, dst->uv_stride,
src->v_buffer, dst->v_buffer); src->v_buffer, dst->v_buffer);
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -20,81 +20,73 @@
* *
****************************************************************************/ ****************************************************************************/
int int
vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
{ if (ybf) {
if (ybf) vpx_free(ybf->buffer_alloc);
{
vpx_free(ybf->buffer_alloc);
/* buffer_alloc isn't accessed by most functions. Rather y_buffer, /* buffer_alloc isn't accessed by most functions. Rather y_buffer,
u_buffer and v_buffer point to buffer_alloc and are used. Clear out u_buffer and v_buffer point to buffer_alloc and are used. Clear out
all of this so that a freed pointer isn't inadvertently used */ all of this so that a freed pointer isn't inadvertently used */
vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG)); vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG));
} } else {
else return -1;
{ }
return -1;
}
return 0; return 0;
} }
/**************************************************************************** /****************************************************************************
* *
****************************************************************************/ ****************************************************************************/
int int
vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) {
{ /*NOTE:*/
/*NOTE:*/
if (ybf) if (ybf) {
{ int y_stride = ((width + 2 * border) + 31) & ~31;
int y_stride = ((width + 2 * border) + 31) & ~31; int yplane_size = (height + 2 * border) * y_stride;
int yplane_size = (height + 2 * border) * y_stride; int uv_width = width >> 1;
int uv_width = width >> 1; int uv_height = height >> 1;
int uv_height = height >> 1; /** There is currently a bunch of code which assumes
/** There is currently a bunch of code which assumes * uv_stride == y_stride/2, so enforce this here. */
* uv_stride == y_stride/2, so enforce this here. */ int uv_stride = y_stride >> 1;
int uv_stride = y_stride >> 1; int uvplane_size = (uv_height + border) * uv_stride;
int uvplane_size = (uv_height + border) * uv_stride;
vp8_yv12_de_alloc_frame_buffer(ybf); vp8_yv12_de_alloc_frame_buffer(ybf);
/** Only support allocating buffers that have a height and width that /** Only support allocating buffers that have a height and width that
* are multiples of 16, and a border that's a multiple of 32. * are multiples of 16, and a border that's a multiple of 32.
* The border restriction is required to get 16-byte alignment of the * The border restriction is required to get 16-byte alignment of the
* start of the chroma rows without intoducing an arbitrary gap * start of the chroma rows without intoducing an arbitrary gap
* between planes, which would break the semantics of things like * between planes, which would break the semantics of things like
* vpx_img_set_rect(). */ * vpx_img_set_rect(). */
if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) if ((width & 0xf) | (height & 0xf) | (border & 0x1f))
return -3; return -3;
ybf->y_width = width; ybf->y_width = width;
ybf->y_height = height; ybf->y_height = height;
ybf->y_stride = y_stride; ybf->y_stride = y_stride;
ybf->uv_width = uv_width; ybf->uv_width = uv_width;
ybf->uv_height = uv_height; ybf->uv_height = uv_height;
ybf->uv_stride = uv_stride; ybf->uv_stride = uv_stride;
ybf->border = border; ybf->border = border;
ybf->frame_size = yplane_size + 2 * uvplane_size; ybf->frame_size = yplane_size + 2 * uvplane_size;
ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
if (ybf->buffer_alloc == NULL) if (ybf->buffer_alloc == NULL)
return -1; return -1;
ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2;
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2;
ybf->corrupted = 0; /* assume not currupted by errors */ ybf->corrupted = 0; /* assume not currupted by errors */
} } else {
else return -2;
{ }
return -2;
}
return 0; return 0;
} }

View File

@ -21,184 +21,174 @@
* *
****************************************************************************/ ****************************************************************************/
/* Defined below; shared by the Y, U and V passes of the entry point. */
static void yv12_extend_plane_borders(unsigned char *plane, int stride,
                                      int width, int height,
                                      unsigned int border);

/* Fills the border region around all three planes of `ybf` by replicating
 * the outermost visible pixels.
 *
 * The Y plane uses ybf->border as its border thickness; the U and V planes
 * use half of that value (integer division) together with the shared
 * uv_stride / uv_width / uv_height dimensions.
 */
void
vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
  const unsigned int y_border = ybf->border;
  const unsigned int uv_border = y_border / 2;

  yv12_extend_plane_borders(ybf->y_buffer, ybf->y_stride,
                            ybf->y_width, ybf->y_height, y_border);
  yv12_extend_plane_borders(ybf->u_buffer, ybf->uv_stride,
                            ybf->uv_width, ybf->uv_height, uv_border);
  yv12_extend_plane_borders(ybf->v_buffer, ybf->uv_stride,
                            ybf->uv_width, ybf->uv_height, uv_border);
}

/* Extends a single plane into its border.
 *
 * plane  - address of the first visible pixel (top-left) of the plane
 * stride - byte distance between the starts of consecutive rows
 * width  - number of visible pixels per row
 * height - number of visible rows
 * border - border thickness, in pixels, on each of the four sides
 *
 * Pass 1 writes `border` copies of each row's first pixel to its left and
 * `border` copies of its last pixel to its right.  Pass 2 then copies the
 * first and last rows (starting at their left border, `stride` bytes each)
 * into the `border` rows above and below the plane, so the corners are
 * filled from the already-extended rows.
 */
static void yv12_extend_plane_borders(unsigned char *plane, int stride,
                                      int width, int height,
                                      unsigned int border) {
  unsigned char *left_px = plane;
  unsigned char *right_px = plane + width - 1;
  unsigned char *left_fill = left_px - border;
  unsigned char *right_fill = right_px + 1;
  unsigned char *top_row, *bottom_row;
  unsigned char *above, *below;
  int i;

  /* Pass 1: left and right borders, one visible row at a time. */
  for (i = 0; i < height; i++) {
    vpx_memset(left_fill, left_px[0], border);
    vpx_memset(right_fill, right_px[0], border);
    left_px += stride;
    right_px += stride;
    left_fill += stride;
    right_fill += stride;
  }

  /* Pass 2: top and bottom borders, sourced from the extended edge rows. */
  top_row = plane - border;
  bottom_row = top_row + (height * stride) - stride;
  above = top_row - (border * stride);
  below = bottom_row + stride;

  for (i = 0; i < (int)border; i++) {
    vpx_memcpy(above, top_row, stride);
    vpx_memcpy(below, bottom_row, stride);
    above += stride;
    below += stride;
  }
}
/* Fills the border region around the Y plane of `ybf` only; the U and V
 * planes are not touched.
 *
 * Each visible row's first and last pixels are replicated ybf->border times
 * to the left and right, then the first and last (already extended) rows are
 * copied into the ybf->border rows above and below the plane.
 */
static void
extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) {
  const unsigned int border = ybf->border;
  const int stride = ybf->y_stride;
  const int height = ybf->y_height;
  const int width = ybf->y_width;
  unsigned char *left_px, *right_px;
  unsigned char *left_fill, *right_fill;
  unsigned char *top_row, *bottom_row;
  unsigned char *above, *below;
  int i;

  /* Left and right borders: repeat the outermost pixel of every row. */
  left_px = ybf->y_buffer;
  right_px = left_px + width - 1;
  left_fill = left_px - border;
  right_fill = right_px + 1;

  for (i = 0; i < height; i++) {
    vpx_memset(left_fill, left_px[0], border);
    vpx_memset(right_fill, right_px[0], border);
    left_px += stride;
    right_px += stride;
    left_fill += stride;
    right_fill += stride;
  }

  /* Top and bottom borders: copy whole rows (stride bytes, starting at the
   * left border) so the corners come from the extended edge rows. */
  top_row = ybf->y_buffer - border;
  bottom_row = top_row + (height * stride) - stride;
  above = top_row - (border * stride);
  below = bottom_row + stride;

  for (i = 0; i < (int)border; i++) {
    vpx_memcpy(above, top_row, stride);
    vpx_memcpy(below, bottom_row, stride);
    above += stride;
    below += stride;
  }
}
@ -221,57 +211,53 @@ extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf)
* *
****************************************************************************/ ****************************************************************************/
/* Copies the visible Y, U and V pixels of `src_ybc` into `dst_ybc` row by
 * row, then extends the borders of the destination frame.
 *
 * Row counts and widths are taken from the source; each buffer is advanced
 * by its own stride, so the two frames may use different strides.
 */
void
vp8_yv12_copy_frame_c(YV12_BUFFER_CONFIG *src_ybc,
                      YV12_BUFFER_CONFIG *dst_ybc) {
  unsigned char *from;
  unsigned char *to;
  int rows_left;

  /* Y plane */
  from = src_ybc->y_buffer;
  to = dst_ybc->y_buffer;
  for (rows_left = src_ybc->y_height; rows_left > 0; rows_left--) {
    vpx_memcpy(to, from, src_ybc->y_width);
    from += src_ybc->y_stride;
    to += dst_ybc->y_stride;
  }

  /* U plane */
  from = src_ybc->u_buffer;
  to = dst_ybc->u_buffer;
  for (rows_left = src_ybc->uv_height; rows_left > 0; rows_left--) {
    vpx_memcpy(to, from, src_ybc->uv_width);
    from += src_ybc->uv_stride;
    to += dst_ybc->uv_stride;
  }

  /* V plane */
  from = src_ybc->v_buffer;
  to = dst_ybc->v_buffer;
  for (rows_left = src_ybc->uv_height; rows_left > 0; rows_left--) {
    vpx_memcpy(to, from, src_ybc->uv_width);
    from += src_ybc->uv_stride;
    to += dst_ybc->uv_stride;
  }

  /* The copy above only covers visible pixels; rebuild the borders. */
  vp8_yv12_extend_frame_borders_c(dst_ybc);
}
/* Copies only the visible Y-plane pixels of `src_ybc` into `dst_ybc`.
 *
 * The row count and width come from the source; each buffer advances by its
 * own y_stride.  The chroma planes and the destination borders are left
 * untouched.
 */
void vp8_yv12_copy_y_c(YV12_BUFFER_CONFIG *src_ybc,
                       YV12_BUFFER_CONFIG *dst_ybc) {
  unsigned char *from = src_ybc->y_buffer;
  unsigned char *to = dst_ybc->y_buffer;
  int rows_left;

  for (rows_left = src_ybc->y_height; rows_left > 0; rows_left--) {
    vpx_memcpy(to, from, src_ybc->y_width);
    from += src_ybc->y_stride;
    to += dst_ybc->y_stride;
  }
}

View File

@ -14,33 +14,32 @@
#include "vpx_scale/yv12config.h" #include "vpx_scale/yv12config.h"
// State block for the bicubic scaler: frame dimensions plus the tables and
// scratch buffers used while filtering.
typedef struct {
  // input and output frame dimensions
  int in_width;
  int in_height;
  int out_width;
  int out_height;
  int max_usable_out_width;

  // numerator for the width and height
  int nw;
  int nh;
  int nh_uv;

  // output to input correspondence array
  short *l_w;
  short *l_h;
  short *l_h_uv;

  // polyphase coefficients
  short *c_w;
  short *c_h;
  short *c_h_uv;

  // buffer for horizontal filtering.
  unsigned char *hbuf;
  unsigned char *hbuf_uv;
} BICUBIC_SCALER_STRUCT;
int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height); int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height);

View File

@ -17,12 +17,11 @@
#ifndef SCALE_MODE_H #ifndef SCALE_MODE_H
#define SCALE_MODE_H #define SCALE_MODE_H
/* Scaling mode selectors; the numeric values run consecutively from 0 to 3. */
typedef enum {
  MAINTAIN_ASPECT_RATIO = 0,
  SCALE_TO_FIT          = 1,
  CENTER                = 2,
  OTHER                 = 3
} SCALE_MODE;

View File

@ -14,29 +14,24 @@
#include "vpx_scale/yv12config.h" #include "vpx_scale/yv12config.h"
extern void vp8_yv12_scale_or_center extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config,
( YV12_BUFFER_CONFIG *dst_yuv_config,
YV12_BUFFER_CONFIG *src_yuv_config, int expanded_frame_width,
YV12_BUFFER_CONFIG *dst_yuv_config, int expanded_frame_height,
int expanded_frame_width, int scaling_mode,
int expanded_frame_height, int HScale,
int scaling_mode, int HRatio,
int HScale, int VScale,
int HRatio, int VRatio);
int VScale,
int VRatio extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src,
); YV12_BUFFER_CONFIG *dst,
extern void vp8_scale_frame unsigned char *temp_area,
( unsigned char temp_height,
YV12_BUFFER_CONFIG *src, unsigned int hscale,
YV12_BUFFER_CONFIG *dst, unsigned int hratio,
unsigned char *temp_area, unsigned int vscale,
unsigned char temp_height, unsigned int vratio,
unsigned int hscale, unsigned int interlaced);
unsigned int hratio,
unsigned int vscale,
unsigned int vratio,
unsigned int interlaced
);
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@ -46,46 +46,42 @@ extern void register_mmxscalers(void);
* *
****************************************************************************/ ****************************************************************************/
void void
vp8_scale_machine_specific_config(void) vp8_scale_machine_specific_config(void) {
{ // If MMX supported then set to use MMX versions of functions else
// If MMX supported then set to use MMX versions of functions else // use original 'C' versions.
// use original 'C' versions. int mmx_enabled;
int mmx_enabled; int xmm_enabled;
int xmm_enabled; int wmt_enabled;
int wmt_enabled;
vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
if (mmx_enabled || xmm_enabled || wmt_enabled) if (mmx_enabled || xmm_enabled || wmt_enabled) {
{ register_mmxscalers();
register_mmxscalers(); } else {
} vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c;
else vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c;
{ vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c;
vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c;
vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c;
vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c;
vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c;
vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c;
vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c;
vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c;
vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c;
vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c;
vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c;
vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c;
vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c;
vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c;
vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c;
vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c;
vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c;
vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c;
vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c;
vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c;
vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c;
vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c;
vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c;
} }
} }

View File

@ -16,54 +16,54 @@ extern "C"
{ {
#endif #endif
#define VP7BORDERINPIXELS 48
#define VP8BORDERINPIXELS 32 #define VP8BORDERINPIXELS 32
#define VP9BORDERINPIXELS 64
#define VP9_INTERP_EXTEND 4
/************************************* /*************************************
For INT_YUV: For INT_YUV:
Y = (R+G*2+B)/4; Y = (R+G*2+B)/4;
U = (R-B)/2; U = (R-B)/2;
V = (G*2 - R - B)/4; V = (G*2 - R - B)/4;
And And
R = Y+U-V; R = Y+U-V;
G = Y+V; G = Y+V;
B = Y-U-V; B = Y-U-V;
************************************/ ************************************/
/* Colour representation of a frame (see the INT_YUV formulas above). */
typedef enum {
  /* Regular yuv */
  REG_YUV = 0,
  /* The type of yuv that can be transferred to and from RGB through an
     integer transform */
  INT_YUV = 1
} YUV_TYPE;
typedef struct yv12_buffer_config typedef struct yv12_buffer_config {
{ int y_width;
int y_width; int y_height;
int y_height; int y_stride;
int y_stride; /* int yinternal_width; */
/* int yinternal_width; */
int uv_width; int uv_width;
int uv_height; int uv_height;
int uv_stride; int uv_stride;
/* int uvinternal_width; */ /* int uvinternal_width; */
unsigned char *y_buffer; unsigned char *y_buffer;
unsigned char *u_buffer; unsigned char *u_buffer;
unsigned char *v_buffer; unsigned char *v_buffer;
unsigned char *buffer_alloc; unsigned char *buffer_alloc;
int border; int border;
int frame_size; int frame_size;
YUV_TYPE clrtype; YUV_TYPE clrtype;
int corrupted; int corrupted;
int flags; int flags;
} YV12_BUFFER_CONFIG; } YV12_BUFFER_CONFIG;
int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
#ifdef __cplusplus #ifdef __cplusplus
} }