2010-05-18 17:58:33 +02:00
|
|
|
/*
|
2010-09-09 14:16:39 +02:00
|
|
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
2010-05-18 17:58:33 +02:00
|
|
|
*
|
2010-06-18 18:39:21 +02:00
|
|
|
* Use of this source code is governed by a BSD-style license
|
2010-06-04 22:19:40 +02:00
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
2010-06-18 18:39:21 +02:00
|
|
|
* in the file PATENTS. All contributing project authors may
|
2010-06-04 22:19:40 +02:00
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
2010-05-18 17:58:33 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
*
|
|
|
|
* Module Title : gen_scalers.c
|
|
|
|
*
|
|
|
|
* Description : Generic image scaling functions.
|
|
|
|
*
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
* Header Files
|
|
|
|
****************************************************************************/
|
|
|
|
#include "vpx_scale/vpxscale.h"
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
* Imports
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_4_5_scale_c64
 *
 *  INPUTS        : const unsigned char *source : Pointer to source data.
 *                  unsigned int source_width   : Number of source pixels in
 *                                                the line (consumed 4 at a
 *                                                time).
 *                  unsigned char *dest         : Pointer to destination data.
 *                  unsigned int dest_width     : NOT USED.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Copies horizontal line of pixels from source to
 *                  destination scaling up by 4 to 5.
 *
 *  SPECIAL NOTES : Every group of 4 source pixels produces 5 destination
 *                  pixels.  The fifth output of a group blends the last
 *                  pixel of the group with the first pixel of the next
 *                  group; the final group has no successor, so its fifth
 *                  output repeats its last pixel.
 *                  The final group is always read with a 4-byte load, so
 *                  at least 4 source bytes must be readable.
 *
 ****************************************************************************/
static
void horizontal_line_4_5_scale_c64
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
)
{
    unsigned int i;
    unsigned int ba, cb, dc, ed;
    unsigned char *restrict des = dest;
    unsigned int *restrict src = (unsigned int *)source;
    unsigned int src_current, src_next;

    // Constants that are to be used for the filtering.  For
    // best speed we are going to want to right shift by 16.
    // In the generic version they were shift by 8, so put
    // an extra 8 in now so that 16 will come out later.
    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    (void) dest_width;

    // 5 points are needed to filter to give 5 output points.
    // A load can pull up 4 at a time, and one needs to be
    // "borrowed" from the next set of data.  So instead of
    // loading those 5 points each time, "steal" a point from
    // the next set and only load up 4 each time through.
    src_current = _mem4(src);

    // "i + 4 < source_width" rather than "i < source_width - 4" so that a
    // width below 4 cannot wrap the unsigned subtraction.
    for (i = 0; i + 4 < source_width; i += 4)
    {
        // Advance BEFORE loading: src_next has to be the word that follows
        // src_current, otherwise the current word would be loaded (and
        // filtered) a second time.
        src_next = _mem4(++src);

        // Reorder the data so that it is ready for the
        // dot product.
        ba = _unpklu4(src_current);
        cb = _unpkhu4(_rotl(src_current, 8));
        dc = _unpkhu4(src_current);
        ed = _unpkhu4(_shrmb(src_next, src_current));

        // Use the dot product with round and shift.
        des [0] = src_current & 0xff;
        des [1] = _dotprsu2(ba, const_205_51);
        des [2] = _dotprsu2(cb, const_154_102);
        des [3] = _dotprsu2(dc, const_102_154);
        des [4] = _dotprsu2(ed, const_51_205);

        des += 5;

        // Reuse loaded values next time around.
        src_current = src_next;
    }

    // Filter the last set of points.  Normally a point from the next set
    // would be used, but there is no next set, so just fill.
    ba = _unpklu4(src_current);
    cb = _unpkhu4(_rotl(src_current, 8));
    dc = _unpkhu4(src_current);

    des [0] = src_current & 0xff;
    des [1] = _dotprsu2(ba, const_205_51);
    des [2] = _dotprsu2(cb, const_154_102);
    des [3] = _dotprsu2(dc, const_102_154);
    // Fill with the LAST pixel of the group (top byte of the word), not the
    // first one.
    des [4] = src_current >> 24;
}
|
|
|
|
/****************************************************************************
 *
 *  ROUTINE       : vertical_band_4_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales vertical band of pixels by scale 4 to 5, in place.
 *                  Rows 0..3 of the band and row 5 (the first line of the
 *                  band below) are read; rows 1..4 are written.
 *
 *  SPECIAL NOTES : The routine uses the first line of the band below
 *                  the current band.
 *                  The column loads run one column ahead of the stores, so
 *                  column dest_width of each input row is read as well.
 *
 ****************************************************************************/
static
void vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    // Filter taps, two per word, already shifted up by 8 so that the
    // rounding dot product leaves an 8-bit result.
    const unsigned int taps_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int taps_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int taps_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int taps_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Byte offsets of the rows touched, relative to the top of the band.
    const unsigned int row1 = dest_pitch;
    const unsigned int row2 = dest_pitch * 2;
    const unsigned int row3 = dest_pitch * 3;
    const unsigned int row4 = dest_pitch * 4;
    const unsigned int row5 = dest_pitch * 5;

    unsigned char *restrict in  = dest;
    unsigned char *restrict out = dest;
    unsigned int p0, p1, p2, p3, p5;
    unsigned int col;

    // Load the first column ahead of the loop so that each iteration can
    // fetch the next column while filtering the current one.
    p0 = in [0];
    p1 = in [row1];
    p2 = in [row2];
    p3 = in [row3];
    p5 = in [row5];
    in++;

    for (col = 0; col < dest_width; col++)
    {
        // Pair up vertically adjacent pixels of the current column.
        const unsigned int pair_1_0 = _pack2(p1, p0);
        const unsigned int pair_2_1 = _pack2(p2, p1);
        const unsigned int pair_3_2 = _pack2(p3, p2);
        const unsigned int pair_5_3 = _pack2(p5, p3);

        // Fetch the next column.
        p0 = in [0];
        p1 = in [row1];
        p2 = in [row2];
        p3 = in [row3];
        p5 = in [row5];
        in++;

        // Filter and store the current column.
        out [row1] = _dotprsu2(pair_1_0, taps_205_51);
        out [row2] = _dotprsu2(pair_2_1, taps_154_102);
        out [row3] = _dotprsu2(pair_3_2, taps_102_154);
        out [row4] = _dotprsu2(pair_5_3, taps_51_205);

        out++;
    }
}
|
|
|
|
|
|
|
|
/****************************************************************************
 *
 *  ROUTINE       : last_vertical_band_4_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales last vertical band of pixels by scale 4 to 5, in
 *                  place.  Rows 0..3 are read; rows 1..4 are written.
 *
 *  SPECIAL NOTES : The routine does not have available the first line of
 *                  the band below the current band, since this is the
 *                  last band.  Row 4 is therefore a copy of the original
 *                  row 3 of the same column.
 *                  The column loads run one column ahead of the stores, so
 *                  column dest_width of each input row is read as well.
 *
 ****************************************************************************/
static
void last_vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c, d;
    unsigned int ba, cb, dc;
    unsigned int last_row;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;

    // Filter taps, two per word, already shifted up by 8 so that the
    // rounding dot product leaves an 8-bit result.
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Load the first column ahead of the loop.
    a = src [0];
    b = src [dest_pitch];
    c = src [dest_pitch*2];
    d = src [dest_pitch*3];
    src ++;

    for (i = 0; i < dest_width; ++i)
    {
        ba = _pack2(b, a);
        cb = _pack2(c, b);
        dc = _pack2(d, c);

        // Keep this column's bottom pixel: d is about to be overwritten
        // with the NEXT column's value, and row 4 must replicate the
        // current column.
        last_row = d;

        // Fetch the next column.
        a = src [0];
        b = src [dest_pitch];
        c = src [dest_pitch*2];
        d = src [dest_pitch*3];
        src ++;

        des [dest_pitch]   = _dotprsu2(ba, const_205_51);
        des [dest_pitch*2] = _dotprsu2(cb, const_154_102);
        des [dest_pitch*3] = _dotprsu2(dc, const_102_154);
        des [dest_pitch*4] = (unsigned char) last_row;

        des++;
    }
}
|
|
|
|
|
|
|
|
/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_3_5_scale_c64
 *
 *  INPUTS        : const unsigned char *source : Pointer to source data.
 *                  unsigned int source_width   : Number of source pixels in
 *                                                the line (consumed 3 at a
 *                                                time).
 *                  unsigned char *dest         : Pointer to destination data.
 *                  unsigned int dest_width     : NOT USED.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Copies horizontal line of pixels from source to
 *                  destination scaling up by 3 to 5.
 *
 *  SPECIAL NOTES : Every group of 3 source pixels produces 5 destination
 *                  pixels.  Each group is fetched with one 4-byte load; the
 *                  fourth byte is the first pixel of the next group.  The
 *                  final group has no successor, so its fifth output
 *                  repeats its last pixel.
 *                  NOTE(review): the final 4-byte load reads one byte past
 *                  the last group - confirm the source line is padded.
 *
 ****************************************************************************/
static
void horizontal_line_3_5_scale_c64
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
)
{
    // Filter taps, two per word, already shifted up by 8 so that the
    // rounding dot product leaves an 8-bit result.
    const unsigned int taps_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int taps_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int taps_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int taps_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    unsigned char *restrict in  = (unsigned char *)source;
    unsigned char *restrict out = dest;
    unsigned int quad;
    unsigned int pair_1_0, pair_2_1, pair_3_2;
    unsigned int pos;

    (void) dest_width;

    // All groups except the last one.
    for (pos = 0; pos < source_width - 3; pos += 3)
    {
        quad = _mem4(in);

        // Split the four loaded bytes into adjacent pixel pairs, one
        // pixel per 16-bit half, ready for the dot product.
        pair_1_0 = _unpklu4(quad);
        pair_2_1 = _unpkhu4(_rotl(quad, 8));
        pair_3_2 = _unpkhu4(quad);

        out [0] = quad & 0xff;
        out [1] = _dotprsu2(pair_1_0, taps_154_102);
        out [2] = _dotprsu2(pair_2_1, taps_51_205);
        out [3] = _dotprsu2(pair_2_1, taps_205_51);
        out [4] = _dotprsu2(pair_3_2, taps_102_154);

        in  += 3;
        out += 5;
    }

    // Last group: same as above, except that the fifth output is a plain
    // copy of the group's last pixel.
    quad = _mem4(in);

    pair_1_0 = _unpklu4(quad);
    pair_2_1 = _unpkhu4(_rotl(quad, 8));
    pair_3_2 = _unpkhu4(quad);

    out [0] = quad & 0xff;
    out [1] = _dotprsu2(pair_1_0, taps_154_102);
    out [2] = _dotprsu2(pair_2_1, taps_51_205);
    out [3] = _dotprsu2(pair_2_1, taps_205_51);
    out [4] = pair_3_2 & 0xff;
}
|
|
|
|
|
|
|
|
/****************************************************************************
 *
 *  ROUTINE       : vertical_band_3_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales vertical band of pixels by scale 3 to 5, in place.
 *                  Rows 0..2 of the band and row 5 (the first line of the
 *                  band below) are read; rows 1..4 are written.
 *
 *  SPECIAL NOTES : The routine uses the first line of the band below
 *                  the current band.
 *                  The column loads run one column ahead of the stores, so
 *                  column dest_width of each input row is read as well.
 *
 ****************************************************************************/
static
void vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    // Filter taps, two per word, already shifted up by 8 so that the
    // rounding dot product leaves an 8-bit result.
    const unsigned int taps_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int taps_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int taps_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int taps_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Byte offsets of the rows touched, relative to the top of the band.
    const unsigned int row1 = dest_pitch;
    const unsigned int row2 = dest_pitch * 2;
    const unsigned int row3 = dest_pitch * 3;
    const unsigned int row4 = dest_pitch * 4;
    const unsigned int row5 = dest_pitch * 5;

    unsigned char *restrict in  = dest;
    unsigned char *restrict out = dest;
    unsigned int p0, p1, p2, p5;
    unsigned int col;

    // Load the first column ahead of the loop so that each iteration can
    // fetch the next column while filtering the current one.
    p0 = in [0];
    p1 = in [row1];
    p2 = in [row2];
    p5 = in [row5];
    in++;

    for (col = 0; col < dest_width; col++)
    {
        // Pair up vertically adjacent pixels of the current column.
        const unsigned int pair_1_0 = _pack2(p1, p0);
        const unsigned int pair_2_1 = _pack2(p2, p1);
        const unsigned int pair_5_2 = _pack2(p5, p2);

        // Fetch the next column.
        p0 = in [0];
        p1 = in [row1];
        p2 = in [row2];
        p5 = in [row5];
        in++;

        // Filter and store the current column.
        out [row1] = _dotprsu2(pair_1_0, taps_154_102);
        out [row2] = _dotprsu2(pair_2_1, taps_51_205);
        out [row3] = _dotprsu2(pair_2_1, taps_205_51);
        out [row4] = _dotprsu2(pair_5_2, taps_102_154);

        out++;
    }
}
|
|
|
|
|
|
|
|
/****************************************************************************
 *
 *  ROUTINE       : last_vertical_band_3_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales last vertical band of pixels by scale 3 to 5, in
 *                  place.  Rows 0..2 are read; rows 1..4 are written.
 *
 *  SPECIAL NOTES : The routine does not have available the first line of
 *                  the band below the current band, since this is the
 *                  last band.  Row 4 is therefore a copy of the original
 *                  row 2 of the same column.
 *                  The column loads run one column ahead of the stores, so
 *                  column dest_width of each input row is read as well.
 *
 ****************************************************************************/
static
void last_vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c;
    unsigned int ba, cb;
    unsigned int last_row;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;

    // Filter taps, two per word, already shifted up by 8 so that the
    // rounding dot product leaves an 8-bit result.
    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Load the first column ahead of the loop.
    a = src [0];
    b = src [dest_pitch];
    c = src [dest_pitch*2];
    src ++;

    for (i = 0; i < dest_width; ++i)
    {
        ba = _pack2(b, a);
        cb = _pack2(c, b);

        // Keep this column's bottom pixel: c is about to be overwritten
        // with the NEXT column's value, and row 4 must replicate the
        // current column.
        last_row = c;

        // Fetch the next column.
        a = src [0];
        b = src [dest_pitch];
        c = src [dest_pitch*2];
        src ++;

        des [dest_pitch]   = _dotprsu2(ba, const_154_102);
        des [dest_pitch*2] = _dotprsu2(cb, const_51_205);
        des [dest_pitch*3] = _dotprsu2(cb, const_205_51);
        des [dest_pitch*4] = (unsigned char) last_row;

        des++;
    }
}
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
*
|
|
|
|
* ROUTINE : horizontal_line_1_2_scale_c64
|
|
|
|
*
|
|
|
|
* INPUTS : const unsigned char *source : Pointer to source data.
|
|
|
|
* unsigned int source_width : Stride of source.
|
|
|
|
* unsigned char *dest : Pointer to destination data.
|
|
|
|
* unsigned int dest_width : Stride of destination (NOT USED).
|
|
|
|
*
|
|
|
|
* OUTPUTS : None.
|
|
|
|
*
|
|
|
|
* RETURNS : void
|
|
|
|
*
|
|
|
|
* FUNCTION : Copies horizontal line of pixels from source to
|
|
|
|
* destination scaling up by 1 to 2.
|
|
|
|
*
|
|
|
|
* SPECIAL NOTES : source width must be a multiple of 4.
|
|
|
|
*
|
|
|
|
****************************************************************************/
|
|
|
|
void horizontal_line_1_2_scale_c64
|
|
|
|
(
|
|
|
|
const unsigned char *source,
|
|
|
|
unsigned int source_width,
|
|
|
|
unsigned char *dest,
|
|
|
|
unsigned int dest_width
|
|
|
|
)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
unsigned char *restrict des = dest;
|
|
|
|
unsigned char *restrict src = (unsigned char *)source;
|
|
|
|
unsigned int src7_4i, src4_1i, src3_0i;
|
|
|
|
unsigned int a4_0i, ahi, alo;
|
|
|
|
double src7_0d, src3_0d;
|
|
|
|
const unsigned int k01 = 0x01010101;
|
|
|
|
|
|
|
|
for (i = 0; i < source_width / 4; i += 1)
|
|
|
|
{
|
|
|
|
// Load up the data from src. Here a wide load is
|
|
|
|
// used to get 8 bytes at once, only 5 will be used
|
|
|
|
// for the actual computation.
|
|
|
|
src7_0d = _memd8(src);
|
|
|
|
src3_0i = _lo(src7_0d);
|
|
|
|
src7_4i = _hi(src7_0d);
|
|
|
|
|
|
|
|
// Need to average between points. Shift byte 5 into
|
|
|
|
// the lower word. This will result in bytes 5-1
|
|
|
|
// averaged with 4-0.
|
|
|
|
src4_1i = _shrmb(src7_4i, src3_0i);
|
|
|
|
a4_0i = _avgu4(src4_1i, src3_0i);
|
|
|
|
|
|
|
|
// Expand the data out. Could do an unpack, however
|
|
|
|
// all but the multiply units are getting pretty hard
|
|
|
|
// here the multiply unit can take some of the computations.
|
|
|
|
src3_0d = _mpyu4(src3_0i, k01);
|
|
|
|
|
|
|
|
// The averages need to be unpacked so that they are in 16
|
|
|
|
// bit form and will be able to be interleaved with the
|
|
|
|
// original data
|
|
|
|
ahi = _unpkhu4(a4_0i);
|
|
|
|
alo = _unpklu4(a4_0i);
|
|
|
|
|
|
|
|
ahi = _swap4(ahi);
|
|
|
|
alo = _swap4(alo);
|
|
|
|
|
|
|
|
// Mix the average result in with the orginal data.
|
|
|
|
ahi = _hi(src3_0d) | ahi;
|
|
|
|
alo = _lo(src3_0d) | alo;
|
|
|
|
|
|
|
|
_memd8(des) = _itod(ahi, alo);
|
|
|
|
|
|
|
|
des += 8;
|
|
|
|
src += 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
*
|
|
|
|
* ROUTINE : vertical_band_1_2_scale_c64
|
|
|
|
*
|
|
|
|
* INPUTS : unsigned char *dest : Pointer to destination data.
|
|
|
|
* unsigned int dest_pitch : Stride of destination data.
|
|
|
|
* unsigned int dest_width : Width of destination data.
|
|
|
|
*
|
|
|
|
* OUTPUTS : None.
|
|
|
|
*
|
|
|
|
* RETURNS : void
|
|
|
|
*
|
|
|
|
* FUNCTION : Scales vertical band of pixels by scale 1 to 2. The
|
|
|
|
* height of the band scaled is 1-pixel.
|
|
|
|
*
|
|
|
|
* SPECIAL NOTES : The routine uses the first line of the band below
|
|
|
|
* the current band.
|
|
|
|
* Destination width must be a multiple of 4. Because the
|
|
|
|
* intput must be, therefore the output must be.
|
|
|
|
*
|
|
|
|
****************************************************************************/
|
|
|
|
static
|
|
|
|
void vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
unsigned int a, b;
|
|
|
|
unsigned int *restrict line_a = (unsigned int *)dest;
|
|
|
|
unsigned int *restrict line_b = (unsigned int *)(dest + (dest_pitch * 2));
|
|
|
|
unsigned int *restrict des = (unsigned int *)(dest + dest_pitch);
|
|
|
|
|
|
|
|
for (i = 0; i < dest_width / 4; i++)
|
|
|
|
{
|
|
|
|
a = _mem4(line_a++);
|
|
|
|
b = _mem4(line_b++);
|
|
|
|
|
|
|
|
_mem4(des++) = _avgu4(a, b);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
*
|
|
|
|
* ROUTINE : last_vertical_band_1_2_scale_c64
|
|
|
|
*
|
|
|
|
* INPUTS : unsigned char *dest : Pointer to destination data.
|
|
|
|
* unsigned int dest_pitch : Stride of destination data.
|
|
|
|
* unsigned int dest_width : Width of destination data.
|
|
|
|
*
|
|
|
|
* OUTPUTS : None.
|
|
|
|
*
|
|
|
|
* RETURNS : void
|
|
|
|
*
|
|
|
|
* FUNCTION : Scales last vertical band of pixels by scale 1 to 2. The
|
|
|
|
* height of the band scaled is 1-pixel.
|
|
|
|
*
|
|
|
|
* SPECIAL NOTES : The routine does not have available the first line of
|
|
|
|
* the band below the current band, since this is the
|
|
|
|
* last band. Again, width must be a multiple of 4.
|
|
|
|
*
|
|
|
|
****************************************************************************/
|
|
|
|
static
|
|
|
|
void last_vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
unsigned int *restrict src = (unsigned int *)dest;
|
|
|
|
unsigned int *restrict des = (unsigned int *)(dest + dest_pitch);
|
|
|
|
|
|
|
|
for (i = 0; i < dest_width / 4; ++i)
|
|
|
|
{
|
|
|
|
_mem4(des++) = _mem4(src++);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
register_generic_scalers(void)
|
|
|
|
{
|
|
|
|
vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_c64;
|
|
|
|
vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_c64;
|
|
|
|
vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_c64;
|
|
|
|
vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_c64;
|
|
|
|
vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_c64;
|
|
|
|
vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_c64;
|
|
|
|
vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_c64;
|
|
|
|
vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_c64;
|
|
|
|
vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_c64;
|
|
|
|
}
|