vp8: apply clang-format

Change-Id: I7605b6678014a5426ceb45c27b54885e0c4e06ed
This commit is contained in:
clang-format 2016-07-13 22:26:28 -07:00 committed by James Zern
parent 65daa41378
commit 81a6739533
177 changed files with 45232 additions and 51209 deletions

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "alloccommon.h"
#include "blockd.h"
@ -18,176 +17,166 @@
#include "entropymode.h"
#include "systemdependent.h"
/* Release every buffer owned by the common context: the YV12 frame pool,
 * the temporary scaling frame, the (optional) post-proc buffers, and the
 * mode-info / above-context arrays.  Pointers that may be freed again later
 * are reset to NULL so a second call is a harmless no-op.
 *
 * NOTE(review): the previous text contained the old and new clang-format
 * versions of this function interleaved, which duplicated the vpx_free()
 * calls (a double free of above_context/mip); this is the single clean body.
 */
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) {
  int i;
  for (i = 0; i < NUM_YV12_BUFFERS; i++)
    vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);

  vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
#if CONFIG_POSTPROC
  vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
  if (oci->post_proc_buffer_int_used)
    vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);

  vpx_free(oci->pp_limits_buffer);
  oci->pp_limits_buffer = NULL;

  vpx_free(oci->postproc_state.generated_noise);
  oci->postproc_state.generated_noise = NULL;
#endif

  vpx_free(oci->above_context);
  vpx_free(oci->mip);
#if CONFIG_ERROR_CONCEALMENT
  vpx_free(oci->prev_mip);
  oci->prev_mip = NULL;
#endif

  oci->above_context = NULL;
  oci->mip = NULL;
}
/* Allocate all frame buffers for a frame of the given dimensions.
 * Returns 0 on success, 1 on failure (everything allocated so far is
 * released via the allocation_fail path, so the context stays consistent).
 */
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) {
  int i;

  /* Start from a clean slate; safe because de-alloc NULLs what it frees. */
  vp8_de_alloc_frame_buffers(oci);

  /* our internal buffers are always multiples of 16 */
  if ((width & 0xf) != 0) width += 16 - (width & 0xf);

  if ((height & 0xf) != 0) height += 16 - (height & 0xf);

  for (i = 0; i < NUM_YV12_BUFFERS; i++) {
    oci->fb_idx_ref_cnt[i] = 0;
    oci->yv12_fb[i].flags = 0;
    if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height,
                                    VP8BORDERINPIXELS) < 0)
      goto allocation_fail;
  }

  /* Initial new/last/golden/alt-ref buffer assignments. */
  oci->new_fb_idx = 0;
  oci->lst_fb_idx = 1;
  oci->gld_fb_idx = 2;
  oci->alt_fb_idx = 3;

  /* Each of the four roles above holds one reference. */
  oci->fb_idx_ref_cnt[0] = 1;
  oci->fb_idx_ref_cnt[1] = 1;
  oci->fb_idx_ref_cnt[2] = 1;
  oci->fb_idx_ref_cnt[3] = 1;

  if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16,
                                  VP8BORDERINPIXELS) < 0)
    goto allocation_fail;

  oci->mb_rows = height >> 4;
  oci->mb_cols = width >> 4;
  oci->MBs = oci->mb_rows * oci->mb_cols;
  oci->mode_info_stride = oci->mb_cols + 1;
  /* +1 row/col of border MODE_INFO entries around the real grid. */
  oci->mip =
      vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));

  if (!oci->mip) goto allocation_fail;

  /* mi points at the first in-picture entry, past the border. */
  oci->mi = oci->mip + oci->mode_info_stride + 1;

  /* Allocation of previous mode info will be done in vp8_decode_frame()
   * as it is a decoder only data */

  oci->above_context =
      vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);

  if (!oci->above_context) goto allocation_fail;

#if CONFIG_POSTPROC
  if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height,
                                  VP8BORDERINPIXELS) < 0)
    goto allocation_fail;

  oci->post_proc_buffer_int_used = 0;
  memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
  memset(oci->post_proc_buffer.buffer_alloc, 128,
         oci->post_proc_buffer.frame_size);

  /* Allocate buffer to store post-processing filter coefficients.
   *
   * Note: Round up mb_cols to support SIMD reads
   */
  oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
  if (!oci->pp_limits_buffer) goto allocation_fail;
#endif

  return 0;

allocation_fail:
  vp8_de_alloc_frame_buffers(oci);
  return 1;
}
/* Configure loop-filter and motion-compensation options from the bitstream
 * "version" field (VP8 profiles 0-3; 4-7 are reserved and treated like 0). */
void vp8_setup_version(VP8_COMMON *cm) {
  switch (cm->version) {
    case 0:
      cm->no_lpf = 0;
      cm->filter_type = NORMAL_LOOPFILTER;
      cm->use_bilinear_mc_filter = 0;
      cm->full_pixel = 0;
      break;
    case 1:
      cm->no_lpf = 0;
      cm->filter_type = SIMPLE_LOOPFILTER;
      cm->use_bilinear_mc_filter = 1;
      cm->full_pixel = 0;
      break;
    case 2:
      cm->no_lpf = 1;
      cm->filter_type = NORMAL_LOOPFILTER;
      cm->use_bilinear_mc_filter = 1;
      cm->full_pixel = 0;
      break;
    case 3:
      cm->no_lpf = 1;
      cm->filter_type = SIMPLE_LOOPFILTER;
      cm->use_bilinear_mc_filter = 1;
      cm->full_pixel = 1;
      break;
    default:
      /*4,5,6,7 are reserved for future use*/
      cm->no_lpf = 0;
      cm->filter_type = NORMAL_LOOPFILTER;
      cm->use_bilinear_mc_filter = 0;
      cm->full_pixel = 0;
      break;
  }
}
/* One-time initialization of a VP8_COMMON context: platform-specific
 * function pointers, default probability tables, and default coding flags. */
void vp8_create_common(VP8_COMMON *oci) {
  vp8_machine_specific_config(oci);

  vp8_init_mbmode_probs(oci);
  vp8_default_bmode_probs(oci->fc.bmode_prob);

  oci->mb_no_coeff_skip = 1;
  oci->no_lpf = 0;
  oci->filter_type = NORMAL_LOOPFILTER;
  oci->use_bilinear_mc_filter = 0;
  oci->full_pixel = 0;
  oci->multi_token_partition = ONE_PARTITION;
  oci->clamp_type = RECON_CLAMP_REQUIRED;

  /* Initialize reference frame sign bias structure to defaults */
  memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));

  /* Default disable buffer to buffer copying */
  oci->copy_buffer_to_gf = 0;
  oci->copy_buffer_to_arf = 0;
}
/* Tear down a VP8_COMMON context; currently just releases frame buffers. */
void vp8_remove_common(VP8_COMMON *oci) { vp8_de_alloc_frame_buffers(oci); }

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ALLOCCOMMON_H_
#define VP8_COMMON_ALLOCCOMMON_H_

View File

@ -11,105 +11,90 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
/* Dequantize + inverse transform + add for the 16 luma 4x4 sub-blocks of a
 * macroblock (4 rows of 4).  Per sub-block: full dequant/IDCT when the end
 * of block (eob) is > 1, a cheaper DC-only path when eob == 1, nothing when
 * eob == 0.  The DC-only path zeroes the first two coefficients via an int
 * store so the block is clean for the next macroblock. */
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq, unsigned char *dst,
                                     int stride, char *eobs) {
  int i;

  for (i = 0; i < 4; i++) {
    if (eobs[0] > 1)
      vp8_dequant_idct_add_v6(q, dq, dst, stride);
    else if (eobs[0] == 1) {
      vp8_dc_only_idct_add_v6(q[0] * dq[0], dst, stride, dst, stride);
      ((int *)q)[0] = 0;
    }

    if (eobs[1] > 1)
      vp8_dequant_idct_add_v6(q + 16, dq, dst + 4, stride);
    else if (eobs[1] == 1) {
      vp8_dc_only_idct_add_v6(q[16] * dq[0], dst + 4, stride, dst + 4, stride);
      ((int *)(q + 16))[0] = 0;
    }

    if (eobs[2] > 1)
      vp8_dequant_idct_add_v6(q + 32, dq, dst + 8, stride);
    else if (eobs[2] == 1) {
      vp8_dc_only_idct_add_v6(q[32] * dq[0], dst + 8, stride, dst + 8, stride);
      ((int *)(q + 32))[0] = 0;
    }

    if (eobs[3] > 1)
      vp8_dequant_idct_add_v6(q + 48, dq, dst + 12, stride);
    else if (eobs[3] == 1) {
      vp8_dc_only_idct_add_v6(q[48] * dq[0], dst + 12, stride, dst + 12,
                              stride);
      ((int *)(q + 48))[0] = 0;
    }

    /* Advance to the next row of four 4x4 blocks. */
    q += 64;
    dst += 4 * stride;
    eobs += 4;
  }
}
/* Dequantize + inverse transform + add for the chroma planes of a
 * macroblock: first the four U 4x4 sub-blocks (2 rows of 2), then the four
 * V sub-blocks, using the same eob > 1 / eob == 1 fast paths as the luma
 * variant. */
void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq, unsigned char *dstu,
                                      unsigned char *dstv, int stride,
                                      char *eobs) {
  int i;

  for (i = 0; i < 2; i++) {
    if (eobs[0] > 1)
      vp8_dequant_idct_add_v6(q, dq, dstu, stride);
    else if (eobs[0] == 1) {
      vp8_dc_only_idct_add_v6(q[0] * dq[0], dstu, stride, dstu, stride);
      ((int *)q)[0] = 0;
    }

    if (eobs[1] > 1)
      vp8_dequant_idct_add_v6(q + 16, dq, dstu + 4, stride);
    else if (eobs[1] == 1) {
      vp8_dc_only_idct_add_v6(q[16] * dq[0], dstu + 4, stride, dstu + 4,
                              stride);
      ((int *)(q + 16))[0] = 0;
    }

    q += 32;
    dstu += 4 * stride;
    eobs += 2;
  }

  for (i = 0; i < 2; i++) {
    if (eobs[0] > 1)
      vp8_dequant_idct_add_v6(q, dq, dstv, stride);
    else if (eobs[0] == 1) {
      vp8_dc_only_idct_add_v6(q[0] * dq[0], dstv, stride, dstv, stride);
      ((int *)q)[0] = 0;
    }

    if (eobs[1] > 1)
      vp8_dequant_idct_add_v6(q + 16, dq, dstv + 4, stride);
    else if (eobs[1] == 1) {
      vp8_dc_only_idct_add_v6(q[16] * dq[0], dstv + 4, stride, dstv + 4,
                              stride);
      ((int *)(q + 16))[0] = 0;
    }

    q += 32;
    dstv += 4 * stride;
    eobs += 2;
  }
}

View File

@ -14,100 +14,74 @@
#include "vp8/common/filter.h"
#include "bilinearfilter_arm.h"
/* Separable bilinear filter: horizontal pass into a temporary buffer, then
 * vertical pass into the destination.  The horizontal pass produces
 * Height + 1 rows because the vertical tap needs one extra row below. */
void vp8_filter_block2d_bil_armv6(unsigned char *src_ptr,
                                  unsigned char *dst_ptr,
                                  unsigned int src_pitch,
                                  unsigned int dst_pitch, const short *HFilter,
                                  const short *VFilter, int Width, int Height) {
  unsigned short FData[36 * 16]; /* Temp data buffer used in filtering */

  /* First filter 1-D horizontally... */
  vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1,
                                          Width, HFilter);

  /* then 1-D vertically... */
  vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height,
                                           Width, VFilter);
}
/* 4x4 bilinear sub-pel prediction: pick the filter taps for the given
 * x/y offsets and run the shared 2-D bilinear filter. */
void vp8_bilinear_predict4x4_armv6(unsigned char *src_ptr,
                                   int src_pixels_per_line, int xoffset,
                                   int yoffset, unsigned char *dst_ptr,
                                   int dst_pitch) {
  const short *HFilter;
  const short *VFilter;

  HFilter = vp8_bilinear_filters[xoffset];
  VFilter = vp8_bilinear_filters[yoffset];

  vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
                               HFilter, VFilter, 4, 4);
}
/* 8x8 bilinear sub-pel prediction (see vp8_bilinear_predict4x4_armv6). */
void vp8_bilinear_predict8x8_armv6(unsigned char *src_ptr,
                                   int src_pixels_per_line, int xoffset,
                                   int yoffset, unsigned char *dst_ptr,
                                   int dst_pitch) {
  const short *HFilter;
  const short *VFilter;

  HFilter = vp8_bilinear_filters[xoffset];
  VFilter = vp8_bilinear_filters[yoffset];

  vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
                               HFilter, VFilter, 8, 8);
}
/* 8x4 bilinear sub-pel prediction (see vp8_bilinear_predict4x4_armv6). */
void vp8_bilinear_predict8x4_armv6(unsigned char *src_ptr,
                                   int src_pixels_per_line, int xoffset,
                                   int yoffset, unsigned char *dst_ptr,
                                   int dst_pitch) {
  const short *HFilter;
  const short *VFilter;

  HFilter = vp8_bilinear_filters[xoffset];
  VFilter = vp8_bilinear_filters[yoffset];

  vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
                               HFilter, VFilter, 8, 4);
}
/* 16x16 bilinear sub-pel prediction (see vp8_bilinear_predict4x4_armv6). */
void vp8_bilinear_predict16x16_armv6(unsigned char *src_ptr,
                                     int src_pixels_per_line, int xoffset,
                                     int yoffset, unsigned char *dst_ptr,
                                     int dst_pitch) {
  const short *HFilter;
  const short *VFilter;

  HFilter = vp8_bilinear_filters[xoffset];
  VFilter = vp8_bilinear_filters[yoffset];

  vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
                               HFilter, VFilter, 16, 16);
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
#define VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
@ -16,25 +15,14 @@
extern "C" {
#endif
/* Prototypes for the ARMv6 assembly bilinear filter passes. */
extern void vp8_filter_block2d_bil_first_pass_armv6(
    const unsigned char *src_ptr, unsigned short *dst_ptr,
    unsigned int src_pitch, unsigned int height, unsigned int width,
    const short *vp8_filter);

extern void vp8_filter_block2d_bil_second_pass_armv6(
    const unsigned short *src_ptr, unsigned char *dst_ptr, int dst_pitch,
    unsigned int height, unsigned int width, const short *vp8_filter);
#ifdef __cplusplus
} // extern "C"

View File

@ -8,18 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8/common/blockd.h"
#if HAVE_MEDIA
extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
/* Dequantize one block: multiply the quantized coefficients by the
 * dequantization factors into d->dqcoeff via the ARMv6 assembly loop. */
void vp8_dequantize_b_v6(BLOCKD *d, short *DQC) {
  short *DQ = d->dqcoeff;
  short *Q = d->qcoeff;

  vp8_dequantize_b_loop_v6(Q, DQC, DQ);
}
#endif

View File

@ -8,214 +8,169 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include <math.h>
#include "vp8/common/filter.h"
#include "vpx_ports/mem.h"
/* Prototypes for the ARMv6 assembly six-tap filter passes. */
extern void vp8_filter_block2d_first_pass_armv6(
    unsigned char *src_ptr, short *output_ptr, unsigned int src_pixels_per_line,
    unsigned int output_width, unsigned int output_height,
    const short *vp8_filter);

// 8x8
extern void vp8_filter_block2d_first_pass_8x8_armv6(
    unsigned char *src_ptr, short *output_ptr, unsigned int src_pixels_per_line,
    unsigned int output_width, unsigned int output_height,
    const short *vp8_filter);

// 16x16
extern void vp8_filter_block2d_first_pass_16x16_armv6(
    unsigned char *src_ptr, short *output_ptr, unsigned int src_pixels_per_line,
    unsigned int output_width, unsigned int output_height,
    const short *vp8_filter);

extern void vp8_filter_block2d_second_pass_armv6(short *src_ptr,
                                                 unsigned char *output_ptr,
                                                 unsigned int output_pitch,
                                                 unsigned int cnt,
                                                 const short *vp8_filter);

extern void vp8_filter4_block2d_second_pass_armv6(short *src_ptr,
                                                  unsigned char *output_ptr,
                                                  unsigned int output_pitch,
                                                  unsigned int cnt,
                                                  const short *vp8_filter);

extern void vp8_filter_block2d_first_pass_only_armv6(
    unsigned char *src_ptr, unsigned char *output_ptr,
    unsigned int src_pixels_per_line, unsigned int cnt,
    unsigned int output_pitch, const short *vp8_filter);

extern void vp8_filter_block2d_second_pass_only_armv6(
    unsigned char *src_ptr, unsigned char *output_ptr,
    unsigned int src_pixels_per_line, unsigned int cnt,
    unsigned int output_pitch, const short *vp8_filter);
#if HAVE_MEDIA
/* 4x4 six-tap sub-pel prediction.  Fast paths skip a pass when one of the
 * offsets is zero; otherwise run horizontal then vertical, with a 4-tap
 * vertical filter for odd y offsets and the full 6-tap filter otherwise. */
void vp8_sixtap_predict4x4_armv6(unsigned char *src_ptr,
                                 int src_pixels_per_line, int xoffset,
                                 int yoffset, unsigned char *dst_ptr,
                                 int dst_pitch) {
  const short *HFilter;
  const short *VFilter;
  DECLARE_ALIGNED(4, short,
                  FData[12 * 4]); /* Temp data buffer used in filtering */

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  /* Vfilter is null. First pass only */
  if (xoffset && !yoffset) {
    vp8_filter_block2d_first_pass_only_armv6(
        src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
  }
  /* Hfilter is null. Second pass only */
  else if (!xoffset && yoffset) {
    vp8_filter_block2d_second_pass_only_armv6(
        src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
  } else {
    /* Vfilter is a 4 tap filter */
    if (yoffset & 0x1) {
      vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line,
                                          FData + 1, src_pixels_per_line, 4, 7,
                                          HFilter);
      vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4,
                                            VFilter);
    }
    /* Vfilter is 6 tap filter */
    else {
      vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line),
                                          FData, src_pixels_per_line, 4, 9,
                                          HFilter);
      vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4,
                                           VFilter);
    }
  }
}
/* 8x8 six-tap sub-pel prediction (see vp8_sixtap_predict4x4_armv6 for the
 * pass-selection logic). */
void vp8_sixtap_predict8x8_armv6(unsigned char *src_ptr,
                                 int src_pixels_per_line, int xoffset,
                                 int yoffset, unsigned char *dst_ptr,
                                 int dst_pitch) {
  const short *HFilter;
  const short *VFilter;
  DECLARE_ALIGNED(4, short,
                  FData[16 * 8]); /* Temp data buffer used in filtering */

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  if (xoffset && !yoffset) {
    vp8_filter_block2d_first_pass_only_armv6(
        src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
  }
  /* Hfilter is null. Second pass only */
  else if (!xoffset && yoffset) {
    vp8_filter_block2d_second_pass_only_armv6(
        src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
  } else {
    if (yoffset & 0x1) {
      vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line,
                                              FData + 1, src_pixels_per_line, 8,
                                              11, HFilter);
      vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8,
                                            VFilter);
    } else {
      vp8_filter_block2d_first_pass_8x8_armv6(
          src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8,
          13, HFilter);
      vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8,
                                           VFilter);
    }
  }
}
/* 16x16 six-tap sub-pel prediction (see vp8_sixtap_predict4x4_armv6 for the
 * pass-selection logic). */
void vp8_sixtap_predict16x16_armv6(unsigned char *src_ptr,
                                   int src_pixels_per_line, int xoffset,
                                   int yoffset, unsigned char *dst_ptr,
                                   int dst_pitch) {
  const short *HFilter;
  const short *VFilter;
  DECLARE_ALIGNED(4, short,
                  FData[24 * 16]); /* Temp data buffer used in filtering */

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  if (xoffset && !yoffset) {
    vp8_filter_block2d_first_pass_only_armv6(
        src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
  }
  /* Hfilter is null. Second pass only */
  else if (!xoffset && yoffset) {
    vp8_filter_block2d_second_pass_only_armv6(
        src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
  } else {
    if (yoffset & 0x1) {
      vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line,
                                                FData + 1, src_pixels_per_line,
                                                16, 19, HFilter);
      vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16,
                                            VFilter);
    } else {
      vp8_filter_block2d_first_pass_16x16_armv6(
          src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16,
          21, HFilter);
      vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16,
                                           VFilter);
    }
  }
}
#endif

View File

@ -8,15 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/onyxc_int.h"
/* Declares a loop-filter entry point with the standard argument list. */
#define prototype_loopfilter(sym)                                      \
  void sym(unsigned char *src, int pitch, const unsigned char *blimit, \
           const unsigned char *limit, const unsigned char *thresh, int count)
#if HAVE_MEDIA
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
@ -27,10 +26,11 @@ extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
#if HAVE_NEON
/* Signatures of the NEON luma and chroma loop-filter routines. */
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
                               unsigned char blimit, unsigned char limit,
                               unsigned char thresh);
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
                                unsigned char blimit, unsigned char limit,
                                unsigned char thresh, unsigned char *v);
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
@ -46,136 +46,163 @@ extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
#if HAVE_MEDIA
/* ARMV6/MEDIA loopfilter functions*/
/* Horizontal MB filtering */
/* Horizontal macroblock-edge loop filter: filter the luma edge, then the
 * chroma edges when the chroma pointers are non-NULL. */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr,
                               unsigned char *v_ptr, int y_stride,
                               int uv_stride, loop_filter_info *lfi) {
  vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim,
                                          lfi->hev_thr, 2);

  if (u_ptr)
    vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim,
                                            lfi->lim, lfi->hev_thr, 1);

  if (v_ptr)
    vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim,
                                            lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
/* Vertical macroblock-edge loop filter: luma edge, then optional chroma. */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr,
                               unsigned char *v_ptr, int y_stride,
                               int uv_stride, loop_filter_info *lfi) {
  vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim,
                                        lfi->hev_thr, 2);

  if (u_ptr)
    vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim,
                                          lfi->lim, lfi->hev_thr, 1);

  if (v_ptr)
    vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim,
                                          lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
const unsigned char *blimit) {
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride,
blimit);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride,
blimit);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride,
blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim,
lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim,
lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
const unsigned char *blimit) {
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
}
#endif
#if HAVE_NEON
/* NEON loopfilter functions */
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
unsigned char mblim = *lfi->mblim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
unsigned char mblim = *lfi->mblim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim,
hev_thr);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim,
hev_thr, v_ptr);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
unsigned char mblim = *lfi->mblim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
unsigned char mblim = *lfi->mblim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim,
hev_thr, v_ptr);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
unsigned char blim = *lfi->blim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
unsigned char blim = *lfi->blim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim,
lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim,
lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim,
lim, hev_thr);
if (u_ptr)
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
if (u_ptr)
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride,
blim, lim, hev_thr,
v_ptr + 4 * uv_stride);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
unsigned char blim = *lfi->blim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
unsigned char blim = *lfi->blim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim,
hev_thr);
if (u_ptr)
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
if (u_ptr)
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim,
hev_thr, v_ptr + 4);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -10,50 +10,41 @@
#include <arm_neon.h>
void vp8_copy_mem8x4_neon(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride) {
uint8x8_t vtmp;
int r;
void vp8_copy_mem8x4_neon(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride) {
uint8x8_t vtmp;
int r;
for (r = 0; r < 4; r++) {
vtmp = vld1_u8(src);
vst1_u8(dst, vtmp);
src += src_stride;
dst += dst_stride;
}
for (r = 0; r < 4; r++) {
vtmp = vld1_u8(src);
vst1_u8(dst, vtmp);
src += src_stride;
dst += dst_stride;
}
}
void vp8_copy_mem8x8_neon(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride) {
uint8x8_t vtmp;
int r;
void vp8_copy_mem8x8_neon(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride) {
uint8x8_t vtmp;
int r;
for (r = 0; r < 8; r++) {
vtmp = vld1_u8(src);
vst1_u8(dst, vtmp);
src += src_stride;
dst += dst_stride;
}
for (r = 0; r < 8; r++) {
vtmp = vld1_u8(src);
vst1_u8(dst, vtmp);
src += src_stride;
dst += dst_stride;
}
}
void vp8_copy_mem16x16_neon(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride) {
int r;
uint8x16_t qtmp;
void vp8_copy_mem16x16_neon(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride) {
int r;
uint8x16_t qtmp;
for (r = 0; r < 16; r++) {
qtmp = vld1q_u8(src);
vst1q_u8(dst, qtmp);
src += src_stride;
dst += dst_stride;
}
for (r = 0; r < 16; r++) {
qtmp = vld1q_u8(src);
vst1q_u8(dst, qtmp);
src += src_stride;
dst += dst_stride;
}
}

View File

@ -10,33 +10,30 @@
#include <arm_neon.h>
void vp8_dc_only_idct_add_neon(
int16_t input_dc,
unsigned char *pred_ptr,
int pred_stride,
unsigned char *dst_ptr,
int dst_stride) {
int i;
uint16_t a1 = ((input_dc + 4) >> 3);
uint32x2_t d2u32 = vdup_n_u32(0);
uint8x8_t d2u8;
uint16x8_t q1u16;
uint16x8_t qAdd;
void vp8_dc_only_idct_add_neon(int16_t input_dc, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride) {
int i;
uint16_t a1 = ((input_dc + 4) >> 3);
uint32x2_t d2u32 = vdup_n_u32(0);
uint8x8_t d2u8;
uint16x8_t q1u16;
uint16x8_t qAdd;
qAdd = vdupq_n_u16(a1);
qAdd = vdupq_n_u16(a1);
for (i = 0; i < 2; i++) {
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
pred_ptr += pred_stride;
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
pred_ptr += pred_stride;
for (i = 0; i < 2; i++) {
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
pred_ptr += pred_stride;
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
pred_ptr += pred_stride;
q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
dst_ptr += dst_stride;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
dst_ptr += dst_stride;
}
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
dst_ptr += dst_stride;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
dst_ptr += dst_stride;
}
}

View File

@ -11,132 +11,129 @@
#include <arm_neon.h>
static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 35468;
static const int16_t sinpi8sqrt2 = 35468;
void vp8_dequant_idct_add_neon(
int16_t *input,
int16_t *dq,
unsigned char *dst,
int stride) {
unsigned char *dst0;
int32x2_t d14, d15;
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
int16x8_t q1, q2, q3, q4, q5, q6;
int16x8_t qEmpty = vdupq_n_s16(0);
int32x2x2_t d2tmp0, d2tmp1;
int16x4x2_t d2tmp2, d2tmp3;
void vp8_dequant_idct_add_neon(int16_t *input, int16_t *dq, unsigned char *dst,
int stride) {
unsigned char *dst0;
int32x2_t d14, d15;
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
int16x8_t q1, q2, q3, q4, q5, q6;
int16x8_t qEmpty = vdupq_n_s16(0);
int32x2x2_t d2tmp0, d2tmp1;
int16x4x2_t d2tmp2, d2tmp3;
d14 = d15 = vdup_n_s32(0);
d14 = d15 = vdup_n_s32(0);
// load input
q3 = vld1q_s16(input);
vst1q_s16(input, qEmpty);
input += 8;
q4 = vld1q_s16(input);
vst1q_s16(input, qEmpty);
// load input
q3 = vld1q_s16(input);
vst1q_s16(input, qEmpty);
input += 8;
q4 = vld1q_s16(input);
vst1q_s16(input, qEmpty);
// load dq
q5 = vld1q_s16(dq);
dq += 8;
q6 = vld1q_s16(dq);
// load dq
q5 = vld1q_s16(dq);
dq += 8;
q6 = vld1q_s16(dq);
// load src from dst
dst0 = dst;
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
dst0 += stride;
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
dst0 += stride;
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
dst0 += stride;
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
// load src from dst
dst0 = dst;
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
dst0 += stride;
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
dst0 += stride;
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
dst0 += stride;
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
vreinterpretq_u16_s16(q5)));
q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
vreinterpretq_u16_s16(q6)));
q1 = vreinterpretq_s16_u16(
vmulq_u16(vreinterpretq_u16_s16(q3), vreinterpretq_u16_s16(q5)));
q2 = vreinterpretq_s16_u16(
vmulq_u16(vreinterpretq_u16_s16(q4), vreinterpretq_u16_s16(q6)));
d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
q3 = vshrq_n_s16(q3, 1);
q4 = vshrq_n_s16(q4, 1);
q3 = vshrq_n_s16(q3, 1);
q4 = vshrq_n_s16(q4, 1);
q3 = vqaddq_s16(q3, q2);
q4 = vqaddq_s16(q4, q2);
q3 = vqaddq_s16(q3, q2);
q4 = vqaddq_s16(q4, q2);
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
vreinterpret_s16_s32(d2tmp1.val[0]));
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
vreinterpret_s16_s32(d2tmp1.val[1]));
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
vreinterpret_s16_s32(d2tmp1.val[0]));
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
vreinterpret_s16_s32(d2tmp1.val[1]));
// loop 2
q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
// loop 2
q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
q3 = vshrq_n_s16(q3, 1);
q4 = vshrq_n_s16(q4, 1);
q3 = vshrq_n_s16(q3, 1);
q4 = vshrq_n_s16(q4, 1);
q3 = vqaddq_s16(q3, q2);
q4 = vqaddq_s16(q4, q2);
q3 = vqaddq_s16(q3, q2);
q4 = vqaddq_s16(q4, q2);
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2 = vrshr_n_s16(d2, 3);
d3 = vrshr_n_s16(d3, 3);
d4 = vrshr_n_s16(d4, 3);
d5 = vrshr_n_s16(d5, 3);
d2 = vrshr_n_s16(d2, 3);
d3 = vrshr_n_s16(d3, 3);
d4 = vrshr_n_s16(d4, 3);
d5 = vrshr_n_s16(d5, 3);
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
vreinterpret_s16_s32(d2tmp1.val[0]));
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
vreinterpret_s16_s32(d2tmp1.val[1]));
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
vreinterpret_s16_s32(d2tmp1.val[0]));
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
vreinterpret_s16_s32(d2tmp1.val[1]));
q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
vreinterpret_u8_s32(d14)));
q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
vreinterpret_u8_s32(d15)));
q1 = vreinterpretq_s16_u16(
vaddw_u8(vreinterpretq_u16_s16(q1), vreinterpret_u8_s32(d14)));
q2 = vreinterpretq_s16_u16(
vaddw_u8(vreinterpretq_u16_s16(q2), vreinterpret_u8_s32(d15)));
d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
dst0 = dst;
vst1_lane_s32((int32_t *)dst0, d14, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d14, 1);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d15, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d15, 1);
return;
dst0 = dst;
vst1_lane_s32((int32_t *)dst0, d14, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d14, 1);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d15, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d15, 1);
return;
}

View File

@ -13,13 +13,13 @@
#include "vp8/common/blockd.h"
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
int16x8x2_t qQ, qDQC, qDQ;
int16x8x2_t qQ, qDQC, qDQ;
qQ = vld2q_s16(d->qcoeff);
qDQC = vld2q_s16(DQC);
qQ = vld2q_s16(d->qcoeff);
qDQC = vld2q_s16(DQC);
qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
vst2q_s16(d->dqcoeff, qDQ);
vst2q_s16(d->dqcoeff, qDQ);
}

View File

@ -14,83 +14,71 @@
/* place these declarations here because we don't want to maintain them
* outside of this scope
*/
void idct_dequant_full_2x_neon(short *q, short *dq,
unsigned char *dst, int stride);
void idct_dequant_0_2x_neon(short *q, short dq,
unsigned char *dst, int stride);
void idct_dequant_full_2x_neon(short *q, short *dq, unsigned char *dst,
int stride);
void idct_dequant_0_2x_neon(short *q, short dq, unsigned char *dst, int stride);
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst,
int stride, char *eobs) {
int i;
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
unsigned char *dst,
int stride, char *eobs)
{
int i;
for (i = 0; i < 4; i++)
{
if (((short *)(eobs))[0])
{
if (((short *)eobs)[0] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dst, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dst, stride);
}
if (((short *)(eobs))[1])
{
if (((short *)eobs)[1] & 0xfefe)
idct_dequant_full_2x_neon (q+32, dq, dst+8, stride);
else
idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride);
}
q += 64;
dst += 4*stride;
eobs += 4;
for (i = 0; i < 4; i++) {
if (((short *)(eobs))[0]) {
if (((short *)eobs)[0] & 0xfefe)
idct_dequant_full_2x_neon(q, dq, dst, stride);
else
idct_dequant_0_2x_neon(q, dq[0], dst, stride);
}
if (((short *)(eobs))[1]) {
if (((short *)eobs)[1] & 0xfefe)
idct_dequant_full_2x_neon(q + 32, dq, dst + 8, stride);
else
idct_dequant_0_2x_neon(q + 32, dq[0], dst + 8, stride);
}
q += 64;
dst += 4 * stride;
eobs += 4;
}
}
void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
unsigned char *dstu,
unsigned char *dstv,
int stride, char *eobs)
{
if (((short *)(eobs))[0])
{
if (((short *)eobs)[0] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dstu, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
}
unsigned char *dstv, int stride,
char *eobs) {
if (((short *)(eobs))[0]) {
if (((short *)eobs)[0] & 0xfefe)
idct_dequant_full_2x_neon(q, dq, dstu, stride);
else
idct_dequant_0_2x_neon(q, dq[0], dstu, stride);
}
q += 32;
dstu += 4*stride;
q += 32;
dstu += 4 * stride;
if (((short *)(eobs))[1])
{
if (((short *)eobs)[1] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dstu, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
}
if (((short *)(eobs))[1]) {
if (((short *)eobs)[1] & 0xfefe)
idct_dequant_full_2x_neon(q, dq, dstu, stride);
else
idct_dequant_0_2x_neon(q, dq[0], dstu, stride);
}
q += 32;
q += 32;
if (((short *)(eobs))[2])
{
if (((short *)eobs)[2] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dstv, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
}
if (((short *)(eobs))[2]) {
if (((short *)eobs)[2] & 0xfefe)
idct_dequant_full_2x_neon(q, dq, dstv, stride);
else
idct_dequant_0_2x_neon(q, dq[0], dstv, stride);
}
q += 32;
dstv += 4*stride;
q += 32;
dstv += 4 * stride;
if (((short *)(eobs))[3])
{
if (((short *)eobs)[3] & 0xfefe)
idct_dequant_full_2x_neon (q, dq, dstv, stride);
else
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
}
if (((short *)(eobs))[3]) {
if (((short *)eobs)[3] & 0xfefe)
idct_dequant_full_2x_neon(q, dq, dstv, stride);
else
idct_dequant_0_2x_neon(q, dq[0], dstv, stride);
}
}

View File

@ -10,54 +10,50 @@
#include <arm_neon.h>
void idct_dequant_0_2x_neon(
int16_t *q,
int16_t dq,
unsigned char *dst,
int stride) {
unsigned char *dst0;
int i, a0, a1;
int16x8x2_t q2Add;
int32x2_t d2s32 = vdup_n_s32(0),
d4s32 = vdup_n_s32(0);
uint8x8_t d2u8, d4u8;
uint16x8_t q1u16, q2u16;
void idct_dequant_0_2x_neon(int16_t *q, int16_t dq, unsigned char *dst,
int stride) {
unsigned char *dst0;
int i, a0, a1;
int16x8x2_t q2Add;
int32x2_t d2s32 = vdup_n_s32(0), d4s32 = vdup_n_s32(0);
uint8x8_t d2u8, d4u8;
uint16x8_t q1u16, q2u16;
a0 = ((q[0] * dq) + 4) >> 3;
a1 = ((q[16] * dq) + 4) >> 3;
q[0] = q[16] = 0;
q2Add.val[0] = vdupq_n_s16((int16_t)a0);
q2Add.val[1] = vdupq_n_s16((int16_t)a1);
a0 = ((q[0] * dq) + 4) >> 3;
a1 = ((q[16] * dq) + 4) >> 3;
q[0] = q[16] = 0;
q2Add.val[0] = vdupq_n_s16((int16_t)a0);
q2Add.val[1] = vdupq_n_s16((int16_t)a1);
for (i = 0; i < 2; i++, dst += 4) {
dst0 = dst;
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
dst0 += stride;
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
dst0 += stride;
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
dst0 += stride;
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
for (i = 0; i < 2; i++, dst += 4) {
dst0 = dst;
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
dst0 += stride;
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
dst0 += stride;
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
dst0 += stride;
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
vreinterpret_u8_s32(d2s32));
q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
vreinterpret_u8_s32(d4s32));
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
vreinterpret_u8_s32(d2s32));
q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
vreinterpret_u8_s32(d4s32));
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
d2s32 = vreinterpret_s32_u8(d2u8);
d4s32 = vreinterpret_s32_u8(d4u8);
d2s32 = vreinterpret_s32_u8(d2u8);
d4s32 = vreinterpret_s32_u8(d4u8);
dst0 = dst;
vst1_lane_s32((int32_t *)dst0, d2s32, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d2s32, 1);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d4s32, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d4s32, 1);
}
return;
dst0 = dst;
vst1_lane_s32((int32_t *)dst0, d2s32, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d2s32, 1);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d4s32, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst0, d4s32, 1);
}
return;
}

View File

@ -11,175 +11,172 @@
#include <arm_neon.h>
static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 17734;
static const int16_t sinpi8sqrt2 = 17734;
// because the lowest bit in 0x8a8c is 0, we can pre-shift this
void idct_dequant_full_2x_neon(
int16_t *q,
int16_t *dq,
unsigned char *dst,
int stride) {
unsigned char *dst0, *dst1;
int32x2_t d28, d29, d30, d31;
int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
int16x8_t qEmpty = vdupq_n_s16(0);
int32x4x2_t q2tmp0, q2tmp1;
int16x8x2_t q2tmp2, q2tmp3;
int16x4_t dLow0, dLow1, dHigh0, dHigh1;
void idct_dequant_full_2x_neon(int16_t *q, int16_t *dq, unsigned char *dst,
int stride) {
unsigned char *dst0, *dst1;
int32x2_t d28, d29, d30, d31;
int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
int16x8_t qEmpty = vdupq_n_s16(0);
int32x4x2_t q2tmp0, q2tmp1;
int16x8x2_t q2tmp2, q2tmp3;
int16x4_t dLow0, dLow1, dHigh0, dHigh1;
d28 = d29 = d30 = d31 = vdup_n_s32(0);
d28 = d29 = d30 = d31 = vdup_n_s32(0);
// load dq
q0 = vld1q_s16(dq);
dq += 8;
q1 = vld1q_s16(dq);
// load dq
q0 = vld1q_s16(dq);
dq += 8;
q1 = vld1q_s16(dq);
// load q
q2 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q3 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q4 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q5 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
// load q
q2 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q3 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q4 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
q += 8;
q5 = vld1q_s16(q);
vst1q_s16(q, qEmpty);
// load src from dst
dst0 = dst;
dst1 = dst + 4;
d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
dst0 += stride;
d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
dst1 += stride;
d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
dst0 += stride;
d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
dst1 += stride;
// load src from dst
dst0 = dst;
dst1 = dst + 4;
d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
dst0 += stride;
d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
dst1 += stride;
d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
dst0 += stride;
d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
dst1 += stride;
d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
dst0 += stride;
d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
dst1 += stride;
d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
dst0 += stride;
d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
dst1 += stride;
d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
q2 = vmulq_s16(q2, q0);
q3 = vmulq_s16(q3, q1);
q4 = vmulq_s16(q4, q0);
q5 = vmulq_s16(q5, q1);
q2 = vmulq_s16(q2, q0);
q3 = vmulq_s16(q3, q1);
q4 = vmulq_s16(q4, q0);
q5 = vmulq_s16(q5, q1);
// vswp
dLow0 = vget_low_s16(q2);
dHigh0 = vget_high_s16(q2);
dLow1 = vget_low_s16(q4);
dHigh1 = vget_high_s16(q4);
q2 = vcombine_s16(dLow0, dLow1);
q4 = vcombine_s16(dHigh0, dHigh1);
// vswp
dLow0 = vget_low_s16(q2);
dHigh0 = vget_high_s16(q2);
dLow1 = vget_low_s16(q4);
dHigh1 = vget_high_s16(q4);
q2 = vcombine_s16(dLow0, dLow1);
q4 = vcombine_s16(dHigh0, dHigh1);
dLow0 = vget_low_s16(q3);
dHigh0 = vget_high_s16(q3);
dLow1 = vget_low_s16(q5);
dHigh1 = vget_high_s16(q5);
q3 = vcombine_s16(dLow0, dLow1);
q5 = vcombine_s16(dHigh0, dHigh1);
dLow0 = vget_low_s16(q3);
dHigh0 = vget_high_s16(q3);
dLow1 = vget_low_s16(q5);
dHigh1 = vget_high_s16(q5);
q3 = vcombine_s16(dLow0, dLow1);
q5 = vcombine_s16(dHigh0, dHigh1);
q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
q10 = vqaddq_s16(q2, q3);
q11 = vqsubq_s16(q2, q3);
q10 = vqaddq_s16(q2, q3);
q11 = vqsubq_s16(q2, q3);
q8 = vshrq_n_s16(q8, 1);
q9 = vshrq_n_s16(q9, 1);
q8 = vshrq_n_s16(q8, 1);
q9 = vshrq_n_s16(q9, 1);
q4 = vqaddq_s16(q4, q8);
q5 = vqaddq_s16(q5, q9);
q4 = vqaddq_s16(q4, q8);
q5 = vqaddq_s16(q5, q9);
q2 = vqsubq_s16(q6, q5);
q3 = vqaddq_s16(q7, q4);
q2 = vqsubq_s16(q6, q5);
q3 = vqaddq_s16(q7, q4);
q4 = vqaddq_s16(q10, q3);
q5 = vqaddq_s16(q11, q2);
q6 = vqsubq_s16(q11, q2);
q7 = vqsubq_s16(q10, q3);
q4 = vqaddq_s16(q10, q3);
q5 = vqaddq_s16(q11, q2);
q6 = vqsubq_s16(q11, q2);
q7 = vqsubq_s16(q10, q3);
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
vreinterpretq_s16_s32(q2tmp1.val[0]));
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
vreinterpretq_s16_s32(q2tmp1.val[1]));
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
vreinterpretq_s16_s32(q2tmp1.val[0]));
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
vreinterpretq_s16_s32(q2tmp1.val[1]));
// loop 2
q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
// loop 2
q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
q10 = vshrq_n_s16(q10, 1);
q11 = vshrq_n_s16(q11, 1);
q10 = vshrq_n_s16(q10, 1);
q11 = vshrq_n_s16(q11, 1);
q10 = vqaddq_s16(q2tmp2.val[1], q10);
q11 = vqaddq_s16(q2tmp3.val[1], q11);
q10 = vqaddq_s16(q2tmp2.val[1], q10);
q11 = vqaddq_s16(q2tmp3.val[1], q11);
q8 = vqsubq_s16(q8, q11);
q9 = vqaddq_s16(q9, q10);
q8 = vqsubq_s16(q8, q11);
q9 = vqaddq_s16(q9, q10);
q4 = vqaddq_s16(q2, q9);
q5 = vqaddq_s16(q3, q8);
q6 = vqsubq_s16(q3, q8);
q7 = vqsubq_s16(q2, q9);
q4 = vqaddq_s16(q2, q9);
q5 = vqaddq_s16(q3, q8);
q6 = vqsubq_s16(q3, q8);
q7 = vqsubq_s16(q2, q9);
q4 = vrshrq_n_s16(q4, 3);
q5 = vrshrq_n_s16(q5, 3);
q6 = vrshrq_n_s16(q6, 3);
q7 = vrshrq_n_s16(q7, 3);
q4 = vrshrq_n_s16(q4, 3);
q5 = vrshrq_n_s16(q5, 3);
q6 = vrshrq_n_s16(q6, 3);
q7 = vrshrq_n_s16(q7, 3);
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
vreinterpretq_s16_s32(q2tmp1.val[0]));
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
vreinterpretq_s16_s32(q2tmp1.val[1]));
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
vreinterpretq_s16_s32(q2tmp1.val[0]));
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
vreinterpretq_s16_s32(q2tmp1.val[1]));
q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
vreinterpret_u8_s32(d28)));
q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
vreinterpret_u8_s32(d29)));
q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
vreinterpret_u8_s32(d30)));
q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
vreinterpret_u8_s32(d31)));
q4 = vreinterpretq_s16_u16(
vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), vreinterpret_u8_s32(d28)));
q5 = vreinterpretq_s16_u16(
vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), vreinterpret_u8_s32(d29)));
q6 = vreinterpretq_s16_u16(
vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), vreinterpret_u8_s32(d30)));
q7 = vreinterpretq_s16_u16(
vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), vreinterpret_u8_s32(d31)));
d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
dst0 = dst;
dst1 = dst + 4;
vst1_lane_s32((int32_t *)dst0, d28, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d28, 1);
dst1 += stride;
vst1_lane_s32((int32_t *)dst0, d29, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d29, 1);
dst1 += stride;
dst0 = dst;
dst1 = dst + 4;
vst1_lane_s32((int32_t *)dst0, d28, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d28, 1);
dst1 += stride;
vst1_lane_s32((int32_t *)dst0, d29, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d29, 1);
dst1 += stride;
vst1_lane_s32((int32_t *)dst0, d30, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d30, 1);
dst1 += stride;
vst1_lane_s32((int32_t *)dst0, d31, 0);
vst1_lane_s32((int32_t *)dst1, d31, 1);
return;
vst1_lane_s32((int32_t *)dst0, d30, 0);
dst0 += stride;
vst1_lane_s32((int32_t *)dst1, d30, 1);
dst1 += stride;
vst1_lane_s32((int32_t *)dst0, d31, 0);
vst1_lane_s32((int32_t *)dst1, d31, 1);
return;
}

View File

@ -10,93 +10,91 @@
#include <arm_neon.h>
void vp8_short_inv_walsh4x4_neon(
int16_t *input,
int16_t *mb_dqcoeff) {
int16x8_t q0s16, q1s16, q2s16, q3s16;
int16x4_t d4s16, d5s16, d6s16, d7s16;
int16x4x2_t v2tmp0, v2tmp1;
int32x2x2_t v2tmp2, v2tmp3;
int16x8_t qAdd3;
void vp8_short_inv_walsh4x4_neon(int16_t *input, int16_t *mb_dqcoeff) {
int16x8_t q0s16, q1s16, q2s16, q3s16;
int16x4_t d4s16, d5s16, d6s16, d7s16;
int16x4x2_t v2tmp0, v2tmp1;
int32x2x2_t v2tmp2, v2tmp3;
int16x8_t qAdd3;
q0s16 = vld1q_s16(input);
q1s16 = vld1q_s16(input + 8);
q0s16 = vld1q_s16(input);
q1s16 = vld1q_s16(input + 8);
// 1st for loop
d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
// 1st for loop
d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
q2s16 = vcombine_s16(d4s16, d5s16);
q3s16 = vcombine_s16(d6s16, d7s16);
q2s16 = vcombine_s16(d4s16, d5s16);
q3s16 = vcombine_s16(d6s16, d7s16);
q0s16 = vaddq_s16(q2s16, q3s16);
q1s16 = vsubq_s16(q2s16, q3s16);
q0s16 = vaddq_s16(q2s16, q3s16);
q1s16 = vsubq_s16(q2s16, q3s16);
v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
vreinterpret_s32_s16(vget_low_s16(q1s16)));
v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
vreinterpret_s32_s16(vget_high_s16(q1s16)));
v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
vreinterpret_s16_s32(v2tmp3.val[0]));
v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
vreinterpret_s16_s32(v2tmp3.val[1]));
v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
vreinterpret_s32_s16(vget_low_s16(q1s16)));
v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
vreinterpret_s32_s16(vget_high_s16(q1s16)));
v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
vreinterpret_s16_s32(v2tmp3.val[0]));
v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
vreinterpret_s16_s32(v2tmp3.val[1]));
// 2nd for loop
d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
q2s16 = vcombine_s16(d4s16, d5s16);
q3s16 = vcombine_s16(d6s16, d7s16);
// 2nd for loop
d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
q2s16 = vcombine_s16(d4s16, d5s16);
q3s16 = vcombine_s16(d6s16, d7s16);
qAdd3 = vdupq_n_s16(3);
qAdd3 = vdupq_n_s16(3);
q0s16 = vaddq_s16(q2s16, q3s16);
q1s16 = vsubq_s16(q2s16, q3s16);
q0s16 = vaddq_s16(q2s16, q3s16);
q1s16 = vsubq_s16(q2s16, q3s16);
q0s16 = vaddq_s16(q0s16, qAdd3);
q1s16 = vaddq_s16(q1s16, qAdd3);
q0s16 = vaddq_s16(q0s16, qAdd3);
q1s16 = vaddq_s16(q1s16, qAdd3);
q0s16 = vshrq_n_s16(q0s16, 3);
q1s16 = vshrq_n_s16(q1s16, 3);
q0s16 = vshrq_n_s16(q0s16, 3);
q1s16 = vshrq_n_s16(q1s16, 3);
// store
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
mb_dqcoeff += 16;
// store
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
mb_dqcoeff += 16;
return;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
mb_dqcoeff += 16;
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
mb_dqcoeff += 16;
return;
}

View File

@ -12,100 +12,93 @@
#include "./vpx_config.h"
static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
unsigned char *s,
int p,
const unsigned char *blimit) {
uint8_t *sp;
uint8x16_t qblimit, q0u8;
uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
int16x8_t q2s16, q3s16, q13s16;
int8x8_t d8s8, d9s8;
int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
unsigned char *s, int p, const unsigned char *blimit) {
uint8_t *sp;
uint8x16_t qblimit, q0u8;
uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
int16x8_t q2s16, q3s16, q13s16;
int8x8_t d8s8, d9s8;
int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
qblimit = vdupq_n_u8(*blimit);
qblimit = vdupq_n_u8(*blimit);
sp = s - (p << 1);
q5u8 = vld1q_u8(sp);
sp += p;
q6u8 = vld1q_u8(sp);
sp += p;
q7u8 = vld1q_u8(sp);
sp += p;
q8u8 = vld1q_u8(sp);
sp = s - (p << 1);
q5u8 = vld1q_u8(sp);
sp += p;
q6u8 = vld1q_u8(sp);
sp += p;
q7u8 = vld1q_u8(sp);
sp += p;
q8u8 = vld1q_u8(sp);
q15u8 = vabdq_u8(q6u8, q7u8);
q14u8 = vabdq_u8(q5u8, q8u8);
q15u8 = vabdq_u8(q6u8, q7u8);
q14u8 = vabdq_u8(q5u8, q8u8);
q15u8 = vqaddq_u8(q15u8, q15u8);
q14u8 = vshrq_n_u8(q14u8, 1);
q0u8 = vdupq_n_u8(0x80);
q13s16 = vdupq_n_s16(3);
q15u8 = vqaddq_u8(q15u8, q14u8);
q15u8 = vqaddq_u8(q15u8, q15u8);
q14u8 = vshrq_n_u8(q14u8, 1);
q0u8 = vdupq_n_u8(0x80);
q13s16 = vdupq_n_s16(3);
q15u8 = vqaddq_u8(q15u8, q14u8);
q5u8 = veorq_u8(q5u8, q0u8);
q6u8 = veorq_u8(q6u8, q0u8);
q7u8 = veorq_u8(q7u8, q0u8);
q8u8 = veorq_u8(q8u8, q0u8);
q5u8 = veorq_u8(q5u8, q0u8);
q6u8 = veorq_u8(q6u8, q0u8);
q7u8 = veorq_u8(q7u8, q0u8);
q8u8 = veorq_u8(q8u8, q0u8);
q15u8 = vcgeq_u8(qblimit, q15u8);
q15u8 = vcgeq_u8(qblimit, q15u8);
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
vget_low_s8(vreinterpretq_s8_u8(q6u8)));
q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
vget_high_s8(vreinterpretq_s8_u8(q6u8)));
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
vget_low_s8(vreinterpretq_s8_u8(q6u8)));
q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
vget_high_s8(vreinterpretq_s8_u8(q6u8)));
q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
vreinterpretq_s8_u8(q8u8));
q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8), vreinterpretq_s8_u8(q8u8));
q2s16 = vmulq_s16(q2s16, q13s16);
q3s16 = vmulq_s16(q3s16, q13s16);
q2s16 = vmulq_s16(q2s16, q13s16);
q3s16 = vmulq_s16(q3s16, q13s16);
q10u8 = vdupq_n_u8(3);
q9u8 = vdupq_n_u8(4);
q10u8 = vdupq_n_u8(3);
q9u8 = vdupq_n_u8(4);
q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
d8s8 = vqmovn_s16(q2s16);
d9s8 = vqmovn_s16(q3s16);
q4s8 = vcombine_s8(d8s8, d9s8);
d8s8 = vqmovn_s16(q2s16);
d9s8 = vqmovn_s16(q3s16);
q4s8 = vcombine_s8(d8s8, d9s8);
q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
q2s8 = vshrq_n_s8(q2s8, 3);
q3s8 = vshrq_n_s8(q3s8, 3);
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
q2s8 = vshrq_n_s8(q2s8, 3);
q3s8 = vshrq_n_s8(q3s8, 3);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
vst1q_u8(s, q7u8);
s -= p;
vst1q_u8(s, q6u8);
return;
vst1q_u8(s, q7u8);
s -= p;
vst1q_u8(s, q6u8);
return;
}
void vp8_loop_filter_bhs_neon(
unsigned char *y_ptr,
int y_stride,
const unsigned char *blimit) {
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
return;
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit) {
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
y_ptr += y_stride * 4;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
return;
}
void vp8_loop_filter_mbhs_neon(
unsigned char *y_ptr,
int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
return;
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
return;
}

View File

@ -15,34 +15,33 @@
#ifdef VPX_INCOMPATIBLE_GCC
static INLINE void write_2x4(unsigned char *dst, int pitch,
const uint8x8x2_t result) {
/*
* uint8x8x2_t result
00 01 02 03 | 04 05 06 07
10 11 12 13 | 14 15 16 17
---
* after vtrn_u8
00 10 02 12 | 04 14 06 16
01 11 03 13 | 05 15 07 17
*/
const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0],
result.val[1]);
const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
/*
* uint8x8x2_t result
00 01 02 03 | 04 05 06 07
10 11 12 13 | 14 15 16 17
---
* after vtrn_u8
00 10 02 12 | 04 14 06 16
01 11 03 13 | 05 15 07 17
*/
const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0], result.val[1]);
const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
dst += pitch;
vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
}
static INLINE void write_2x8(unsigned char *dst, int pitch,
@ -91,193 +90,183 @@ static INLINE void write_2x8(unsigned char *dst, int pitch,
}
#endif // VPX_INCOMPATIBLE_GCC
#ifdef VPX_INCOMPATIBLE_GCC
static INLINE
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
uint8x8x4_t x;
const uint8x8_t a = vld1_u8(src);
const uint8x8_t b = vld1_u8(src + pitch * 1);
const uint8x8_t c = vld1_u8(src + pitch * 2);
const uint8x8_t d = vld1_u8(src + pitch * 3);
const uint8x8_t e = vld1_u8(src + pitch * 4);
const uint8x8_t f = vld1_u8(src + pitch * 5);
const uint8x8_t g = vld1_u8(src + pitch * 6);
const uint8x8_t h = vld1_u8(src + pitch * 7);
const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a),
vreinterpret_u32_u8(e));
const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b),
vreinterpret_u32_u8(f));
const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c),
vreinterpret_u32_u8(g));
const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d),
vreinterpret_u32_u8(h));
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
vreinterpret_u16_u32(r26_u32.val[0]));
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
vreinterpret_u16_u32(r37_u32.val[0]));
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
vreinterpret_u8_u16(r13_u16.val[0]));
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
vreinterpret_u8_u16(r13_u16.val[1]));
/*
* after vtrn_u32
00 01 02 03 | 40 41 42 43
10 11 12 13 | 50 51 52 53
20 21 22 23 | 60 61 62 63
30 31 32 33 | 70 71 72 73
---
* after vtrn_u16
00 01 20 21 | 40 41 60 61
02 03 22 23 | 42 43 62 63
10 11 30 31 | 50 51 70 71
12 13 32 33 | 52 52 72 73
static INLINE uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
uint8x8x4_t x;
const uint8x8_t a = vld1_u8(src);
const uint8x8_t b = vld1_u8(src + pitch * 1);
const uint8x8_t c = vld1_u8(src + pitch * 2);
const uint8x8_t d = vld1_u8(src + pitch * 3);
const uint8x8_t e = vld1_u8(src + pitch * 4);
const uint8x8_t f = vld1_u8(src + pitch * 5);
const uint8x8_t g = vld1_u8(src + pitch * 6);
const uint8x8_t h = vld1_u8(src + pitch * 7);
const uint32x2x2_t r04_u32 =
vtrn_u32(vreinterpret_u32_u8(a), vreinterpret_u32_u8(e));
const uint32x2x2_t r15_u32 =
vtrn_u32(vreinterpret_u32_u8(b), vreinterpret_u32_u8(f));
const uint32x2x2_t r26_u32 =
vtrn_u32(vreinterpret_u32_u8(c), vreinterpret_u32_u8(g));
const uint32x2x2_t r37_u32 =
vtrn_u32(vreinterpret_u32_u8(d), vreinterpret_u32_u8(h));
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
vreinterpret_u16_u32(r26_u32.val[0]));
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
vreinterpret_u16_u32(r37_u32.val[0]));
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
vreinterpret_u8_u16(r13_u16.val[0]));
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
vreinterpret_u8_u16(r13_u16.val[1]));
/*
* after vtrn_u32
00 01 02 03 | 40 41 42 43
10 11 12 13 | 50 51 52 53
20 21 22 23 | 60 61 62 63
30 31 32 33 | 70 71 72 73
---
* after vtrn_u16
00 01 20 21 | 40 41 60 61
02 03 22 23 | 42 43 62 63
10 11 30 31 | 50 51 70 71
12 13 32 33 | 52 52 72 73
00 01 20 21 | 40 41 60 61
10 11 30 31 | 50 51 70 71
02 03 22 23 | 42 43 62 63
12 13 32 33 | 52 52 72 73
---
* after vtrn_u8
00 10 20 30 | 40 50 60 70
01 11 21 31 | 41 51 61 71
02 12 22 32 | 42 52 62 72
03 13 23 33 | 43 53 63 73
*/
x.val[0] = r01_u8.val[0];
x.val[1] = r01_u8.val[1];
x.val[2] = r23_u8.val[0];
x.val[3] = r23_u8.val[1];
00 01 20 21 | 40 41 60 61
10 11 30 31 | 50 51 70 71
02 03 22 23 | 42 43 62 63
12 13 32 33 | 52 52 72 73
---
* after vtrn_u8
00 10 20 30 | 40 50 60 70
01 11 21 31 | 41 51 61 71
02 12 22 32 | 42 52 62 72
03 13 23 33 | 43 53 63 73
*/
x.val[0] = r01_u8.val[0];
x.val[1] = r01_u8.val[1];
x.val[2] = r23_u8.val[0];
x.val[3] = r23_u8.val[1];
return x;
return x;
}
#else
static INLINE
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
uint8x8x4_t x;
x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
x = vld4_lane_u8(src, x, 0);
src += pitch;
x = vld4_lane_u8(src, x, 1);
src += pitch;
x = vld4_lane_u8(src, x, 2);
src += pitch;
x = vld4_lane_u8(src, x, 3);
src += pitch;
x = vld4_lane_u8(src, x, 4);
src += pitch;
x = vld4_lane_u8(src, x, 5);
src += pitch;
x = vld4_lane_u8(src, x, 6);
src += pitch;
x = vld4_lane_u8(src, x, 7);
return x;
static INLINE uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
uint8x8x4_t x;
x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
x = vld4_lane_u8(src, x, 0);
src += pitch;
x = vld4_lane_u8(src, x, 1);
src += pitch;
x = vld4_lane_u8(src, x, 2);
src += pitch;
x = vld4_lane_u8(src, x, 3);
src += pitch;
x = vld4_lane_u8(src, x, 4);
src += pitch;
x = vld4_lane_u8(src, x, 5);
src += pitch;
x = vld4_lane_u8(src, x, 6);
src += pitch;
x = vld4_lane_u8(src, x, 7);
return x;
}
#endif // VPX_INCOMPATIBLE_GCC
static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
unsigned char *s,
int p,
const unsigned char *blimit) {
unsigned char *src1;
uint8x16_t qblimit, q0u8;
uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
int16x8_t q2s16, q13s16, q11s16;
int8x8_t d28s8, d29s8;
int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
uint8x8x4_t d0u8x4; // d6, d7, d8, d9
uint8x8x4_t d1u8x4; // d10, d11, d12, d13
uint8x8x2_t d2u8x2; // d12, d13
uint8x8x2_t d3u8x2; // d14, d15
unsigned char *s, int p, const unsigned char *blimit) {
unsigned char *src1;
uint8x16_t qblimit, q0u8;
uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
int16x8_t q2s16, q13s16, q11s16;
int8x8_t d28s8, d29s8;
int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
uint8x8x4_t d0u8x4; // d6, d7, d8, d9
uint8x8x4_t d1u8x4; // d10, d11, d12, d13
uint8x8x2_t d2u8x2; // d12, d13
uint8x8x2_t d3u8x2; // d14, d15
qblimit = vdupq_n_u8(*blimit);
qblimit = vdupq_n_u8(*blimit);
src1 = s - 2;
d0u8x4 = read_4x8(src1, p);
src1 += p * 8;
d1u8x4 = read_4x8(src1, p);
src1 = s - 2;
d0u8x4 = read_4x8(src1, p);
src1 += p * 8;
d1u8x4 = read_4x8(src1, p);
q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10
q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12
q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11
q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13
q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10
q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12
q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11
q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13
q15u8 = vabdq_u8(q5u8, q4u8);
q14u8 = vabdq_u8(q3u8, q6u8);
q15u8 = vabdq_u8(q5u8, q4u8);
q14u8 = vabdq_u8(q3u8, q6u8);
q15u8 = vqaddq_u8(q15u8, q15u8);
q14u8 = vshrq_n_u8(q14u8, 1);
q0u8 = vdupq_n_u8(0x80);
q11s16 = vdupq_n_s16(3);
q15u8 = vqaddq_u8(q15u8, q14u8);
q15u8 = vqaddq_u8(q15u8, q15u8);
q14u8 = vshrq_n_u8(q14u8, 1);
q0u8 = vdupq_n_u8(0x80);
q11s16 = vdupq_n_s16(3);
q15u8 = vqaddq_u8(q15u8, q14u8);
q3u8 = veorq_u8(q3u8, q0u8);
q4u8 = veorq_u8(q4u8, q0u8);
q5u8 = veorq_u8(q5u8, q0u8);
q6u8 = veorq_u8(q6u8, q0u8);
q3u8 = veorq_u8(q3u8, q0u8);
q4u8 = veorq_u8(q4u8, q0u8);
q5u8 = veorq_u8(q5u8, q0u8);
q6u8 = veorq_u8(q6u8, q0u8);
q15u8 = vcgeq_u8(qblimit, q15u8);
q15u8 = vcgeq_u8(qblimit, q15u8);
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
vget_low_s8(vreinterpretq_s8_u8(q5u8)));
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
vget_high_s8(vreinterpretq_s8_u8(q5u8)));
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
vget_low_s8(vreinterpretq_s8_u8(q5u8)));
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
vget_high_s8(vreinterpretq_s8_u8(q5u8)));
q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8),
vreinterpretq_s8_u8(q6u8));
q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8), vreinterpretq_s8_u8(q6u8));
q2s16 = vmulq_s16(q2s16, q11s16);
q13s16 = vmulq_s16(q13s16, q11s16);
q2s16 = vmulq_s16(q2s16, q11s16);
q13s16 = vmulq_s16(q13s16, q11s16);
q11u8 = vdupq_n_u8(3);
q12u8 = vdupq_n_u8(4);
q11u8 = vdupq_n_u8(3);
q12u8 = vdupq_n_u8(4);
q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));
q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));
d28s8 = vqmovn_s16(q2s16);
d29s8 = vqmovn_s16(q13s16);
q14s8 = vcombine_s8(d28s8, d29s8);
d28s8 = vqmovn_s16(q2s16);
d29s8 = vqmovn_s16(q13s16);
q14s8 = vcombine_s8(d28s8, d29s8);
q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));
q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
q2s8 = vshrq_n_s8(q2s8, 3);
q14s8 = vshrq_n_s8(q3s8, 3);
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
q2s8 = vshrq_n_s8(q2s8, 3);
q14s8 = vshrq_n_s8(q3s8, 3);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
d2u8x2.val[0] = vget_low_u8(q6u8); // d12
d2u8x2.val[1] = vget_low_u8(q7u8); // d14
d3u8x2.val[0] = vget_high_u8(q6u8); // d13
d3u8x2.val[1] = vget_high_u8(q7u8); // d15
d2u8x2.val[0] = vget_low_u8(q6u8); // d12
d2u8x2.val[1] = vget_low_u8(q7u8); // d14
d3u8x2.val[0] = vget_high_u8(q6u8); // d13
d3u8x2.val[1] = vget_high_u8(q7u8); // d15
src1 = s - 1;
write_2x8(src1, p, d2u8x2, d3u8x2);
src1 = s - 1;
write_2x8(src1, p, d2u8x2, d3u8x2);
}
void vp8_loop_filter_bvs_neon(
unsigned char *y_ptr,
int y_stride,
const unsigned char *blimit) {
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
return;
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit) {
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
y_ptr += 4;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
return;
}
void vp8_loop_filter_mbvs_neon(
unsigned char *y_ptr,
int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
return;
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
return;
}

File diff suppressed because it is too large Load Diff

View File

@ -11,113 +11,109 @@
#include <arm_neon.h>
static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 35468;
static const int16_t sinpi8sqrt2 = 35468;
void vp8_short_idct4x4llm_neon(
int16_t *input,
unsigned char *pred_ptr,
int pred_stride,
unsigned char *dst_ptr,
int dst_stride) {
int i;
uint32x2_t d6u32 = vdup_n_u32(0);
uint8x8_t d1u8;
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
uint16x8_t q1u16;
int16x8_t q1s16, q2s16, q3s16, q4s16;
int32x2x2_t v2tmp0, v2tmp1;
int16x4x2_t v2tmp2, v2tmp3;
void vp8_short_idct4x4llm_neon(int16_t *input, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride) {
int i;
uint32x2_t d6u32 = vdup_n_u32(0);
uint8x8_t d1u8;
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
uint16x8_t q1u16;
int16x8_t q1s16, q2s16, q3s16, q4s16;
int32x2x2_t v2tmp0, v2tmp1;
int16x4x2_t v2tmp2, v2tmp3;
d2 = vld1_s16(input);
d3 = vld1_s16(input + 4);
d4 = vld1_s16(input + 8);
d5 = vld1_s16(input + 12);
d2 = vld1_s16(input);
d3 = vld1_s16(input + 4);
d4 = vld1_s16(input + 8);
d5 = vld1_s16(input + 12);
// 1st for loop
q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here
q2s16 = vcombine_s16(d3, d5);
// 1st for loop
q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here
q2s16 = vcombine_s16(d3, d5);
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
q3s16 = vshrq_n_s16(q3s16, 1);
q4s16 = vshrq_n_s16(q4s16, 1);
q3s16 = vshrq_n_s16(q3s16, 1);
q4s16 = vshrq_n_s16(q4s16, 1);
q3s16 = vqaddq_s16(q3s16, q2s16);
q4s16 = vqaddq_s16(q4s16, q2s16);
q3s16 = vqaddq_s16(q3s16, q2s16);
q4s16 = vqaddq_s16(q4s16, q2s16);
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
vreinterpret_s16_s32(v2tmp1.val[0]));
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
vreinterpret_s16_s32(v2tmp1.val[1]));
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
vreinterpret_s16_s32(v2tmp1.val[0]));
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
vreinterpret_s16_s32(v2tmp1.val[1]));
// 2nd for loop
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);
// 2nd for loop
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
q3s16 = vshrq_n_s16(q3s16, 1);
q4s16 = vshrq_n_s16(q4s16, 1);
q3s16 = vshrq_n_s16(q3s16, 1);
q4s16 = vshrq_n_s16(q4s16, 1);
q3s16 = vqaddq_s16(q3s16, q2s16);
q4s16 = vqaddq_s16(q4s16, q2s16);
q3s16 = vqaddq_s16(q3s16, q2s16);
q4s16 = vqaddq_s16(q4s16, q2s16);
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2 = vqadd_s16(d12, d11);
d3 = vqadd_s16(d13, d10);
d4 = vqsub_s16(d13, d10);
d5 = vqsub_s16(d12, d11);
d2 = vrshr_n_s16(d2, 3);
d3 = vrshr_n_s16(d3, 3);
d4 = vrshr_n_s16(d4, 3);
d5 = vrshr_n_s16(d5, 3);
d2 = vrshr_n_s16(d2, 3);
d3 = vrshr_n_s16(d3, 3);
d4 = vrshr_n_s16(d4, 3);
d5 = vrshr_n_s16(d5, 3);
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
vreinterpret_s16_s32(v2tmp1.val[0]));
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
vreinterpret_s16_s32(v2tmp1.val[1]));
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
vreinterpret_s16_s32(v2tmp1.val[0]));
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
vreinterpret_s16_s32(v2tmp1.val[1]));
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);
// dc_only_idct_add
for (i = 0; i < 2; i++, q1s16 = q2s16) {
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
pred_ptr += pred_stride;
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
pred_ptr += pred_stride;
// dc_only_idct_add
for (i = 0; i < 2; i++, q1s16 = q2s16) {
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
pred_ptr += pred_stride;
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
pred_ptr += pred_stride;
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
vreinterpret_u8_u32(d6u32));
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16), vreinterpret_u8_u32(d6u32));
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
dst_ptr += dst_stride;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
dst_ptr += dst_stride;
}
return;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
dst_ptr += dst_stride;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
dst_ptr += dst_stride;
}
return;
}

File diff suppressed because it is too large Load Diff

View File

@ -12,539 +12,525 @@
#include "./vpx_config.h"
#include "vpx_ports/arm.h"
static INLINE void vp8_loop_filter_neon(
uint8x16_t qblimit, // flimit
uint8x16_t qlimit, // limit
uint8x16_t qthresh, // thresh
uint8x16_t q3, // p3
uint8x16_t q4, // p2
uint8x16_t q5, // p1
uint8x16_t q6, // p0
uint8x16_t q7, // q0
uint8x16_t q8, // q1
uint8x16_t q9, // q2
uint8x16_t q10, // q3
uint8x16_t *q5r, // p1
uint8x16_t *q6r, // p0
uint8x16_t *q7r, // q0
uint8x16_t *q8r) { // q1
uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
int16x8_t q2s16, q11s16;
uint16x8_t q4u16;
int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
int8x8_t d2s8, d3s8;
static INLINE void vp8_loop_filter_neon(uint8x16_t qblimit, // flimit
uint8x16_t qlimit, // limit
uint8x16_t qthresh, // thresh
uint8x16_t q3, // p3
uint8x16_t q4, // p2
uint8x16_t q5, // p1
uint8x16_t q6, // p0
uint8x16_t q7, // q0
uint8x16_t q8, // q1
uint8x16_t q9, // q2
uint8x16_t q10, // q3
uint8x16_t *q5r, // p1
uint8x16_t *q6r, // p0
uint8x16_t *q7r, // q0
uint8x16_t *q8r) { // q1
uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
int16x8_t q2s16, q11s16;
uint16x8_t q4u16;
int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
int8x8_t d2s8, d3s8;
q11u8 = vabdq_u8(q3, q4);
q12u8 = vabdq_u8(q4, q5);
q13u8 = vabdq_u8(q5, q6);
q14u8 = vabdq_u8(q8, q7);
q3 = vabdq_u8(q9, q8);
q4 = vabdq_u8(q10, q9);
q11u8 = vabdq_u8(q3, q4);
q12u8 = vabdq_u8(q4, q5);
q13u8 = vabdq_u8(q5, q6);
q14u8 = vabdq_u8(q8, q7);
q3 = vabdq_u8(q9, q8);
q4 = vabdq_u8(q10, q9);
q11u8 = vmaxq_u8(q11u8, q12u8);
q12u8 = vmaxq_u8(q13u8, q14u8);
q3 = vmaxq_u8(q3, q4);
q15u8 = vmaxq_u8(q11u8, q12u8);
q11u8 = vmaxq_u8(q11u8, q12u8);
q12u8 = vmaxq_u8(q13u8, q14u8);
q3 = vmaxq_u8(q3, q4);
q15u8 = vmaxq_u8(q11u8, q12u8);
q9 = vabdq_u8(q6, q7);
q9 = vabdq_u8(q6, q7);
// vp8_hevmask
q13u8 = vcgtq_u8(q13u8, qthresh);
q14u8 = vcgtq_u8(q14u8, qthresh);
q15u8 = vmaxq_u8(q15u8, q3);
// vp8_hevmask
q13u8 = vcgtq_u8(q13u8, qthresh);
q14u8 = vcgtq_u8(q14u8, qthresh);
q15u8 = vmaxq_u8(q15u8, q3);
q2u8 = vabdq_u8(q5, q8);
q9 = vqaddq_u8(q9, q9);
q2u8 = vabdq_u8(q5, q8);
q9 = vqaddq_u8(q9, q9);
q15u8 = vcgeq_u8(qlimit, q15u8);
q15u8 = vcgeq_u8(qlimit, q15u8);
// vp8_filter() function
// convert to signed
q10 = vdupq_n_u8(0x80);
q8 = veorq_u8(q8, q10);
q7 = veorq_u8(q7, q10);
q6 = veorq_u8(q6, q10);
q5 = veorq_u8(q5, q10);
// vp8_filter() function
// convert to signed
q10 = vdupq_n_u8(0x80);
q8 = veorq_u8(q8, q10);
q7 = veorq_u8(q7, q10);
q6 = veorq_u8(q6, q10);
q5 = veorq_u8(q5, q10);
q2u8 = vshrq_n_u8(q2u8, 1);
q9 = vqaddq_u8(q9, q2u8);
q2u8 = vshrq_n_u8(q2u8, 1);
q9 = vqaddq_u8(q9, q2u8);
q10 = vdupq_n_u8(3);
q10 = vdupq_n_u8(3);
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
vget_low_s8(vreinterpretq_s8_u8(q6)));
q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
vget_high_s8(vreinterpretq_s8_u8(q6)));
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
vget_low_s8(vreinterpretq_s8_u8(q6)));
q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
vget_high_s8(vreinterpretq_s8_u8(q6)));
q9 = vcgeq_u8(qblimit, q9);
q9 = vcgeq_u8(qblimit, q9);
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
vreinterpretq_s8_u8(q8));
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), vreinterpretq_s8_u8(q8));
q14u8 = vorrq_u8(q13u8, q14u8);
q14u8 = vorrq_u8(q13u8, q14u8);
q4u16 = vmovl_u8(vget_low_u8(q10));
q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
q4u16 = vmovl_u8(vget_low_u8(q10));
q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
q15u8 = vandq_u8(q15u8, q9);
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
q15u8 = vandq_u8(q15u8, q9);
q1s8 = vreinterpretq_s8_u8(q1u8);
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
q1s8 = vreinterpretq_s8_u8(q1u8);
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
q9 = vdupq_n_u8(4);
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
d2s8 = vqmovn_s16(q2s16);
d3s8 = vqmovn_s16(q11s16);
q1s8 = vcombine_s8(d2s8, d3s8);
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
q1s8 = vreinterpretq_s8_u8(q1u8);
q9 = vdupq_n_u8(4);
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
d2s8 = vqmovn_s16(q2s16);
d3s8 = vqmovn_s16(q11s16);
q1s8 = vcombine_s8(d2s8, d3s8);
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
q1s8 = vreinterpretq_s8_u8(q1u8);
q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
q2s8 = vshrq_n_s8(q2s8, 3);
q1s8 = vshrq_n_s8(q1s8, 3);
q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
q2s8 = vshrq_n_s8(q2s8, 3);
q1s8 = vshrq_n_s8(q1s8, 3);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
q1s8 = vrshrq_n_s8(q1s8, 1);
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
q1s8 = vrshrq_n_s8(q1s8, 1);
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
q0u8 = vdupq_n_u8(0x80);
*q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
*q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
*q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
*q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
return;
q0u8 = vdupq_n_u8(0x80);
*q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
*q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
*q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
*q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
return;
}
void vp8_loop_filter_horizontal_edge_y_neon(
unsigned char *src,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh) {
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh) {
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
src -= (pitch << 2);
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
src -= (pitch << 2);
q3 = vld1q_u8(src);
src += pitch;
q4 = vld1q_u8(src);
src += pitch;
q5 = vld1q_u8(src);
src += pitch;
q6 = vld1q_u8(src);
src += pitch;
q7 = vld1q_u8(src);
src += pitch;
q8 = vld1q_u8(src);
src += pitch;
q9 = vld1q_u8(src);
src += pitch;
q10 = vld1q_u8(src);
q3 = vld1q_u8(src);
src += pitch;
q4 = vld1q_u8(src);
src += pitch;
q5 = vld1q_u8(src);
src += pitch;
q6 = vld1q_u8(src);
src += pitch;
q7 = vld1q_u8(src);
src += pitch;
q8 = vld1q_u8(src);
src += pitch;
q9 = vld1q_u8(src);
src += pitch;
q10 = vld1q_u8(src);
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q5, &q6, &q7, &q8);
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9,
q10, &q5, &q6, &q7, &q8);
src -= (pitch * 5);
vst1q_u8(src, q5);
src += pitch;
vst1q_u8(src, q6);
src += pitch;
vst1q_u8(src, q7);
src += pitch;
vst1q_u8(src, q8);
return;
src -= (pitch * 5);
vst1q_u8(src, q5);
src += pitch;
vst1q_u8(src, q6);
src += pitch;
vst1q_u8(src, q7);
src += pitch;
vst1q_u8(src, q8);
return;
}
void vp8_loop_filter_horizontal_edge_uv_neon(
unsigned char *u,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh,
unsigned char *v) {
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh,
unsigned char *v) {
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
u -= (pitch << 2);
v -= (pitch << 2);
u -= (pitch << 2);
v -= (pitch << 2);
d6 = vld1_u8(u);
u += pitch;
d7 = vld1_u8(v);
v += pitch;
d8 = vld1_u8(u);
u += pitch;
d9 = vld1_u8(v);
v += pitch;
d10 = vld1_u8(u);
u += pitch;
d11 = vld1_u8(v);
v += pitch;
d12 = vld1_u8(u);
u += pitch;
d13 = vld1_u8(v);
v += pitch;
d14 = vld1_u8(u);
u += pitch;
d15 = vld1_u8(v);
v += pitch;
d16 = vld1_u8(u);
u += pitch;
d17 = vld1_u8(v);
v += pitch;
d18 = vld1_u8(u);
u += pitch;
d19 = vld1_u8(v);
v += pitch;
d20 = vld1_u8(u);
d21 = vld1_u8(v);
d6 = vld1_u8(u);
u += pitch;
d7 = vld1_u8(v);
v += pitch;
d8 = vld1_u8(u);
u += pitch;
d9 = vld1_u8(v);
v += pitch;
d10 = vld1_u8(u);
u += pitch;
d11 = vld1_u8(v);
v += pitch;
d12 = vld1_u8(u);
u += pitch;
d13 = vld1_u8(v);
v += pitch;
d14 = vld1_u8(u);
u += pitch;
d15 = vld1_u8(v);
v += pitch;
d16 = vld1_u8(u);
u += pitch;
d17 = vld1_u8(v);
v += pitch;
d18 = vld1_u8(u);
u += pitch;
d19 = vld1_u8(v);
v += pitch;
d20 = vld1_u8(u);
d21 = vld1_u8(v);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q5, &q6, &q7, &q8);
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9,
q10, &q5, &q6, &q7, &q8);
u -= (pitch * 5);
vst1_u8(u, vget_low_u8(q5));
u += pitch;
vst1_u8(u, vget_low_u8(q6));
u += pitch;
vst1_u8(u, vget_low_u8(q7));
u += pitch;
vst1_u8(u, vget_low_u8(q8));
u -= (pitch * 5);
vst1_u8(u, vget_low_u8(q5));
u += pitch;
vst1_u8(u, vget_low_u8(q6));
u += pitch;
vst1_u8(u, vget_low_u8(q7));
u += pitch;
vst1_u8(u, vget_low_u8(q8));
v -= (pitch * 5);
vst1_u8(v, vget_high_u8(q5));
v += pitch;
vst1_u8(v, vget_high_u8(q6));
v += pitch;
vst1_u8(v, vget_high_u8(q7));
v += pitch;
vst1_u8(v, vget_high_u8(q8));
return;
v -= (pitch * 5);
vst1_u8(v, vget_high_u8(q5));
v += pitch;
vst1_u8(v, vget_high_u8(q6));
v += pitch;
vst1_u8(v, vget_high_u8(q7));
v += pitch;
vst1_u8(v, vget_high_u8(q8));
return;
}
static INLINE void write_4x8(unsigned char *dst, int pitch,
const uint8x8x4_t result) {
#ifdef VPX_INCOMPATIBLE_GCC
/*
* uint8x8x4_t result
00 01 02 03 | 04 05 06 07
10 11 12 13 | 14 15 16 17
20 21 22 23 | 24 25 26 27
30 31 32 33 | 34 35 36 37
---
* after vtrn_u16
00 01 20 21 | 04 05 24 25
02 03 22 23 | 06 07 26 27
10 11 30 31 | 14 15 34 35
12 13 32 33 | 16 17 36 37
---
* after vtrn_u8
00 10 20 30 | 04 14 24 34
01 11 21 31 | 05 15 25 35
02 12 22 32 | 06 16 26 36
03 13 23 33 | 07 17 27 37
*/
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
vreinterpret_u16_u8(result.val[2]));
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
vreinterpret_u16_u8(result.val[3]));
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
vreinterpret_u8_u16(r13_u16.val[0]));
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
vreinterpret_u8_u16(r13_u16.val[1]));
const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
/*
* uint8x8x4_t result
00 01 02 03 | 04 05 06 07
10 11 12 13 | 14 15 16 17
20 21 22 23 | 24 25 26 27
30 31 32 33 | 34 35 36 37
---
* after vtrn_u16
00 01 20 21 | 04 05 24 25
02 03 22 23 | 06 07 26 27
10 11 30 31 | 14 15 34 35
12 13 32 33 | 16 17 36 37
---
* after vtrn_u8
00 10 20 30 | 04 14 24 34
01 11 21 31 | 05 15 25 35
02 12 22 32 | 06 16 26 36
03 13 23 33 | 07 17 27 37
*/
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
vreinterpret_u16_u8(result.val[2]));
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
vreinterpret_u16_u8(result.val[3]));
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
vreinterpret_u8_u16(r13_u16.val[0]));
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
vreinterpret_u8_u16(r13_u16.val[1]));
const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
dst += pitch;
vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
#else
vst4_lane_u8(dst, result, 0);
dst += pitch;
vst4_lane_u8(dst, result, 1);
dst += pitch;
vst4_lane_u8(dst, result, 2);
dst += pitch;
vst4_lane_u8(dst, result, 3);
dst += pitch;
vst4_lane_u8(dst, result, 4);
dst += pitch;
vst4_lane_u8(dst, result, 5);
dst += pitch;
vst4_lane_u8(dst, result, 6);
dst += pitch;
vst4_lane_u8(dst, result, 7);
vst4_lane_u8(dst, result, 0);
dst += pitch;
vst4_lane_u8(dst, result, 1);
dst += pitch;
vst4_lane_u8(dst, result, 2);
dst += pitch;
vst4_lane_u8(dst, result, 3);
dst += pitch;
vst4_lane_u8(dst, result, 4);
dst += pitch;
vst4_lane_u8(dst, result, 5);
dst += pitch;
vst4_lane_u8(dst, result, 6);
dst += pitch;
vst4_lane_u8(dst, result, 7);
#endif // VPX_INCOMPATIBLE_GCC
}
void vp8_loop_filter_vertical_edge_y_neon(
unsigned char *src,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh) {
unsigned char *s, *d;
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
uint8x8x4_t q4ResultH, q4ResultL;
void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh) {
unsigned char *s, *d;
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
uint8x8x4_t q4ResultH, q4ResultL;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
s = src - 4;
d6 = vld1_u8(s);
s += pitch;
d8 = vld1_u8(s);
s += pitch;
d10 = vld1_u8(s);
s += pitch;
d12 = vld1_u8(s);
s += pitch;
d14 = vld1_u8(s);
s += pitch;
d16 = vld1_u8(s);
s += pitch;
d18 = vld1_u8(s);
s += pitch;
d20 = vld1_u8(s);
s += pitch;
d7 = vld1_u8(s);
s += pitch;
d9 = vld1_u8(s);
s += pitch;
d11 = vld1_u8(s);
s += pitch;
d13 = vld1_u8(s);
s += pitch;
d15 = vld1_u8(s);
s += pitch;
d17 = vld1_u8(s);
s += pitch;
d19 = vld1_u8(s);
s += pitch;
d21 = vld1_u8(s);
s = src - 4;
d6 = vld1_u8(s);
s += pitch;
d8 = vld1_u8(s);
s += pitch;
d10 = vld1_u8(s);
s += pitch;
d12 = vld1_u8(s);
s += pitch;
d14 = vld1_u8(s);
s += pitch;
d16 = vld1_u8(s);
s += pitch;
d18 = vld1_u8(s);
s += pitch;
d20 = vld1_u8(s);
s += pitch;
d7 = vld1_u8(s);
s += pitch;
d9 = vld1_u8(s);
s += pitch;
d11 = vld1_u8(s);
s += pitch;
d13 = vld1_u8(s);
s += pitch;
d15 = vld1_u8(s);
s += pitch;
d17 = vld1_u8(s);
s += pitch;
d19 = vld1_u8(s);
s += pitch;
d21 = vld1_u8(s);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q5, &q6, &q7, &q8);
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9,
q10, &q5, &q6, &q7, &q8);
q4ResultL.val[0] = vget_low_u8(q5); // d10
q4ResultL.val[1] = vget_low_u8(q6); // d12
q4ResultL.val[2] = vget_low_u8(q7); // d14
q4ResultL.val[3] = vget_low_u8(q8); // d16
q4ResultH.val[0] = vget_high_u8(q5); // d11
q4ResultH.val[1] = vget_high_u8(q6); // d13
q4ResultH.val[2] = vget_high_u8(q7); // d15
q4ResultH.val[3] = vget_high_u8(q8); // d17
q4ResultL.val[0] = vget_low_u8(q5); // d10
q4ResultL.val[1] = vget_low_u8(q6); // d12
q4ResultL.val[2] = vget_low_u8(q7); // d14
q4ResultL.val[3] = vget_low_u8(q8); // d16
q4ResultH.val[0] = vget_high_u8(q5); // d11
q4ResultH.val[1] = vget_high_u8(q6); // d13
q4ResultH.val[2] = vget_high_u8(q7); // d15
q4ResultH.val[3] = vget_high_u8(q8); // d17
d = src - 2;
write_4x8(d, pitch, q4ResultL);
d += pitch * 8;
write_4x8(d, pitch, q4ResultH);
d = src - 2;
write_4x8(d, pitch, q4ResultL);
d += pitch * 8;
write_4x8(d, pitch, q4ResultH);
}
void vp8_loop_filter_vertical_edge_uv_neon(
unsigned char *u,
int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh,
unsigned char *v) {
unsigned char *us, *ud;
unsigned char *vs, *vd;
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
uint8x8x4_t q4ResultH, q4ResultL;
void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
unsigned char blimit,
unsigned char limit,
unsigned char thresh,
unsigned char *v) {
unsigned char *us, *ud;
unsigned char *vs, *vd;
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
uint8x16_t q5, q6, q7, q8, q9, q10;
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
uint8x8x4_t q4ResultH, q4ResultL;
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
qblimit = vdupq_n_u8(blimit);
qlimit = vdupq_n_u8(limit);
qthresh = vdupq_n_u8(thresh);
us = u - 4;
d6 = vld1_u8(us);
us += pitch;
d8 = vld1_u8(us);
us += pitch;
d10 = vld1_u8(us);
us += pitch;
d12 = vld1_u8(us);
us += pitch;
d14 = vld1_u8(us);
us += pitch;
d16 = vld1_u8(us);
us += pitch;
d18 = vld1_u8(us);
us += pitch;
d20 = vld1_u8(us);
us = u - 4;
d6 = vld1_u8(us);
us += pitch;
d8 = vld1_u8(us);
us += pitch;
d10 = vld1_u8(us);
us += pitch;
d12 = vld1_u8(us);
us += pitch;
d14 = vld1_u8(us);
us += pitch;
d16 = vld1_u8(us);
us += pitch;
d18 = vld1_u8(us);
us += pitch;
d20 = vld1_u8(us);
vs = v - 4;
d7 = vld1_u8(vs);
vs += pitch;
d9 = vld1_u8(vs);
vs += pitch;
d11 = vld1_u8(vs);
vs += pitch;
d13 = vld1_u8(vs);
vs += pitch;
d15 = vld1_u8(vs);
vs += pitch;
d17 = vld1_u8(vs);
vs += pitch;
d19 = vld1_u8(vs);
vs += pitch;
d21 = vld1_u8(vs);
vs = v - 4;
d7 = vld1_u8(vs);
vs += pitch;
d9 = vld1_u8(vs);
vs += pitch;
d11 = vld1_u8(vs);
vs += pitch;
d13 = vld1_u8(vs);
vs += pitch;
d15 = vld1_u8(vs);
vs += pitch;
d17 = vld1_u8(vs);
vs += pitch;
d19 = vld1_u8(vs);
vs += pitch;
d21 = vld1_u8(vs);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q3 = vcombine_u8(d6, d7);
q4 = vcombine_u8(d8, d9);
q5 = vcombine_u8(d10, d11);
q6 = vcombine_u8(d12, d13);
q7 = vcombine_u8(d14, d15);
q8 = vcombine_u8(d16, d17);
q9 = vcombine_u8(d18, d19);
q10 = vcombine_u8(d20, d21);
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
vreinterpretq_u16_u32(q2tmp2.val[0]));
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
vreinterpretq_u16_u32(q2tmp3.val[0]));
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
vreinterpretq_u16_u32(q2tmp2.val[1]));
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
vreinterpretq_u16_u32(q2tmp3.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
vreinterpretq_u8_u16(q2tmp5.val[0]));
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
vreinterpretq_u8_u16(q2tmp5.val[1]));
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
vreinterpretq_u8_u16(q2tmp7.val[0]));
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
vreinterpretq_u8_u16(q2tmp7.val[1]));
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
q3 = q2tmp8.val[0];
q4 = q2tmp8.val[1];
q5 = q2tmp9.val[0];
q6 = q2tmp9.val[1];
q7 = q2tmp10.val[0];
q8 = q2tmp10.val[1];
q9 = q2tmp11.val[0];
q10 = q2tmp11.val[1];
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
q5, q6, q7, q8, q9, q10,
&q5, &q6, &q7, &q8);
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9,
q10, &q5, &q6, &q7, &q8);
q4ResultL.val[0] = vget_low_u8(q5); // d10
q4ResultL.val[1] = vget_low_u8(q6); // d12
q4ResultL.val[2] = vget_low_u8(q7); // d14
q4ResultL.val[3] = vget_low_u8(q8); // d16
ud = u - 2;
write_4x8(ud, pitch, q4ResultL);
q4ResultL.val[0] = vget_low_u8(q5); // d10
q4ResultL.val[1] = vget_low_u8(q6); // d12
q4ResultL.val[2] = vget_low_u8(q7); // d14
q4ResultL.val[3] = vget_low_u8(q8); // d16
ud = u - 2;
write_4x8(ud, pitch, q4ResultL);
q4ResultH.val[0] = vget_high_u8(q5); // d11
q4ResultH.val[1] = vget_high_u8(q6); // d13
q4ResultH.val[2] = vget_high_u8(q7); // d15
q4ResultH.val[3] = vget_high_u8(q8); // d17
vd = v - 2;
write_4x8(vd, pitch, q4ResultH);
q4ResultH.val[0] = vget_high_u8(q5); // d11
q4ResultH.val[1] = vget_high_u8(q6); // d13
q4ResultH.val[2] = vget_high_u8(q7); // d15
q4ResultH.val[3] = vget_high_u8(q8); // d17
vd = v - 2;
write_4x8(vd, pitch, q4ResultH);
}

View File

@ -8,15 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
const unsigned char vp8_block2left[25] =
{
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
const unsigned char vp8_block2left[25] = {
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
};
const unsigned char vp8_block2above[25] =
{
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
const unsigned char vp8_block2above[25] = {
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
};

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_BLOCKD_H_
#define VP8_COMMON_BLOCKD_H_
@ -28,280 +27,266 @@ extern "C" {
#define DCPREDSIMTHRESH 0
#define DCPREDCNTTHRESH 3
#define MB_FEATURE_TREE_PROBS 3
#define MAX_MB_SEGMENTS 4
#define MB_FEATURE_TREE_PROBS 3
#define MAX_MB_SEGMENTS 4
#define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 4
#define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 4
/* Segment Feature Masks */
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
typedef struct
{
int r, c;
} POS;
#define PLANE_TYPE_Y_NO_DC 0
#define PLANE_TYPE_Y2 1
#define PLANE_TYPE_UV 2
#define PLANE_TYPE_Y_WITH_DC 3
typedef struct { int r, c; } POS;
#define PLANE_TYPE_Y_NO_DC 0
#define PLANE_TYPE_Y2 1
#define PLANE_TYPE_UV 2
#define PLANE_TYPE_Y_WITH_DC 3
typedef char ENTROPY_CONTEXT;
typedef struct
{
ENTROPY_CONTEXT y1[4];
ENTROPY_CONTEXT u[2];
ENTROPY_CONTEXT v[2];
ENTROPY_CONTEXT y2;
typedef struct {
ENTROPY_CONTEXT y1[4];
ENTROPY_CONTEXT u[2];
ENTROPY_CONTEXT v[2];
ENTROPY_CONTEXT y2;
} ENTROPY_CONTEXT_PLANES;
extern const unsigned char vp8_block2left[25];
extern const unsigned char vp8_block2above[25];
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
Dest = (A)+(B);
#define VP8_COMBINEENTROPYCONTEXTS(Dest, A, B) Dest = (A) + (B);
typedef enum { KEY_FRAME = 0, INTER_FRAME = 1 } FRAME_TYPE;
typedef enum
{
KEY_FRAME = 0,
INTER_FRAME = 1
} FRAME_TYPE;
typedef enum {
DC_PRED, /* average of above and left pixels */
V_PRED, /* vertical prediction */
H_PRED, /* horizontal prediction */
TM_PRED, /* Truemotion prediction */
B_PRED, /* block based prediction, each block has its own prediction mode */
typedef enum
{
DC_PRED, /* average of above and left pixels */
V_PRED, /* vertical prediction */
H_PRED, /* horizontal prediction */
TM_PRED, /* Truemotion prediction */
B_PRED, /* block based prediction, each block has its own prediction mode */
NEARESTMV,
NEARMV,
ZEROMV,
NEWMV,
SPLITMV,
NEARESTMV,
NEARMV,
ZEROMV,
NEWMV,
SPLITMV,
MB_MODE_COUNT
MB_MODE_COUNT
} MB_PREDICTION_MODE;
/* Macroblock level features */
typedef enum
{
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
MB_LVL_MAX = 2 /* Number of MB level features supported */
typedef enum {
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
MB_LVL_MAX = 2 /* Number of MB level features supported */
} MB_LVL_FEATURES;
/* Segment Feature Masks */
#define SEGMENT_ALTQ 0x01
#define SEGMENT_ALT_LF 0x02
#define SEGMENT_ALTQ 0x01
#define SEGMENT_ALT_LF 0x02
#define VP8_YMODES (B_PRED + 1)
#define VP8_YMODES (B_PRED + 1)
#define VP8_UV_MODES (TM_PRED + 1)
#define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
typedef enum
{
B_DC_PRED, /* average of above and left pixels */
B_TM_PRED,
typedef enum {
B_DC_PRED, /* average of above and left pixels */
B_TM_PRED,
B_VE_PRED, /* vertical prediction */
B_HE_PRED, /* horizontal prediction */
B_VE_PRED, /* vertical prediction */
B_HE_PRED, /* horizontal prediction */
B_LD_PRED,
B_RD_PRED,
B_LD_PRED,
B_RD_PRED,
B_VR_PRED,
B_VL_PRED,
B_HD_PRED,
B_HU_PRED,
B_VR_PRED,
B_VL_PRED,
B_HD_PRED,
B_HU_PRED,
LEFT4X4,
ABOVE4X4,
ZERO4X4,
NEW4X4,
LEFT4X4,
ABOVE4X4,
ZERO4X4,
NEW4X4,
B_MODE_COUNT
B_MODE_COUNT
} B_PREDICTION_MODE;
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
union b_mode_info
{
B_PREDICTION_MODE as_mode;
int_mv mv;
/* Per-4x4-sub-block information: either an intra sub-block prediction mode
   or a motion vector — only one is meaningful at a time, hence the union. */
union b_mode_info {
B_PREDICTION_MODE as_mode; /* intra sub-block prediction mode */
int_mv mv; /* sub-block motion vector */
};
typedef enum
{
INTRA_FRAME = 0,
LAST_FRAME = 1,
GOLDEN_FRAME = 2,
ALTREF_FRAME = 3,
MAX_REF_FRAMES = 4
/* Frames a macroblock can be predicted from; INTRA_FRAME denotes
   intra (no reference frame) coding. */
typedef enum {
INTRA_FRAME = 0,
LAST_FRAME = 1,
GOLDEN_FRAME = 2,
ALTREF_FRAME = 3, /* alternate reference frame */
MAX_REF_FRAMES = 4
} MV_REFERENCE_FRAME;
typedef struct
{
uint8_t mode, uv_mode;
uint8_t ref_frame;
uint8_t is_4x4;
int_mv mv;
typedef struct {
uint8_t mode, uv_mode;
uint8_t ref_frame;
uint8_t is_4x4;
int_mv mv;
uint8_t partitioning;
uint8_t mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
uint8_t need_to_clamp_mvs;
uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */
uint8_t partitioning;
/* does this mb has coefficients at all, 1=no coefficients, 0=need decode
tokens */
uint8_t mb_skip_coeff;
uint8_t need_to_clamp_mvs;
/* Which set of segmentation parameters should be used for this MB */
uint8_t segment_id;
} MB_MODE_INFO;
typedef struct modeinfo
{
MB_MODE_INFO mbmi;
union b_mode_info bmi[16];
/* Mode information for one macroblock: macroblock-level data (mbmi) plus
   per-sub-block data for each of the 16 4x4 sub-blocks (bmi). */
typedef struct modeinfo {
MB_MODE_INFO mbmi;
union b_mode_info bmi[16];
} MODE_INFO;
#if CONFIG_MULTI_RES_ENCODING
/* The mb-level information needed to be stored for higher-resolution encoder */
typedef struct
{
MB_PREDICTION_MODE mode;
MV_REFERENCE_FRAME ref_frame;
int_mv mv;
int dissim; /* dissimilarity level of the macroblock */
/* The mb-level information needed to be stored for higher-resolution
   encoder (multi-res encoding). */
typedef struct {
MB_PREDICTION_MODE mode; /* mode chosen at the lower resolution */
MV_REFERENCE_FRAME ref_frame; /* reference frame used at the lower resolution */
int_mv mv;
int dissim; /* dissimilarity level of the macroblock */
} LOWER_RES_MB_INFO;
/* The frame-level information needed to be stored for higher-resolution
* encoder */
typedef struct
{
FRAME_TYPE frame_type;
int is_frame_dropped;
// The frame rate for the lowest resolution.
double low_res_framerate;
/* The frame number of each reference frames */
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
// The video frame counter value for the key frame, for lowest resolution.
unsigned int key_frame_counter_value;
LOWER_RES_MB_INFO *mb_info;
/* The frame-level information needed to be stored for higher-resolution
   encoder (multi-res encoding). */
typedef struct {
FRAME_TYPE frame_type;
int is_frame_dropped;
// The frame rate for the lowest resolution.
double low_res_framerate;
/* The frame number of each reference frames */
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
// The video frame counter value for the key frame, for lowest resolution.
unsigned int key_frame_counter_value;
/* Per-macroblock info from the lower-resolution encode. */
LOWER_RES_MB_INFO *mb_info;
} LOWER_RES_FRAME_INFO;
#endif
typedef struct blockd
{
short *qcoeff;
short *dqcoeff;
unsigned char *predictor;
short *dequant;
typedef struct blockd {
short *qcoeff;
short *dqcoeff;
unsigned char *predictor;
short *dequant;
int offset;
char *eob;
int offset;
char *eob;
union b_mode_info bmi;
union b_mode_info bmi;
} BLOCKD;
/* Function pointer type for sub-pixel prediction: reads from src (pitch
   src_pitch), with sub-pixel offsets xofst/yofst, writing the result to
   dst (pitch dst_pitch). */
typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
/* Function pointer type for sub-pixel prediction: reads from src (pitch
   src_pitch), with sub-pixel offsets xofst/yofst, writing the result to
   dst (pitch dst_pitch). */
typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst,
int yofst, unsigned char *dst, int dst_pitch);
typedef struct macroblockd
{
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
typedef struct macroblockd {
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
DECLARE_ALIGNED(16, short, dequant_y1[16]);
DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
DECLARE_ALIGNED(16, short, dequant_y2[16]);
DECLARE_ALIGNED(16, short, dequant_uv[16]);
DECLARE_ALIGNED(16, short, dequant_y1[16]);
DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
DECLARE_ALIGNED(16, short, dequant_y2[16]);
DECLARE_ALIGNED(16, short, dequant_uv[16]);
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
int fullpixel_mask;
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
int fullpixel_mask;
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
YV12_BUFFER_CONFIG dst;
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
YV12_BUFFER_CONFIG dst;
MODE_INFO *mode_info_context;
int mode_info_stride;
MODE_INFO *mode_info_context;
int mode_info_stride;
FRAME_TYPE frame_type;
FRAME_TYPE frame_type;
int up_available;
int left_available;
int up_available;
int left_available;
unsigned char *recon_above[3];
unsigned char *recon_left[3];
int recon_left_stride[2];
unsigned char *recon_above[3];
unsigned char *recon_left[3];
int recon_left_stride[2];
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context;
ENTROPY_CONTEXT_PLANES *left_context;
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context;
ENTROPY_CONTEXT_PLANES *left_context;
/* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
unsigned char segmentation_enabled;
/* 0 indicates segmentation at MB level is not enabled. Otherwise the
* individual bits indicate which features are active. */
unsigned char segmentation_enabled;
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
unsigned char update_mb_segmentation_map;
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
unsigned char update_mb_segmentation_map;
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char update_mb_segmentation_data;
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char update_mb_segmentation_data;
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char mb_segement_abs_delta;
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char mb_segement_abs_delta;
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
/* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
/* Per frame flags that define which MB level features (such as quantizer or
* loop filter level) */
/* are enabled and when enabled the proabilities used to decode the per MB
* flags in MB_MODE_INFO */
/* Probability Tree used to code Segment number */
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];
/* Segment parameters */
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
/* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
unsigned char mode_ref_lf_delta_update;
/* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
unsigned char mode_ref_lf_delta_update;
/* Delta values have the range +/- MAX_LOOP_FILTER */
signed char
last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
/* 0 = BPRED, ZERO_MV, MV, SPLIT */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
signed char
mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
/* Delta values have the range +/- MAX_LOOP_FILTER */
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
/* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;
int mb_to_bottom_edge;
/* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;
int mb_to_bottom_edge;
vp8_subpix_fn_t subpixel_predict;
vp8_subpix_fn_t subpixel_predict8x4;
vp8_subpix_fn_t subpixel_predict8x8;
vp8_subpix_fn_t subpixel_predict16x16;
void *current_bc;
vp8_subpix_fn_t subpixel_predict;
vp8_subpix_fn_t subpixel_predict8x4;
vp8_subpix_fn_t subpixel_predict8x8;
vp8_subpix_fn_t subpixel_predict16x16;
void *current_bc;
int corrupted;
int corrupted;
#if ARCH_X86 || ARCH_X86_64
/* This is an intermediate buffer currently used in sub-pixel motion search
* to keep a copy of the reference area. This buffer can be used for other
* purpose.
*/
DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
/* This is an intermediate buffer currently used in sub-pixel motion search
* to keep a copy of the reference area. This buffer can be used for other
* purpose.
*/
DECLARE_ALIGNED(32, unsigned char, y_buf[22 * 32]);
#endif
} MACROBLOCKD;
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);

View File

@ -18,177 +18,177 @@ extern "C" {
/* Update probabilities for the nodes in the token entropy tree.
Generated file included by entropy.c */
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
{
{
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
},
{
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
},
{
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
{
{
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
},
{
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
{
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
},
},
};
const vp8_prob vp8_coef_update_probs
[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] = {
{
{
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
},
{
{
{ 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
{ 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 },
},
{
{ 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
},
{
{
{ 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 },
},
{
{ 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
},
{
{
{ 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
{ 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
{
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
},
},
};
#ifdef __cplusplus
} // extern "C"

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_COMMON_H_
#define VP8_COMMON_COMMON_H_
@ -24,22 +23,23 @@ extern "C" {
/* Only need this for fixed-size arrays, for structs just assign. */
#define vp8_copy( Dest, Src) { \
assert( sizeof( Dest) == sizeof( Src)); \
memcpy( Dest, Src, sizeof( Src)); \
}
#define vp8_copy(Dest, Src) \
{ \
assert(sizeof(Dest) == sizeof(Src)); \
memcpy(Dest, Src, sizeof(Src)); \
}
/* Use this for variably-sized arrays. */
#define vp8_copy_array( Dest, Src, N) { \
assert( sizeof( *Dest) == sizeof( *Src)); \
memcpy( Dest, Src, N * sizeof( *Src)); \
}
#define vp8_copy_array(Dest, Src, N) \
{ \
assert(sizeof(*Dest) == sizeof(*Src)); \
memcpy(Dest, Src, N * sizeof(*Src)); \
}
#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));
#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));
#define vp8_zero(Dest) memset(&Dest, 0, sizeof(Dest));
#define vp8_zero_array(Dest, N) memset(Dest, 0, N * sizeof(*Dest));
#ifdef __cplusplus
} // extern "C"

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "entropy.h"
/* *** GENERATED FILE: DO NOT EDIT *** */

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "./vp8_rtcd.h"
@ -16,17 +15,13 @@
/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
unsigned char *dst_ptr, int dst_stride,
int height)
{
int r;
unsigned char *dst_ptr, int dst_stride, int height) {
int r;
for (r = 0; r < height; r++)
{
memcpy(dst_ptr, src_ptr, 32);
for (r = 0; r < height; r++) {
memcpy(dst_ptr, src_ptr, 32);
src_ptr += src_stride;
dst_ptr += dst_stride;
}
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}

View File

@ -8,148 +8,128 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include "blockd.h"
void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols,
int frame) {
int mb_row;
int mb_col;
int mb_index = 0;
FILE *mvs = fopen("mvs.stt", "a");
void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame)
{
/* print out the macroblock Y modes */
mb_index = 0;
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
int mb_row;
int mb_col;
int mb_index = 0;
FILE *mvs = fopen("mvs.stt", "a");
for (mb_row = 0; mb_row < rows; mb_row++) {
for (mb_col = 0; mb_col < cols; mb_col++) {
fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
/* print out the macroblock Y modes */
mb_index = 0;
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
for (mb_row = 0; mb_row < rows; mb_row++)
{
for (mb_col = 0; mb_col < cols; mb_col++)
{
fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
mb_index++;
}
fprintf(mvs, "\n");
mb_index++;
mb_index++;
}
fprintf(mvs, "\n");
mb_index++;
}
mb_index = 0;
fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
fprintf(mvs, "\n");
for (mb_row = 0; mb_row < rows; mb_row++)
{
for (mb_col = 0; mb_col < cols; mb_col++)
{
mb_index = 0;
fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
for (mb_row = 0; mb_row < rows; mb_row++) {
for (mb_col = 0; mb_col < cols; mb_col++) {
fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
mb_index++;
}
fprintf(mvs, "\n");
mb_index++;
mb_index++;
}
fprintf(mvs, "\n");
mb_index++;
}
/* print out the macroblock UV modes */
mb_index = 0;
fprintf(mvs, "UV Modes for Frame %d\n", frame);
fprintf(mvs, "\n");
for (mb_row = 0; mb_row < rows; mb_row++)
{
for (mb_col = 0; mb_col < cols; mb_col++)
{
/* print out the macroblock UV modes */
mb_index = 0;
fprintf(mvs, "UV Modes for Frame %d\n", frame);
fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
for (mb_row = 0; mb_row < rows; mb_row++) {
for (mb_col = 0; mb_col < cols; mb_col++) {
fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
mb_index++;
}
mb_index++;
fprintf(mvs, "\n");
mb_index++;
}
mb_index++;
fprintf(mvs, "\n");
}
/* print out the block modes */
fprintf(mvs, "Mbs for Frame %d\n", frame);
{
int b_row;
fprintf(mvs, "\n");
for (b_row = 0; b_row < 4 * rows; b_row++)
{
int b_col;
int bindex;
/* print out the block modes */
fprintf(mvs, "Mbs for Frame %d\n", frame);
{
int b_row;
for (b_col = 0; b_col < 4 * cols; b_col++)
{
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
for (b_row = 0; b_row < 4 * rows; b_row++) {
int b_col;
int bindex;
if (mi[mb_index].mbmi.mode == B_PRED)
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
else
fprintf(mvs, "xx ");
for (b_col = 0; b_col < 4 * cols; b_col++) {
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
}
if (mi[mb_index].mbmi.mode == B_PRED)
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
else
fprintf(mvs, "xx ");
}
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
/* print out the macroblock mvs */
mb_index = 0;
fprintf(mvs, "MVs for Frame %d\n", frame);
/* print out the macroblock mvs */
mb_index = 0;
fprintf(mvs, "MVs for Frame %d\n", frame);
for (mb_row = 0; mb_row < rows; mb_row++)
{
for (mb_col = 0; mb_col < cols; mb_col++)
{
fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2);
for (mb_row = 0; mb_row < rows; mb_row++) {
for (mb_col = 0; mb_col < cols; mb_col++) {
fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2,
mi[mb_index].mbmi.mv.as_mv.col / 2);
mb_index++;
}
mb_index++;
fprintf(mvs, "\n");
mb_index++;
}
mb_index++;
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
/* print out the block modes */
fprintf(mvs, "MVs for Frame %d\n", frame);
{
int b_row;
/* print out the block modes */
fprintf(mvs, "MVs for Frame %d\n", frame);
{
int b_row;
for (b_row = 0; b_row < 4 * rows; b_row++)
{
int b_col;
int bindex;
for (b_row = 0; b_row < 4 * rows; b_row++) {
int b_col;
int bindex;
for (b_col = 0; b_col < 4 * cols; b_col++)
{
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col);
for (b_col = 0; b_col < 4 * cols; b_col++) {
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row,
mi[mb_index].bmi[bindex].mv.as_mv.col);
}
}
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
}
fprintf(mvs, "\n");
fclose(mvs);
fclose(mvs);
}

View File

@ -17,181 +17,141 @@ extern "C" {
/*Generated file, included by entropy.c*/
static const vp8_prob default_coef_probs [BLOCK_TYPES]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[ENTROPY_NODES] =
{
{ /* Block Type ( 0 ) */
static const vp8_prob default_coef_probs
[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] = {
{ /* Block Type ( 0 ) */
{ /* Coeff Band ( 0 )*/
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
{ /* Coeff Band ( 1 )*/
{ 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
},
{ 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } },
{ /* Coeff Band ( 2 )*/
{ 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }
},
{ 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 } },
{ /* Coeff Band ( 3 )*/
{ 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }
},
{ 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 } },
{ /* Coeff Band ( 4 )*/
{ 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
},
{ 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } },
{ /* Coeff Band ( 5 )*/
{ 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
},
{ 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } },
{ /* Coeff Band ( 6 )*/
{ 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
},
{ 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } },
{ /* Coeff Band ( 7 )*/
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
}
},
{ /* Block Type ( 1 ) */
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } },
{ /* Block Type ( 1 ) */
{ /* Coeff Band ( 0 )*/
{ 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
},
{ 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } },
{ /* Coeff Band ( 1 )*/
{ 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
},
{ 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } },
{ /* Coeff Band ( 2 )*/
{ 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
},
{ 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } },
{ /* Coeff Band ( 3 )*/
{ 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
},
{ 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } },
{ /* Coeff Band ( 4 )*/
{ 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
},
{ 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } },
{ /* Coeff Band ( 5 )*/
{ 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
},
{ 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } },
{ /* Coeff Band ( 6 )*/
{ 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
},
{ 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } },
{ /* Coeff Band ( 7 )*/
{ 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
}
},
{ /* Block Type ( 2 ) */
{ 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } } },
{ /* Block Type ( 2 ) */
{ /* Coeff Band ( 0 )*/
{ 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
},
{ 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } },
{ /* Coeff Band ( 1 )*/
{ 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
},
{ 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } },
{ /* Coeff Band ( 2 )*/
{ 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
},
{ 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } },
{ /* Coeff Band ( 3 )*/
{ 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
},
{ 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } },
{ /* Coeff Band ( 4 )*/
{ 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } },
{ /* Coeff Band ( 5 )*/
{ 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } },
{ /* Coeff Band ( 6 )*/
{ 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } },
{ /* Coeff Band ( 7 )*/
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
}
},
{ /* Block Type ( 3 ) */
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } },
{ /* Block Type ( 3 ) */
{ /* Coeff Band ( 0 )*/
{ 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
},
{ 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } },
{ /* Coeff Band ( 1 )*/
{ 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
},
{ 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } },
{ /* Coeff Band ( 2 )*/
{ 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
},
{ 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } },
{ /* Coeff Band ( 3 )*/
{ 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
},
{ 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } },
{ /* Coeff Band ( 4 )*/
{ 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
},
{ 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } },
{ /* Coeff Band ( 5 )*/
{ 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
},
{ 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } },
{ /* Coeff Band ( 6 )*/
{ 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
},
{ 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } },
{ /* Coeff Band ( 7 )*/
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
}
}
};
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } } }
};
#ifdef __cplusplus
} // extern "C"

View File

@ -8,36 +8,30 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
{
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
void vp8_dequantize_b_c(BLOCKD *d, short *DQC) {
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
for (i = 0; i < 16; i++)
{
DQ[i] = Q[i] * DQC[i];
}
for (i = 0; i < 16; i++) {
DQ[i] = Q[i] * DQC[i];
}
}
void vp8_dequant_idct_add_c(short *input, short *dq,
unsigned char *dest, int stride)
{
int i;
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *dest,
int stride) {
int i;
for (i = 0; i < 16; i++)
{
input[i] = dq[i] * input[i];
}
for (i = 0; i < 16; i++) {
input[i] = dq[i] * input[i];
}
vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);
memset(input, 0, 32);
vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);
memset(input, 0, 32);
}

View File

@ -15,47 +15,34 @@
#include "coefupdateprobs.h"
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = {
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) =
{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
};
DECLARE_ALIGNED(16, const unsigned char,
vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
{
0, 1, 4, 8,
5, 2, 3, 6,
9, 12, 13, 10,
7, 11, 14, 15,
vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = {
0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0
};
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
{
1, 2, 6, 7,
3, 5, 8, 13,
4, 9, 12, 14,
10, 11, 15, 16
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = {
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15,
};
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = {
1, 2, 6, 7, 3, 5, 8, 13, 4, 9, 12, 14, 10, 11, 15, 16
};
/* vp8_default_zig_zag_mask generated with:
@ -71,87 +58,69 @@ DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
}
*/
DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) =
{
1, 2, 32, 64,
4, 16, 128, 4096,
8, 256, 2048, 8192,
512, 1024, 16384, -32768
DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) = {
1, 2, 32, 64, 4, 16, 128, 4096, 8, 256, 2048, 8192, 512, 1024, 16384, -32768
};
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = { 7, 6 };
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
{
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
-ONE_TOKEN, 6, /* 2 = ONE */
8, 12, /* 3 = LOW_VAL */
-TWO_TOKEN, 10, /* 4 = TWO */
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
14, 16, /* 6 = HIGH_LOW */
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
18, 20, /* 8 = CAT_THREEFOUR */
-DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
/* corresponding _CONTEXT_NODEs */
/* clang-format off */
const vp8_tree_index vp8_coef_tree[22] = {
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
-ONE_TOKEN, 6, /* 2 = ONE */
8, 12, /* 3 = LOW_VAL */
-TWO_TOKEN, 10, /* 4 = TWO */
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
14, 16, /* 6 = HIGH_LOW */
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
18, 20, /* 8 = CAT_THREEFOUR */
-DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
};
/* clang-format on */
/* vp8_coef_encodings generated with:
vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree);
*/
vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
{
{2, 2},
{6, 3},
{28, 5},
{58, 6},
{59, 6},
{60, 6},
{61, 6},
{124, 7},
{125, 7},
{126, 7},
{127, 7},
{0, 1}
vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = {
{ 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 },
{ 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 }
};
/* Trees for extra bits. Probabilities are constant and
do not depend on previously encoded bits */
static const vp8_prob Pcat1[] = { 159};
static const vp8_prob Pcat2[] = { 165, 145};
static const vp8_prob Pcat3[] = { 173, 148, 140};
static const vp8_prob Pcat4[] = { 176, 155, 140, 135};
static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130};
static const vp8_prob Pcat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
static const vp8_prob Pcat1[] = { 159 };
static const vp8_prob Pcat2[] = { 165, 145 };
static const vp8_prob Pcat3[] = { 173, 148, 140 };
static const vp8_prob Pcat4[] = { 176, 155, 140, 135 };
static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130 };
static const vp8_prob Pcat6[] = { 254, 254, 243, 230, 196, 177,
153, 140, 133, 130, 129 };
/* tree index tables generated with:
void init_bit_tree(vp8_tree_index *p, int n)
{
int i = 0;
void init_bit_tree(vp8_tree_index *p, int n) {
int i = 0;
while (++i < n)
{
p[0] = p[1] = i << 1;
p += 2;
}
while (++i < n) {
p[0] = p[1] = i << 1;
p += 2;
}
p[0] = p[1] = 0;
p[0] = p[1] = 0;
}
void init_bit_trees()
{
init_bit_tree(cat1, 1);
init_bit_tree(cat2, 2);
init_bit_tree(cat3, 3);
init_bit_tree(cat4, 4);
init_bit_tree(cat5, 5);
init_bit_tree(cat6, 11);
void init_bit_trees() {
init_bit_tree(cat1, 1);
init_bit_tree(cat2, 2);
init_bit_tree(cat3, 3);
init_bit_tree(cat4, 4);
init_bit_tree(cat5, 5);
init_bit_tree(cat6, 11);
}
*/
@ -160,29 +129,19 @@ static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 };
static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
14, 14, 16, 16, 18, 18, 20, 20, 0, 0 };
static const vp8_tree_index cat6[22] = {
2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 0, 0
};
const vp8_extra_bit_struct vp8_extra_bits[12] =
{
{ 0, 0, 0, 0},
{ 0, 0, 0, 1},
{ 0, 0, 0, 2},
{ 0, 0, 0, 3},
{ 0, 0, 0, 4},
{ cat1, Pcat1, 1, 5},
{ cat2, Pcat2, 2, 7},
{ cat3, Pcat3, 3, 11},
{ cat4, Pcat4, 4, 19},
{ cat5, Pcat5, 5, 35},
{ cat6, Pcat6, 11, 67},
{ 0, 0, 0, 0}
const vp8_extra_bit_struct vp8_extra_bits[12] = {
{ 0, 0, 0, 0 }, { 0, 0, 0, 1 }, { 0, 0, 0, 2 },
{ 0, 0, 0, 3 }, { 0, 0, 0, 4 }, { cat1, Pcat1, 1, 5 },
{ cat2, Pcat2, 2, 7 }, { cat3, Pcat3, 3, 11 }, { cat4, Pcat4, 4, 19 },
{ cat5, Pcat5, 5, 35 }, { cat6, Pcat6, 11, 67 }, { 0, 0, 0, 0 }
};
#include "default_coef_probs.h"
void vp8_default_coef_probs(VP8_COMMON *pc)
{
memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
void vp8_default_coef_probs(VP8_COMMON *pc) {
memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ENTROPY_H_
#define VP8_COMMON_ENTROPY_H_
@ -21,18 +20,18 @@ extern "C" {
/* Coefficient token alphabet */
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define MAX_ENTROPY_TOKENS 12
#define ENTROPY_NODES 11
@ -41,21 +40,20 @@ extern const vp8_tree_index vp8_coef_tree[];
extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
typedef struct
{
vp8_tree_p tree;
const vp8_prob *prob;
int Len;
int base_val;
typedef struct {
vp8_tree_p tree;
const vp8_prob *prob;
int Len;
int base_val;
} vp8_extra_bit_struct;
extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
extern const vp8_extra_bit_struct
vp8_extra_bits[12]; /* indexed by token value */
#define PROB_UPDATE_BASELINE_COST 7
#define MAX_PROB 255
#define DCT_MAX_VALUE 2048
#define PROB_UPDATE_BASELINE_COST 7
#define MAX_PROB 255
#define DCT_MAX_VALUE 2048
/* Coefficients are predicted via a 3-dimensional probability table. */
@ -86,12 +84,13 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
distinct bands). */
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
# define PREV_COEF_CONTEXTS 3
#define PREV_COEF_CONTEXTS 3
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
extern DECLARE_ALIGNED(16, const unsigned char,
vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
extern const vp8_prob vp8_coef_update_probs[BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][ENTROPY_NODES];
struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *);

View File

@ -16,156 +16,97 @@
#include "vp8_entropymodedata.h"
int vp8_mv_cont(const int_mv *l, const int_mv *a)
{
int lez = (l->as_int == 0);
int aez = (a->as_int == 0);
int lea = (l->as_int == a->as_int);
int vp8_mv_cont(const int_mv *l, const int_mv *a) {
int lez = (l->as_int == 0);
int aez = (a->as_int == 0);
int lea = (l->as_int == a->as_int);
if (lea && lez)
return SUBMVREF_LEFT_ABOVE_ZED;
if (lea && lez) return SUBMVREF_LEFT_ABOVE_ZED;
if (lea)
return SUBMVREF_LEFT_ABOVE_SAME;
if (lea) return SUBMVREF_LEFT_ABOVE_SAME;
if (aez)
return SUBMVREF_ABOVE_ZED;
if (aez) return SUBMVREF_ABOVE_ZED;
if (lez)
return SUBMVREF_LEFT_ZED;
if (lez) return SUBMVREF_LEFT_ZED;
return SUBMVREF_NORMAL;
return SUBMVREF_NORMAL;
}
static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25};
static const vp8_prob sub_mv_ref_prob[VP8_SUBMVREFS - 1] = { 180, 162, 25 };
const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] =
{
{ 147, 136, 18 },
{ 106, 145, 1 },
{ 179, 121, 1 },
{ 223, 1 , 34 },
{ 208, 1 , 1 }
const vp8_prob vp8_sub_mv_ref_prob2[SUBMVREF_COUNT][VP8_SUBMVREFS - 1] = {
{ 147, 136, 18 },
{ 106, 145, 1 },
{ 179, 121, 1 },
{ 223, 1, 34 },
{ 208, 1, 1 }
};
const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] =
{
{
0, 0, 0, 0,
0, 0, 0, 0,
1, 1, 1, 1,
1, 1, 1, 1,
},
{
0, 0, 1, 1,
0, 0, 1, 1,
0, 0, 1, 1,
0, 0, 1, 1,
},
{
0, 0, 1, 1,
0, 0, 1, 1,
2, 2, 3, 3,
2, 2, 3, 3,
},
{
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
}
const vp8_mbsplit vp8_mbsplits[VP8_NUMMBSPLITS] = {
{
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
},
{
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
},
{
0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3,
},
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
}
};
const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16};
const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150};
const int vp8_mbsplit_count[VP8_NUMMBSPLITS] = { 2, 2, 4, 16 };
const vp8_prob vp8_mbsplit_probs[VP8_NUMMBSPLITS - 1] = { 110, 111, 150 };
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */
{
-B_DC_PRED, 2, /* 0 = DC_NODE */
-B_TM_PRED, 4, /* 1 = TM_NODE */
-B_VE_PRED, 6, /* 2 = VE_NODE */
8, 12, /* 3 = COM_NODE */
-B_HE_PRED, 10, /* 4 = HE_NODE */
-B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
-B_LD_PRED, 14, /* 6 = LD_NODE */
-B_VL_PRED, 16, /* 7 = VL_NODE */
-B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
};
const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */
{
-B_DC_PRED, 2, /* 0 = DC_NODE */
-B_TM_PRED, 4, /* 1 = TM_NODE */
-B_VE_PRED, 6, /* 2 = VE_NODE */
8, 12, /* 3 = COM_NODE */
-B_HE_PRED, 10, /* 4 = HE_NODE */
-B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
-B_LD_PRED, 14, /* 6 = LD_NODE */
-B_VL_PRED, 16, /* 7 = VL_NODE */
-B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
};
/* Again, these trees use the same probability indices as their
explicitly-programmed predecessors. */
const vp8_tree_index vp8_ymode_tree[8] =
{
-DC_PRED, 2,
4, 6,
-V_PRED, -H_PRED,
-TM_PRED, -B_PRED
const vp8_tree_index vp8_ymode_tree[8] = {
-DC_PRED, 2, 4, 6, -V_PRED, -H_PRED, -TM_PRED, -B_PRED
};
const vp8_tree_index vp8_kf_ymode_tree[8] =
{
-B_PRED, 2,
4, 6,
-DC_PRED, -V_PRED,
-H_PRED, -TM_PRED
const vp8_tree_index vp8_kf_ymode_tree[8] = {
-B_PRED, 2, 4, 6, -DC_PRED, -V_PRED, -H_PRED, -TM_PRED
};
const vp8_tree_index vp8_uv_mode_tree[6] =
{
-DC_PRED, 2,
-V_PRED, 4,
-H_PRED, -TM_PRED
};
const vp8_tree_index vp8_uv_mode_tree[6] = { -DC_PRED, 2, -V_PRED,
4, -H_PRED, -TM_PRED };
const vp8_tree_index vp8_mbsplit_tree[6] =
{
-3, 2,
-2, 4,
-0, -1
};
const vp8_tree_index vp8_mbsplit_tree[6] = { -3, 2, -2, 4, -0, -1 };
const vp8_tree_index vp8_mv_ref_tree[8] =
{
-ZEROMV, 2,
-NEARESTMV, 4,
-NEARMV, 6,
-NEWMV, -SPLITMV
};
const vp8_tree_index vp8_mv_ref_tree[8] = { -ZEROMV, 2, -NEARESTMV, 4,
-NEARMV, 6, -NEWMV, -SPLITMV };
const vp8_tree_index vp8_sub_mv_ref_tree[6] =
{
-LEFT4X4, 2,
-ABOVE4X4, 4,
-ZERO4X4, -NEW4X4
};
const vp8_tree_index vp8_sub_mv_ref_tree[6] = { -LEFT4X4, 2, -ABOVE4X4,
4, -ZERO4X4, -NEW4X4 };
const vp8_tree_index vp8_small_mvtree [14] =
{
2, 8,
4, 6,
-0, -1,
-2, -3,
10, 12,
-4, -5,
-6, -7
};
const vp8_tree_index vp8_small_mvtree[14] = { 2, 8, 4, 6, -0, -1, -2,
-3, 10, 12, -4, -5, -6, -7 };
void vp8_init_mbmode_probs(VP8_COMMON *x)
{
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
void vp8_init_mbmode_probs(VP8_COMMON *x) {
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
}
void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
{
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
void vp8_default_bmode_probs(vp8_prob p[VP8_BINTRAMODES - 1]) {
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ENTROPYMODE_H_
#define VP8_COMMON_ENTROPYMODE_H_
@ -19,42 +18,40 @@
extern "C" {
#endif
typedef enum
{
SUBMVREF_NORMAL,
SUBMVREF_LEFT_ZED,
SUBMVREF_ABOVE_ZED,
SUBMVREF_LEFT_ABOVE_SAME,
SUBMVREF_LEFT_ABOVE_ZED
typedef enum {
SUBMVREF_NORMAL,
SUBMVREF_LEFT_ZED,
SUBMVREF_ABOVE_ZED,
SUBMVREF_LEFT_ABOVE_SAME,
SUBMVREF_LEFT_ABOVE_ZED
} sumvfref_t;
typedef int vp8_mbsplit[16];
#define VP8_NUMMBSPLITS 4
extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];
extern const vp8_mbsplit vp8_mbsplits[VP8_NUMMBSPLITS];
extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */
extern const int vp8_mbsplit_count[VP8_NUMMBSPLITS]; /* # of subsets */
extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];
extern const vp8_prob vp8_mbsplit_probs[VP8_NUMMBSPLITS - 1];
extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
#define SUBMVREF_COUNT 5
extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];
extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES];
extern const vp8_prob vp8_sub_mv_ref_prob2[SUBMVREF_COUNT][VP8_SUBMVREFS - 1];
extern const unsigned int vp8_kf_default_bmode_counts
[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES];
extern const vp8_tree_index vp8_bmode_tree[];
extern const vp8_tree_index vp8_ymode_tree[];
extern const vp8_tree_index vp8_kf_ymode_tree[];
extern const vp8_tree_index vp8_uv_mode_tree[];
extern const vp8_tree_index vp8_ymode_tree[];
extern const vp8_tree_index vp8_kf_ymode_tree[];
extern const vp8_tree_index vp8_uv_mode_tree[];
extern const vp8_tree_index vp8_mbsplit_tree[];
extern const vp8_tree_index vp8_mv_ref_tree[];
extern const vp8_tree_index vp8_sub_mv_ref_tree[];
extern const vp8_tree_index vp8_mbsplit_tree[];
extern const vp8_tree_index vp8_mv_ref_tree[];
extern const vp8_tree_index vp8_sub_mv_ref_tree[];
extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES];
extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES];
@ -65,21 +62,23 @@ extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS];
/* Inter mode values do not start at zero */
extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS];
extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];
extern const struct vp8_token_struct
vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];
extern const vp8_tree_index vp8_small_mvtree[];
extern const struct vp8_token_struct vp8_small_mvencodings[8];
/* Key frame default mode probs */
extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES]
[VP8_BINTRAMODES-1];
extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1];
extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1];
extern const vp8_prob
vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES - 1];
extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES - 1];
extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES - 1];
void vp8_init_mbmode_probs(VP8_COMMON *x);
void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
void vp8_default_bmode_probs(vp8_prob dest[VP8_BINTRAMODES - 1]);
void vp8_kf_default_bmode_probs(
vp8_prob dest[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES - 1]);
#ifdef __cplusplus
} // extern "C"

View File

@ -8,42 +8,40 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "entropymv.h"
const MV_CONTEXT vp8_mv_update_probs[2] =
{
{{
237,
246,
253, 253, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 250, 250, 252, 254, 254
}},
{{
231,
243,
245, 253, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 251, 251, 254, 254, 254
}}
/* clang-format off */
const MV_CONTEXT vp8_mv_update_probs[2] = {
{ {
237,
246,
253, 253, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 250, 250, 252, 254, 254
} },
{ {
231,
243,
245, 253, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 251, 251, 254, 254, 254
} }
};
const MV_CONTEXT vp8_default_mv_context[2] =
{
{{
/* row */
162, /* is short */
128, /* sign */
225, 146, 172, 147, 214, 39, 156, /* short tree */
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
}},
/* clang-format on */
const MV_CONTEXT vp8_default_mv_context[2] = {
{ {
/* row */
162, /* is short */
128, /* sign */
225, 146, 172, 147, 214, 39, 156, /* short tree */
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
} },
{ {
/* same for column */
164, /* is short */
128, /**/
204, 170, 119, 235, 140, 230, 228, /**/
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
{{
/* same for column */
164, /* is short */
128,
204, 170, 119, 235, 140, 230, 228,
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
}}
} }
};

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ENTROPYMV_H_
#define VP8_COMMON_ENTROPYMV_H_
@ -18,29 +17,27 @@
extern "C" {
#endif
enum
{
mv_max = 1023, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values "" */
mvfp_max = 255, /* max absolute value of a full pixel MV component */
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
enum {
mv_max = 1023, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values "" */
mvfp_max = 255, /* max absolute value of a full pixel MV component */
MVfpvals = (2 * mvfp_max) + 1, /* # possible full pixel MV values */
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
mvnum_short = 8, /* magnitudes 0 through 7 */
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
mvnum_short = 8, /* magnitudes 0 through 7 */
/* probability offsets for coding each MV component */
/* probability offsets for coding each MV component */
mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
MVPsign, /* sign for non-zero */
MVPshort, /* 8 short values = 7-position tree */
mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
MVPsign, /* sign for non-zero */
MVPshort, /* 8 short values = 7-position tree */
MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
};
typedef struct mv_context
{
vp8_prob prob[MVPcount]; /* often come in row, col pairs */
typedef struct mv_context {
vp8_prob prob[MVPcount]; /* often come in row, col pairs */
} MV_CONTEXT;
extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];

View File

@ -8,181 +8,146 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "extend.h"
#include "vpx_mem/vpx_mem.h"
static void copy_and_extend_plane(unsigned char *s, /* source */
int sp, /* source pitch */
unsigned char *d, /* destination */
int dp, /* destination pitch */
int h, /* height */
int w, /* width */
int et, /* extend top border */
int el, /* extend left border */
int eb, /* extend bottom border */
int er /* extend right border */
) {
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
int linesize;
static void copy_and_extend_plane
(
unsigned char *s, /* source */
int sp, /* source pitch */
unsigned char *d, /* destination */
int dp, /* destination pitch */
int h, /* height */
int w, /* width */
int et, /* extend top border */
int el, /* extend left border */
int eb, /* extend bottom border */
int er /* extend right border */
)
{
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
int linesize;
/* copy the left and right most columns out */
src_ptr1 = s;
src_ptr2 = s + w - 1;
dest_ptr1 = d - el;
dest_ptr2 = d + w;
/* copy the left and right most columns out */
src_ptr1 = s;
src_ptr2 = s + w - 1;
dest_ptr1 = d - el;
dest_ptr2 = d + w;
for (i = 0; i < h; i++) {
memset(dest_ptr1, src_ptr1[0], el);
memcpy(dest_ptr1 + el, src_ptr1, w);
memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;
dest_ptr1 += dp;
dest_ptr2 += dp;
}
for (i = 0; i < h; i++)
{
memset(dest_ptr1, src_ptr1[0], el);
memcpy(dest_ptr1 + el, src_ptr1, w);
memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;
dest_ptr1 += dp;
dest_ptr2 += dp;
}
/* Now copy the top and bottom lines into each line of the respective
* borders
*/
src_ptr1 = d - el;
src_ptr2 = d + dp * (h - 1) - el;
dest_ptr1 = d + dp * (-et) - el;
dest_ptr2 = d + dp * (h)-el;
linesize = el + er + w;
/* Now copy the top and bottom lines into each line of the respective
* borders
*/
src_ptr1 = d - el;
src_ptr2 = d + dp * (h - 1) - el;
dest_ptr1 = d + dp * (-et) - el;
dest_ptr2 = d + dp * (h) - el;
linesize = el + er + w;
for (i = 0; i < et; i++) {
memcpy(dest_ptr1, src_ptr1, linesize);
dest_ptr1 += dp;
}
for (i = 0; i < et; i++)
{
memcpy(dest_ptr1, src_ptr1, linesize);
dest_ptr1 += dp;
}
for (i = 0; i < eb; i++)
{
memcpy(dest_ptr2, src_ptr2, linesize);
dest_ptr2 += dp;
}
for (i = 0; i < eb; i++) {
memcpy(dest_ptr2, src_ptr2, linesize);
dest_ptr2 += dp;
}
}
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst)
{
int et = dst->border;
int el = dst->border;
int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width;
YV12_BUFFER_CONFIG *dst) {
int et = dst->border;
int el = dst->border;
int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width;
copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_height, src->y_width,
et, el, eb, er);
copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, src->y_height, src->y_width, et, el, eb,
er);
et = dst->border >> 1;
el = dst->border >> 1;
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
et = dst->border >> 1;
el = dst->border >> 1;
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_height, src->uv_width,
et, el, eb, er);
copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
dst->uv_stride, src->uv_height, src->uv_width, et, el,
eb, er);
copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_height, src->uv_width,
et, el, eb, er);
copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
dst->uv_stride, src->uv_height, src->uv_width, et, el,
eb, er);
}
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw)
{
int et = dst->border;
int el = dst->border;
int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width;
int src_y_offset = srcy * src->y_stride + srcx;
int dst_y_offset = srcy * dst->y_stride + srcx;
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
YV12_BUFFER_CONFIG *dst, int srcy,
int srcx, int srch, int srcw) {
int et = dst->border;
int el = dst->border;
int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width;
int src_y_offset = srcy * src->y_stride + srcx;
int dst_y_offset = srcy * dst->y_stride + srcx;
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
/* If the side is not touching the bounder then don't extend. */
if (srcy)
et = 0;
if (srcx)
el = 0;
if (srcy + srch != src->y_height)
eb = 0;
if (srcx + srcw != src->y_width)
er = 0;
/* If the side is not touching the bounder then don't extend. */
if (srcy) et = 0;
if (srcx) el = 0;
if (srcy + srch != src->y_height) eb = 0;
if (srcx + srcw != src->y_width) er = 0;
copy_and_extend_plane(src->y_buffer + src_y_offset,
src->y_stride,
dst->y_buffer + dst_y_offset,
dst->y_stride,
srch, srcw,
et, el, eb, er);
copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
dst->y_buffer + dst_y_offset, dst->y_stride, srch, srcw,
et, el, eb, er);
et = (et + 1) >> 1;
el = (el + 1) >> 1;
eb = (eb + 1) >> 1;
er = (er + 1) >> 1;
srch = (srch + 1) >> 1;
srcw = (srcw + 1) >> 1;
et = (et + 1) >> 1;
el = (el + 1) >> 1;
eb = (eb + 1) >> 1;
er = (er + 1) >> 1;
srch = (srch + 1) >> 1;
srcw = (srcw + 1) >> 1;
copy_and_extend_plane(src->u_buffer + src_uv_offset,
src->uv_stride,
dst->u_buffer + dst_uv_offset,
dst->uv_stride,
srch, srcw,
et, el, eb, er);
copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
dst->u_buffer + dst_uv_offset, dst->uv_stride, srch,
srcw, et, el, eb, er);
copy_and_extend_plane(src->v_buffer + src_uv_offset,
src->uv_stride,
dst->v_buffer + dst_uv_offset,
dst->uv_stride,
srch, srcw,
et, el, eb, er);
copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
dst->v_buffer + dst_uv_offset, dst->uv_stride, srch,
srcw, et, el, eb, er);
}
/* note the extension is only for the last row, for intra prediction purpose */
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
unsigned char *YPtr,
unsigned char *UPtr,
unsigned char *VPtr)
{
int i;
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr,
unsigned char *UPtr, unsigned char *VPtr) {
int i;
YPtr += ybf->y_stride * 14;
UPtr += ybf->uv_stride * 6;
VPtr += ybf->uv_stride * 6;
YPtr += ybf->y_stride * 14;
UPtr += ybf->uv_stride * 6;
VPtr += ybf->uv_stride * 6;
for (i = 0; i < 4; i++)
{
YPtr[i] = YPtr[-1];
UPtr[i] = UPtr[-1];
VPtr[i] = VPtr[-1];
}
for (i = 0; i < 4; i++) {
YPtr[i] = YPtr[-1];
UPtr[i] = UPtr[-1];
VPtr[i] = VPtr[-1];
}
YPtr += ybf->y_stride;
UPtr += ybf->uv_stride;
VPtr += ybf->uv_stride;
YPtr += ybf->y_stride;
UPtr += ybf->uv_stride;
VPtr += ybf->uv_stride;
for (i = 0; i < 4; i++)
{
YPtr[i] = YPtr[-1];
UPtr[i] = UPtr[-1];
VPtr[i] = VPtr[-1];
}
for (i = 0; i < 4; i++) {
YPtr[i] = YPtr[-1];
UPtr[i] = UPtr[-1];
VPtr[i] = VPtr[-1];
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_EXTEND_H_
#define VP8_COMMON_EXTEND_H_
@ -18,13 +17,13 @@
extern "C" {
#endif
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr,
unsigned char *UPtr, unsigned char *VPtr);
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw);
YV12_BUFFER_CONFIG *dst, int srcy,
int srcx, int srch, int srcw);
#ifdef __cplusplus
} // extern "C"

View File

@ -8,243 +8,186 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "filter.h"
#include "./vp8_rtcd.h"
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
{ 128, 0 },
{ 112, 16 },
{ 96, 32 },
{ 80, 48 },
{ 64, 64 },
{ 48, 80 },
{ 32, 96 },
{ 16, 112 }
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = {
{ 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
{ 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }
};
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = {
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
{ 0, -6, 123, 12, -1, 0 },
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
{ 0, -9, 93, 50, -6, 0 },
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
{ 0, -6, 50, 93, -9, 0 },
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
{ 0, 0, 128, 0, 0,
0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
{ 0, -6, 123, 12, -1, 0 },
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
{ 0, -9, 93, 50, -6, 0 },
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
{ 0, -6, 50, 93, -9, 0 },
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
};
static void filter_block2d_first_pass
(
unsigned char *src_ptr,
int *output_ptr,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
)
{
unsigned int i, j;
int Temp;
static void filter_block2d_first_pass(unsigned char *src_ptr, int *output_ptr,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter) {
unsigned int i, j;
int Temp;
for (i = 0; i < output_height; i++)
{
for (j = 0; j < output_width; j++)
{
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
for (i = 0; i < output_height; i++) {
for (j = 0; j < output_width; j++) {
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2 * pixel_step] * vp8_filter[4]) +
((int)src_ptr[3 * pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
if (Temp < 0)
Temp = 0;
else if (Temp > 255)
Temp = 255;
if (Temp < 0)
Temp = 0;
else if (Temp > 255)
Temp = 255;
output_ptr[j] = Temp;
src_ptr++;
}
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
output_ptr[j] = Temp;
src_ptr++;
}
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
}
static void filter_block2d_second_pass
(
int *src_ptr,
unsigned char *output_ptr,
int output_pitch,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
)
{
unsigned int i, j;
int Temp;
static void filter_block2d_second_pass(int *src_ptr, unsigned char *output_ptr,
int output_pitch,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter) {
unsigned int i, j;
int Temp;
for (i = 0; i < output_height; i++)
{
for (j = 0; j < output_width; j++)
{
/* Apply filter */
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
for (i = 0; i < output_height; i++) {
for (j = 0; j < output_width; j++) {
/* Apply filter */
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
((int)src_ptr[0] * vp8_filter[2]) +
((int)src_ptr[pixel_step] * vp8_filter[3]) +
((int)src_ptr[2 * pixel_step] * vp8_filter[4]) +
((int)src_ptr[3 * pixel_step] * vp8_filter[5]) +
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
/* Normalize back to 0-255 */
Temp = Temp >> VP8_FILTER_SHIFT;
if (Temp < 0)
Temp = 0;
else if (Temp > 255)
Temp = 255;
if (Temp < 0)
Temp = 0;
else if (Temp > 255)
Temp = 255;
output_ptr[j] = (unsigned char)Temp;
src_ptr++;
}
/* Start next row */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_pitch;
output_ptr[j] = (unsigned char)Temp;
src_ptr++;
}
/* Start next row */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_pitch;
}
}
static void filter_block2d(unsigned char *src_ptr, unsigned char *output_ptr,
unsigned int src_pixels_per_line, int output_pitch,
const short *HFilter, const short *VFilter) {
int FData[9 * 4]; /* Temp data buffer used in filtering */
static void filter_block2d
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
int output_pitch,
const short *HFilter,
const short *VFilter
)
{
int FData[9*4]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
src_pixels_per_line, 1, 9, 4, HFilter);
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4,
VFilter);
}
void vp8_sixtap_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
int xoffset, int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
const short *HFilter;
const short *VFilter;
void vp8_sixtap_predict4x4_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
VFilter);
}
void vp8_sixtap_predict8x8_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data buffer used in filtering */
void vp8_sixtap_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line,
int xoffset, int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
const short *HFilter;
const short *VFilter;
int FData[13 * 16]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
src_pixels_per_line, 1, 13, 8, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8,
VFilter);
}
void vp8_sixtap_predict8x4_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data buffer used in filtering */
void vp8_sixtap_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line,
int xoffset, int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
const short *HFilter;
const short *VFilter;
int FData[13 * 16]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
src_pixels_per_line, 1, 9, 8, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8,
VFilter);
}
void vp8_sixtap_predict16x16_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
int FData[21*24]; /* Temp data buffer used in filtering */
void vp8_sixtap_predict16x16_c(unsigned char *src_ptr, int src_pixels_per_line,
int xoffset, int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
const short *HFilter;
const short *VFilter;
int FData[21 * 24]; /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
/* First filter 1-D horizontally... */
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
src_pixels_per_line, 1, 21, 16, HFilter);
/* then filter verticaly... */
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16,
VFilter);
}
/****************************************************************************
*
* ROUTINE : filter_block2d_bil_first_pass
@ -267,33 +210,25 @@ void vp8_sixtap_predict16x16_c
* Two filter taps should sum to VP8_FILTER_WEIGHT.
*
****************************************************************************/
static void filter_block2d_bil_first_pass
(
unsigned char *src_ptr,
unsigned short *dst_ptr,
unsigned int src_stride,
unsigned int height,
unsigned int width,
const short *vp8_filter
)
{
unsigned int i, j;
static void filter_block2d_bil_first_pass(
unsigned char *src_ptr, unsigned short *dst_ptr, unsigned int src_stride,
unsigned int height, unsigned int width, const short *vp8_filter) {
unsigned int i, j;
for (i = 0; i < height; i++)
{
for (j = 0; j < width; j++)
{
/* Apply bilinear filter */
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[1] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
src_ptr++;
}
/* Next row... */
src_ptr += src_stride - width;
dst_ptr += width;
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
/* Apply bilinear filter */
dst_ptr[j] =
(((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[1] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2)) >>
VP8_FILTER_SHIFT;
src_ptr++;
}
/* Next row... */
src_ptr += src_stride - width;
dst_ptr += width;
}
}
/****************************************************************************
@ -312,42 +247,35 @@ static void filter_block2d_bil_first_pass
*
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
* in the vertical direction to produce the filtered output
* block. Used to implement second-pass of 2-D separable filter.
* block. Used to implement second-pass of 2-D separable
* filter.
*
* SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
* SPECIAL NOTES : Requires 32-bit input as produced by
* filter_block2d_bil_first_pass.
* Two filter taps should sum to VP8_FILTER_WEIGHT.
*
****************************************************************************/
static void filter_block2d_bil_second_pass
(
unsigned short *src_ptr,
unsigned char *dst_ptr,
int dst_pitch,
unsigned int height,
unsigned int width,
const short *vp8_filter
)
{
unsigned int i, j;
int Temp;
static void filter_block2d_bil_second_pass(unsigned short *src_ptr,
unsigned char *dst_ptr,
int dst_pitch, unsigned int height,
unsigned int width,
const short *vp8_filter) {
unsigned int i, j;
int Temp;
for (i = 0; i < height; i++)
{
for (j = 0; j < width; j++)
{
/* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[width] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2);
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
src_ptr++;
}
/* Next row... */
dst_ptr += dst_pitch;
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
/* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[width] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2);
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
src_ptr++;
}
}
/* Next row... */
dst_ptr += dst_pitch;
}
}
/****************************************************************************
*
@ -356,7 +284,8 @@ static void filter_block2d_bil_second_pass
* INPUTS : UINT8 *src_ptr : Pointer to source block.
* UINT32 src_pitch : Stride of source block.
* UINT32 dst_pitch : Stride of destination block.
* INT32 *HFilter : Array of 2 horizontal filter taps.
* INT32 *HFilter : Array of 2 horizontal filter
* taps.
* INT32 *VFilter : Array of 2 vertical filter taps.
* INT32 Width : Block width
* INT32 Height : Block height
@ -372,44 +301,29 @@ static void filter_block2d_bil_second_pass
* SPECIAL NOTES : The largest block size can be handled here is 16x16
*
****************************************************************************/
static void filter_block2d_bil
(
unsigned char *src_ptr,
unsigned char *dst_ptr,
unsigned int src_pitch,
unsigned int dst_pitch,
const short *HFilter,
const short *VFilter,
int Width,
int Height
)
{
static void filter_block2d_bil(unsigned char *src_ptr, unsigned char *dst_ptr,
unsigned int src_pitch, unsigned int dst_pitch,
const short *HFilter, const short *VFilter,
int Width, int Height) {
unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width,
HFilter);
/* First filter 1-D horizontally... */
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
/* then 1-D vertically... */
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
/* then 1-D vertically... */
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width,
VFilter);
}
void vp8_bilinear_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
int xoffset, int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
const short *HFilter;
const short *VFilter;
void vp8_bilinear_predict4x4_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
#if 0
{
int i;
@ -429,65 +343,46 @@ void vp8_bilinear_predict4x4_c
}
}
#endif
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
VFilter, 4, 4);
}
void vp8_bilinear_predict8x8_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
void vp8_bilinear_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line,
int xoffset, int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
VFilter, 8, 8);
}
void vp8_bilinear_predict8x4_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
void vp8_bilinear_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line,
int xoffset, int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
VFilter, 8, 4);
}
void vp8_bilinear_predict16x16_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
void vp8_bilinear_predict16x16_c(unsigned char *src_ptr,
int src_pixels_per_line, int xoffset,
int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
const short *HFilter;
const short *VFilter;
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
VFilter, 16, 16);
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_FILTER_H_
#define VP8_COMMON_FILTER_H_
@ -20,7 +19,7 @@ extern "C" {
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
#define VP8_FILTER_SHIFT 7
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);

View File

@ -8,186 +8,147 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "findnearmv.h"
const unsigned char vp8_mbsplit_offset[4][16] = {
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }
};
/* Predict motion vectors using those from already-decoded nearby blocks.
Note that we only consider one 4x4 subblock from each candidate 16x16
macroblock. */
void vp8_find_near_mvs
(
MACROBLOCKD *xd,
const MODE_INFO *here,
int_mv *nearest,
int_mv *nearby,
int_mv *best_mv,
int cnt[4],
int refframe,
int *ref_frame_sign_bias
)
{
const MODE_INFO *above = here - xd->mode_info_stride;
const MODE_INFO *left = here - 1;
const MODE_INFO *aboveleft = above - 1;
int_mv near_mvs[4];
int_mv *mv = near_mvs;
int *cntx = cnt;
enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV};
void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest,
int_mv *nearby, int_mv *best_mv, int cnt[4],
int refframe, int *ref_frame_sign_bias) {
const MODE_INFO *above = here - xd->mode_info_stride;
const MODE_INFO *left = here - 1;
const MODE_INFO *aboveleft = above - 1;
int_mv near_mvs[4];
int_mv *mv = near_mvs;
int *cntx = cnt;
enum { CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
/* Zero accumulators */
mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
/* Zero accumulators */
mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
/* Process above */
if (above->mbmi.ref_frame != INTRA_FRAME)
{
if (above->mbmi.mv.as_int)
{
(++mv)->as_int = above->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
++cntx;
}
*cntx += 2;
/* Process above */
if (above->mbmi.ref_frame != INTRA_FRAME) {
if (above->mbmi.mv.as_int) {
(++mv)->as_int = above->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv,
ref_frame_sign_bias);
++cntx;
}
/* Process left */
if (left->mbmi.ref_frame != INTRA_FRAME)
{
if (left->mbmi.mv.as_int)
{
int_mv this_mv;
*cntx += 2;
}
this_mv.as_int = left->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
/* Process left */
if (left->mbmi.ref_frame != INTRA_FRAME) {
if (left->mbmi.mv.as_int) {
int_mv this_mv;
if (this_mv.as_int != mv->as_int)
{
(++mv)->as_int = this_mv.as_int;
++cntx;
}
this_mv.as_int = left->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv,
ref_frame_sign_bias);
*cntx += 2;
}
else
cnt[CNT_INTRA] += 2;
}
if (this_mv.as_int != mv->as_int) {
(++mv)->as_int = this_mv.as_int;
++cntx;
}
/* Process above left */
if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
{
if (aboveleft->mbmi.mv.as_int)
{
int_mv this_mv;
*cntx += 2;
} else
cnt[CNT_INTRA] += 2;
}
this_mv.as_int = aboveleft->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
/* Process above left */
if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
if (aboveleft->mbmi.mv.as_int) {
int_mv this_mv;
if (this_mv.as_int != mv->as_int)
{
(++mv)->as_int = this_mv.as_int;
++cntx;
}
this_mv.as_int = aboveleft->mbmi.mv.as_int;
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
&this_mv, ref_frame_sign_bias);
*cntx += 1;
}
else
cnt[CNT_INTRA] += 1;
}
if (this_mv.as_int != mv->as_int) {
(++mv)->as_int = this_mv.as_int;
++cntx;
}
/* If we have three distinct MV's ... */
if (cnt[CNT_SPLITMV])
{
/* See if above-left MV can be merged with NEAREST */
if (mv->as_int == near_mvs[CNT_NEAREST].as_int)
cnt[CNT_NEAREST] += 1;
}
*cntx += 1;
} else
cnt[CNT_INTRA] += 1;
}
cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
+ (left->mbmi.mode == SPLITMV)) * 2
+ (aboveleft->mbmi.mode == SPLITMV);
/* If we have three distinct MV's ... */
if (cnt[CNT_SPLITMV]) {
/* See if above-left MV can be merged with NEAREST */
if (mv->as_int == near_mvs[CNT_NEAREST].as_int) cnt[CNT_NEAREST] += 1;
}
/* Swap near and nearest if necessary */
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
{
int tmp;
tmp = cnt[CNT_NEAREST];
cnt[CNT_NEAREST] = cnt[CNT_NEAR];
cnt[CNT_NEAR] = tmp;
tmp = near_mvs[CNT_NEAREST].as_int;
near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
near_mvs[CNT_NEAR].as_int = tmp;
}
cnt[CNT_SPLITMV] =
((above->mbmi.mode == SPLITMV) + (left->mbmi.mode == SPLITMV)) * 2 +
(aboveleft->mbmi.mode == SPLITMV);
/* Use near_mvs[0] to store the "best" MV */
if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
/* Swap near and nearest if necessary */
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
int tmp;
tmp = cnt[CNT_NEAREST];
cnt[CNT_NEAREST] = cnt[CNT_NEAR];
cnt[CNT_NEAR] = tmp;
tmp = near_mvs[CNT_NEAREST].as_int;
near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
near_mvs[CNT_NEAR].as_int = tmp;
}
/* Set up return values */
best_mv->as_int = near_mvs[0].as_int;
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
nearby->as_int = near_mvs[CNT_NEAR].as_int;
/* Use near_mvs[0] to store the "best" MV */
if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
/* Set up return values */
best_mv->as_int = near_mvs[0].as_int;
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
nearby->as_int = near_mvs[CNT_NEAR].as_int;
}
static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd)
{
inv->as_mv.row = src->as_mv.row * -1;
inv->as_mv.col = src->as_mv.col * -1;
vp8_clamp_mv2(inv, xd);
vp8_clamp_mv2(src, xd);
static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd) {
inv->as_mv.row = src->as_mv.row * -1;
inv->as_mv.col = src->as_mv.col * -1;
vp8_clamp_mv2(inv, xd);
vp8_clamp_mv2(src, xd);
}
int vp8_find_near_mvs_bias(MACROBLOCKD *xd, const MODE_INFO *here,
int_mv mode_mv_sb[2][MB_MODE_COUNT],
int_mv best_mv_sb[2], int cnt[4], int refframe,
int *ref_frame_sign_bias) {
int sign_bias = ref_frame_sign_bias[refframe];
int vp8_find_near_mvs_bias
(
MACROBLOCKD *xd,
const MODE_INFO *here,
int_mv mode_mv_sb[2][MB_MODE_COUNT],
int_mv best_mv_sb[2],
int cnt[4],
int refframe,
int *ref_frame_sign_bias
)
{
int sign_bias = ref_frame_sign_bias[refframe];
vp8_find_near_mvs(xd, here, &mode_mv_sb[sign_bias][NEARESTMV],
&mode_mv_sb[sign_bias][NEARMV], &best_mv_sb[sign_bias], cnt,
refframe, ref_frame_sign_bias);
vp8_find_near_mvs(xd,
here,
&mode_mv_sb[sign_bias][NEARESTMV],
&mode_mv_sb[sign_bias][NEARMV],
&best_mv_sb[sign_bias],
cnt,
refframe,
ref_frame_sign_bias);
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
&mode_mv_sb[sign_bias][NEARESTMV], xd);
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
&mode_mv_sb[sign_bias][NEARMV], xd);
invert_and_clamp_mvs(&best_mv_sb[!sign_bias], &best_mv_sb[sign_bias], xd);
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
&mode_mv_sb[sign_bias][NEARESTMV], xd);
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
&mode_mv_sb[sign_bias][NEARMV], xd);
invert_and_clamp_mvs(&best_mv_sb[!sign_bias],
&best_mv_sb[sign_bias], xd);
return sign_bias;
return sign_bias;
}
vp8_prob *vp8_mv_ref_probs(
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
)
{
p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0];
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
/*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
return p;
vp8_prob *vp8_mv_ref_probs(vp8_prob p[VP8_MVREFS - 1],
const int near_mv_ref_ct[4]) {
p[0] = vp8_mode_contexts[near_mv_ref_ct[0]][0];
p[1] = vp8_mode_contexts[near_mv_ref_ct[1]][1];
p[2] = vp8_mode_contexts[near_mv_ref_ct[2]][2];
p[3] = vp8_mode_contexts[near_mv_ref_ct[3]][3];
/* p[3] = vp8_mode_contexts[near_mv_ref_ct[1] + near_mv_ref_ct[2] +
near_mv_ref_ct[3]][3]; */
return p;
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_FINDNEARMV_H_
#define VP8_COMMON_FINDNEARMV_H_
@ -22,170 +21,125 @@
extern "C" {
#endif
static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
int_mv *mvp, const int *ref_frame_sign_bias)
{
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
{
mvp->as_mv.row *= -1;
mvp->as_mv.col *= -1;
}
int_mv *mvp, const int *ref_frame_sign_bias) {
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) {
mvp->as_mv.row *= -1;
mvp->as_mv.col *= -1;
}
}
#define LEFT_TOP_MARGIN (16 << 3)
#define RIGHT_BOTTOM_MARGIN (16 << 3)
static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
{
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) {
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
}
static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge,
int mb_to_right_edge, int mb_to_top_edge,
int mb_to_bottom_edge)
{
mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
mb_to_left_edge : mv->as_mv.col;
mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
mb_to_right_edge : mv->as_mv.col;
mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
mb_to_top_edge : mv->as_mv.row;
mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
mb_to_bottom_edge : mv->as_mv.row;
int mb_to_bottom_edge) {
mv->as_mv.col =
(mv->as_mv.col < mb_to_left_edge) ? mb_to_left_edge : mv->as_mv.col;
mv->as_mv.col =
(mv->as_mv.col > mb_to_right_edge) ? mb_to_right_edge : mv->as_mv.col;
mv->as_mv.row =
(mv->as_mv.row < mb_to_top_edge) ? mb_to_top_edge : mv->as_mv.row;
mv->as_mv.row =
(mv->as_mv.row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->as_mv.row;
}
static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
int mb_to_right_edge,
int mb_to_top_edge,
int mb_to_bottom_edge)
{
unsigned int need_to_clamp;
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
return need_to_clamp;
int mb_to_bottom_edge) {
unsigned int need_to_clamp;
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
return need_to_clamp;
}
void vp8_find_near_mvs
(
MACROBLOCKD *xd,
const MODE_INFO *here,
int_mv *nearest, int_mv *nearby, int_mv *best,
int near_mv_ref_cts[4],
int refframe,
int *ref_frame_sign_bias
);
void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest,
int_mv *nearby, int_mv *best, int near_mv_ref_cts[4],
int refframe, int *ref_frame_sign_bias);
int vp8_find_near_mvs_bias(MACROBLOCKD *xd, const MODE_INFO *here,
int_mv mode_mv_sb[2][MB_MODE_COUNT],
int_mv best_mv_sb[2], int cnt[4], int refframe,
int *ref_frame_sign_bias);
int vp8_find_near_mvs_bias
(
MACROBLOCKD *xd,
const MODE_INFO *here,
int_mv mode_mv_sb[2][MB_MODE_COUNT],
int_mv best_mv_sb[2],
int cnt[4],
int refframe,
int *ref_frame_sign_bias
);
vp8_prob *vp8_mv_ref_probs(
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
);
vp8_prob *vp8_mv_ref_probs(vp8_prob p[VP8_MVREFS - 1],
const int near_mv_ref_ct[4]);
extern const unsigned char vp8_mbsplit_offset[4][16];
static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b) {
if (!(b & 3)) {
/* On L edge, get from MB to left of us */
--cur_mb;
static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
/* On L edge, get from MB to left of us */
--cur_mb;
if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.mv.as_int;
b += 4;
}
if(cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv.as_int;
b += 4;
}
return (cur_mb->bmi + b - 1)->mv.as_int;
return (cur_mb->bmi + b - 1)->mv.as_int;
}
static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
int mi_stride)
{
if (!(b >> 2))
{
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
int mi_stride) {
if (!(b >> 2)) {
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
if(cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv.as_int;
b += 16;
}
if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.mv.as_int;
b += 16;
}
return (cur_mb->bmi + (b - 4))->mv.as_int;
return (cur_mb->bmi + (b - 4))->mv.as_int;
}
static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
/* On L edge, get from MB to left of us */
--cur_mb;
switch (cur_mb->mbmi.mode)
{
case B_PRED:
return (cur_mb->bmi + b + 3)->as_mode;
case DC_PRED:
return B_DC_PRED;
case V_PRED:
return B_VE_PRED;
case H_PRED:
return B_HE_PRED;
case TM_PRED:
return B_TM_PRED;
default:
return B_DC_PRED;
}
static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb,
int b) {
if (!(b & 3)) {
/* On L edge, get from MB to left of us */
--cur_mb;
switch (cur_mb->mbmi.mode) {
case B_PRED: return (cur_mb->bmi + b + 3)->as_mode;
case DC_PRED: return B_DC_PRED;
case V_PRED: return B_VE_PRED;
case H_PRED: return B_HE_PRED;
case TM_PRED: return B_TM_PRED;
default: return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 1)->as_mode;
return (cur_mb->bmi + b - 1)->as_mode;
}
static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b,
int mi_stride)
{
if (!(b >> 2))
{
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
int mi_stride) {
if (!(b >> 2)) {
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
switch (cur_mb->mbmi.mode)
{
case B_PRED:
return (cur_mb->bmi + b + 12)->as_mode;
case DC_PRED:
return B_DC_PRED;
case V_PRED:
return B_VE_PRED;
case H_PRED:
return B_HE_PRED;
case TM_PRED:
return B_TM_PRED;
default:
return B_DC_PRED;
}
switch (cur_mb->mbmi.mode) {
case B_PRED: return (cur_mb->bmi + b + 12)->as_mode;
case DC_PRED: return B_DC_PRED;
case V_PRED: return B_VE_PRED;
case H_PRED: return B_HE_PRED;
case TM_PRED: return B_TM_PRED;
default: return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 4)->as_mode;
return (cur_mb->bmi + b - 4)->as_mode;
}
#ifdef __cplusplus

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#if ARCH_ARM
@ -24,7 +23,7 @@
#include <unistd.h>
#elif defined(_WIN32)
#include <windows.h>
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
typedef void(WINAPI *PGNSI)(LPSYSTEM_INFO);
#elif defined(__OS2__)
#define INCL_DOS
#define INCL_DOSSPINLOCK
@ -33,74 +32,69 @@ typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
#endif
#if CONFIG_MULTITHREAD
static int get_cpu_count()
{
int core_count = 16;
static int get_cpu_count() {
int core_count = 16;
#if HAVE_UNISTD_H && !defined(__OS2__)
#if defined(_SC_NPROCESSORS_ONLN)
core_count = sysconf(_SC_NPROCESSORS_ONLN);
core_count = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
core_count = sysconf(_SC_NPROC_ONLN);
core_count = sysconf(_SC_NPROC_ONLN);
#endif
#elif defined(_WIN32)
{
{
#if _WIN32_WINNT >= 0x0501
SYSTEM_INFO sysinfo;
GetNativeSystemInfo(&sysinfo);
SYSTEM_INFO sysinfo;
GetNativeSystemInfo(&sysinfo);
#else
PGNSI pGNSI;
SYSTEM_INFO sysinfo;
PGNSI pGNSI;
SYSTEM_INFO sysinfo;
/* Call GetNativeSystemInfo if supported or
* GetSystemInfo otherwise. */
/* Call GetNativeSystemInfo if supported or
* GetSystemInfo otherwise. */
pGNSI = (PGNSI) GetProcAddress(
GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
if (pGNSI != NULL)
pGNSI(&sysinfo);
else
GetSystemInfo(&sysinfo);
pGNSI = (PGNSI)GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"GetNativeSystemInfo");
if (pGNSI != NULL)
pGNSI(&sysinfo);
else
GetSystemInfo(&sysinfo);
#endif
core_count = sysinfo.dwNumberOfProcessors;
}
core_count = sysinfo.dwNumberOfProcessors;
}
#elif defined(__OS2__)
{
ULONG proc_id;
ULONG status;
{
ULONG proc_id;
ULONG status;
core_count = 0;
for (proc_id = 1; ; proc_id++)
{
if (DosGetProcessorStatus(proc_id, &status))
break;
core_count = 0;
for (proc_id = 1;; proc_id++) {
if (DosGetProcessorStatus(proc_id, &status)) break;
if (status == PROC_ONLINE)
core_count++;
}
if (status == PROC_ONLINE) core_count++;
}
}
#else
/* other platforms */
/* other platforms */
#endif
return core_count > 0 ? core_count : 1;
return core_count > 0 ? core_count : 1;
}
#endif
void vp8_clear_system_state_c() {};
void vp8_clear_system_state_c(){};
void vp8_machine_specific_config(VP8_COMMON *ctx)
{
void vp8_machine_specific_config(VP8_COMMON *ctx) {
#if CONFIG_MULTITHREAD
ctx->processor_core_count = get_cpu_count();
ctx->processor_core_count = get_cpu_count();
#else
(void)ctx;
(void)ctx;
#endif /* CONFIG_MULTITHREAD */
#if ARCH_ARM
ctx->cpu_caps = arm_cpu_caps();
ctx->cpu_caps = arm_cpu_caps();
#elif ARCH_X86 || ARCH_X86_64
ctx->cpu_caps = x86_simd_caps();
ctx->cpu_caps = x86_simd_caps();
#endif
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_HEADER_H_
#define VP8_COMMON_HEADER_H_
@ -17,22 +16,21 @@ extern "C" {
#endif
/* 24 bits total */
typedef struct
{
unsigned int type: 1;
unsigned int version: 3;
unsigned int show_frame: 1;
typedef struct {
unsigned int type : 1;
unsigned int version : 3;
unsigned int show_frame : 1;
/* Allow 2^20 bytes = 8 megabits for first partition */
/* Allow 2^20 bytes = 8 megabits for first partition */
unsigned int first_partition_length_in_bytes: 19;
unsigned int first_partition_length_in_bytes : 19;
#ifdef PACKET_TESTING
unsigned int frame_number;
unsigned int update_gold: 1;
unsigned int uses_gold: 1;
unsigned int update_last: 1;
unsigned int uses_last: 1;
unsigned int frame_number;
unsigned int update_gold : 1;
unsigned int uses_gold : 1;
unsigned int update_last : 1;
unsigned int uses_last : 1;
#endif
} VP8_HEADER;
@ -43,7 +41,6 @@ typedef struct
#define VP8_HEADER_SIZE 3
#endif
#ifdef __cplusplus
} // extern "C"
#endif

View File

@ -12,79 +12,67 @@
#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
void vp8_dequant_idct_add_c(short *input, short *dq,
unsigned char *dest, int stride);
void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred,
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *dest,
int stride);
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred,
int pred_stride, unsigned char *dst_ptr,
int dst_stride);
void vp8_dequant_idct_add_y_block_c
(short *q, short *dq,
unsigned char *dst, int stride, char *eobs)
{
int i, j;
void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst,
int stride, char *eobs) {
int i, j;
for (i = 0; i < 4; i++)
{
for (j = 0; j < 4; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_c (q, dq, dst, stride);
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
if (*eobs++ > 1)
vp8_dequant_idct_add_c(q, dq, dst, stride);
else {
vp8_dc_only_idct_add_c(q[0] * dq[0], dst, stride, dst, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
q += 16;
dst += 4;
}
dst += 4*stride - 16;
q += 16;
dst += 4;
}
dst += 4 * stride - 16;
}
}
void vp8_dequant_idct_add_uv_block_c
(short *q, short *dq,
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
{
int i, j;
void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dstu,
unsigned char *dstv, int stride,
char *eobs) {
int i, j;
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_c (q, dq, dstu, stride);
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (*eobs++ > 1)
vp8_dequant_idct_add_c(q, dq, dstu, stride);
else {
vp8_dc_only_idct_add_c(q[0] * dq[0], dstu, stride, dstu, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
q += 16;
dstu += 4;
}
dstu += 4*stride - 8;
q += 16;
dstu += 4;
}
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_c (q, dq, dstv, stride);
else
{
vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
dstu += 4 * stride - 8;
}
q += 16;
dstv += 4;
}
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (*eobs++ > 1)
vp8_dequant_idct_add_c(q, dq, dstv, stride);
else {
vp8_dc_only_idct_add_c(q[0] * dq[0], dstv, stride, dstv, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
dstv += 4*stride - 8;
q += 16;
dstv += 4;
}
dstv += 4 * stride - 8;
}
}

View File

@ -24,182 +24,162 @@
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
**************************************************************************/
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
static const int sinpi8sqrt2 = 35468;
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride)
{
int i;
int r, c;
int a1, b1, c1, d1;
short output[16];
short *ip = input;
short *op = output;
int temp1, temp2;
int shortpitch = 4;
int dst_stride) {
int i;
int r, c;
int a1, b1, c1, d1;
short output[16];
short *ip = input;
short *op = output;
int temp1, temp2;
int shortpitch = 4;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[8];
b1 = ip[0] - ip[8];
for (i = 0; i < 4; i++) {
a1 = ip[0] + ip[8];
b1 = ip[0] - ip[8];
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
op[shortpitch*0] = a1 + d1;
op[shortpitch*3] = a1 - d1;
op[shortpitch * 0] = a1 + d1;
op[shortpitch * 3] = a1 - d1;
op[shortpitch*1] = b1 + c1;
op[shortpitch*2] = b1 - c1;
op[shortpitch * 1] = b1 + c1;
op[shortpitch * 2] = b1 - c1;
ip++;
op++;
}
ip = output;
op = output;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[2];
b1 = ip[0] - ip[2];
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
op[0] = (a1 + d1 + 4) >> 3;
op[3] = (a1 - d1 + 4) >> 3;
op[1] = (b1 + c1 + 4) >> 3;
op[2] = (b1 - c1 + 4) >> 3;
ip += shortpitch;
op += shortpitch;
}
ip = output;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
int a = ip[c] + pred_ptr[c] ;
if (a < 0)
a = 0;
if (a > 255)
a = 255;
dst_ptr[c] = (unsigned char) a ;
}
ip += 4;
dst_ptr += dst_stride;
pred_ptr += pred_stride;
ip++;
op++;
}
ip = output;
op = output;
for (i = 0; i < 4; i++) {
a1 = ip[0] + ip[2];
b1 = ip[0] - ip[2];
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
op[0] = (a1 + d1 + 4) >> 3;
op[3] = (a1 - d1 + 4) >> 3;
op[1] = (b1 + c1 + 4) >> 3;
op[2] = (b1 - c1 + 4) >> 3;
ip += shortpitch;
op += shortpitch;
}
ip = output;
for (r = 0; r < 4; r++) {
for (c = 0; c < 4; c++) {
int a = ip[c] + pred_ptr[c];
if (a < 0) a = 0;
if (a > 255) a = 255;
dst_ptr[c] = (unsigned char)a;
}
ip += 4;
dst_ptr += dst_stride;
pred_ptr += pred_stride;
}
}
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride)
{
int a1 = ((input_dc + 4) >> 3);
int r, c;
int dst_stride) {
int a1 = ((input_dc + 4) >> 3);
int r, c;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
int a = a1 + pred_ptr[c] ;
for (r = 0; r < 4; r++) {
for (c = 0; c < 4; c++) {
int a = a1 + pred_ptr[c];
if (a < 0)
a = 0;
if (a < 0) a = 0;
if (a > 255)
a = 255;
if (a > 255) a = 255;
dst_ptr[c] = (unsigned char) a ;
}
dst_ptr += dst_stride;
pred_ptr += pred_stride;
dst_ptr[c] = (unsigned char)a;
}
dst_ptr += dst_stride;
pred_ptr += pred_stride;
}
}
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff)
{
short output[16];
int i;
int a1, b1, c1, d1;
int a2, b2, c2, d2;
short *ip = input;
short *op = output;
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) {
short output[16];
int i;
int a1, b1, c1, d1;
int a2, b2, c2, d2;
short *ip = input;
short *op = output;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[12];
b1 = ip[4] + ip[8];
c1 = ip[4] - ip[8];
d1 = ip[0] - ip[12];
for (i = 0; i < 4; i++) {
a1 = ip[0] + ip[12];
b1 = ip[4] + ip[8];
c1 = ip[4] - ip[8];
d1 = ip[0] - ip[12];
op[0] = a1 + b1;
op[4] = c1 + d1;
op[8] = a1 - b1;
op[12] = d1 - c1;
ip++;
op++;
}
op[0] = a1 + b1;
op[4] = c1 + d1;
op[8] = a1 - b1;
op[12] = d1 - c1;
ip++;
op++;
}
ip = output;
op = output;
ip = output;
op = output;
for (i = 0; i < 4; i++)
{
a1 = ip[0] + ip[3];
b1 = ip[1] + ip[2];
c1 = ip[1] - ip[2];
d1 = ip[0] - ip[3];
for (i = 0; i < 4; i++) {
a1 = ip[0] + ip[3];
b1 = ip[1] + ip[2];
c1 = ip[1] - ip[2];
d1 = ip[0] - ip[3];
a2 = a1 + b1;
b2 = c1 + d1;
c2 = a1 - b1;
d2 = d1 - c1;
a2 = a1 + b1;
b2 = c1 + d1;
c2 = a1 - b1;
d2 = d1 - c1;
op[0] = (a2 + 3) >> 3;
op[1] = (b2 + 3) >> 3;
op[2] = (c2 + 3) >> 3;
op[3] = (d2 + 3) >> 3;
op[0] = (a2 + 3) >> 3;
op[1] = (b2 + 3) >> 3;
op[2] = (c2 + 3) >> 3;
op[3] = (d2 + 3) >> 3;
ip += 4;
op += 4;
}
ip += 4;
op += 4;
}
for(i = 0; i < 16; i++)
{
mb_dqcoeff[i * 16] = output[i];
}
for (i = 0; i < 16; i++) {
mb_dqcoeff[i * 16] = output[i];
}
}
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff)
{
int i;
int a1;
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) {
int i;
int a1;
a1 = ((input[0] + 3) >> 3);
for(i = 0; i < 16; i++)
{
mb_dqcoeff[i * 16] = a1;
}
a1 = ((input[0] + 3) >> 3);
for (i = 0; i < 16; i++) {
mb_dqcoeff[i * 16] = a1;
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_INVTRANS_H_
#define VP8_COMMON_INVTRANS_H_
@ -25,43 +24,31 @@
extern "C" {
#endif
static void eob_adjust(char *eobs, short *diff)
{
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
int js;
for(js = 0; js < 16; js++)
{
if((eobs[js] == 0) && (diff[0] != 0))
eobs[js]++;
diff+=16;
}
static void eob_adjust(char *eobs, short *diff) {
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
int js;
for (js = 0; js < 16; js++) {
if ((eobs[js] == 0) && (diff[0] != 0)) eobs[js]++;
diff += 16;
}
}
static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd)
{
short *DQC = xd->dequant_y1;
static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd) {
short *DQC = xd->dequant_y1;
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
vp8_short_inv_walsh4x4
(&xd->block[24].dqcoeff[0], xd->qcoeff);
}
else
{
vp8_short_inv_walsh4x4_1
(&xd->block[24].dqcoeff[0], xd->qcoeff);
}
eob_adjust(xd->eobs, xd->qcoeff);
DQC = xd->dequant_y1_dc;
if (xd->mode_info_context->mbmi.mode != SPLITMV) {
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1) {
vp8_short_inv_walsh4x4(&xd->block[24].dqcoeff[0], xd->qcoeff);
} else {
vp8_short_inv_walsh4x4_1(&xd->block[24].dqcoeff[0], xd->qcoeff);
}
vp8_dequant_idct_add_y_block
(xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
eob_adjust(xd->eobs, xd->qcoeff);
DQC = xd->dequant_y1_dc;
}
vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
}
#ifdef __cplusplus
} // extern "C"

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_LOOPFILTER_H_
#define VP8_COMMON_LOOPFILTER_H_
@ -20,16 +19,12 @@
extern "C" {
#endif
#define MAX_LOOP_FILTER 63
#define MAX_LOOP_FILTER 63
/* fraction of total macroblock rows to be used in fast filter level picking */
/* has to be > 2 */
#define PARTIAL_FRAME_FRACTION 8
#define PARTIAL_FRAME_FRACTION 8
typedef enum
{
NORMAL_LOOPFILTER = 0,
SIMPLE_LOOPFILTER = 1
} LOOPFILTERTYPE;
typedef enum { NORMAL_LOOPFILTER = 0, SIMPLE_LOOPFILTER = 1 } LOOPFILTERTYPE;
#if ARCH_ARM
#define SIMD_WIDTH 1
@ -40,35 +35,32 @@ typedef enum
/* Need to align this structure so when it is declared and
* passed it can be loaded into vector registers.
*/
typedef struct
{
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
unsigned char lvl[4][4][4];
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
unsigned char mode_lf_lut[10];
typedef struct {
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
unsigned char lvl[4][4][4];
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
unsigned char mode_lf_lut[10];
} loop_filter_info_n;
typedef struct loop_filter_info
{
const unsigned char * mblim;
const unsigned char * blim;
const unsigned char * lim;
const unsigned char * hev_thr;
typedef struct loop_filter_info {
const unsigned char *mblim;
const unsigned char *blim;
const unsigned char *lim;
const unsigned char *hev_thr;
} loop_filter_info;
typedef void loop_filter_uvfunction
(
unsigned char *u, /* source pointer */
int p, /* pitch */
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
unsigned char *v
);
typedef void loop_filter_uvfunction(unsigned char *u, /* source pointer */
int p, /* pitch */
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
unsigned char *v);
/* assorted loopfilter functions which get used elsewhere */
struct VP8Common;
@ -77,8 +69,7 @@ struct modeinfo;
void vp8_loop_filter_init(struct VP8Common *cm);
void vp8_loop_filter_frame_init(struct VP8Common *cm,
struct macroblockd *mbd,
void vp8_loop_filter_frame_init(struct VP8Common *cm, struct macroblockd *mbd,
int default_filt_lvl);
void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
@ -88,22 +79,21 @@ void vp8_loop_filter_partial_frame(struct VP8Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
struct macroblockd *mbd,
void vp8_loop_filter_frame_yonly(struct VP8Common *cm, struct macroblockd *mbd,
int default_filt_lvl);
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl);
void vp8_loop_filter_row_normal(struct VP8Common *cm,
struct modeinfo *mode_info_context,
int mb_row, int post_ystride, int post_uvstride,
struct modeinfo *mode_info_context, int mb_row,
int post_ystride, int post_uvstride,
unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr);
void vp8_loop_filter_row_simple(struct VP8Common *cm,
struct modeinfo *mode_info_context,
int mb_row, int post_ystride, int post_uvstride,
struct modeinfo *mode_info_context, int mb_row,
int post_ystride, int post_uvstride,
unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr);
#ifdef __cplusplus

View File

@ -8,423 +8,374 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "loopfilter.h"
#include "onyxc_int.h"
typedef unsigned char uc;
static signed char vp8_signed_char_clamp(int t)
{
t = (t < -128 ? -128 : t);
t = (t > 127 ? 127 : t);
return (signed char) t;
static signed char vp8_signed_char_clamp(int t) {
t = (t < -128 ? -128 : t);
t = (t > 127 ? 127 : t);
return (signed char)t;
}
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static signed char vp8_filter_mask(uc limit, uc blimit,
uc p3, uc p2, uc p1, uc p0,
uc q0, uc q1, uc q2, uc q3)
{
signed char mask = 0;
mask |= (abs(p3 - p2) > limit);
mask |= (abs(p2 - p1) > limit);
mask |= (abs(p1 - p0) > limit);
mask |= (abs(q1 - q0) > limit);
mask |= (abs(q2 - q1) > limit);
mask |= (abs(q3 - q2) > limit);
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
return mask - 1;
static signed char vp8_filter_mask(uc limit, uc blimit, uc p3, uc p2, uc p1,
uc p0, uc q0, uc q1, uc q2, uc q3) {
signed char mask = 0;
mask |= (abs(p3 - p2) > limit);
mask |= (abs(p2 - p1) > limit);
mask |= (abs(p1 - p0) > limit);
mask |= (abs(q1 - q0) > limit);
mask |= (abs(q2 - q1) > limit);
mask |= (abs(q3 - q2) > limit);
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
return mask - 1;
}
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
{
signed char hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
hev |= (abs(q1 - q0) > thresh) * -1;
return hev;
static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) {
signed char hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
hev |= (abs(q1 - q0) > thresh) * -1;
return hev;
}
static void vp8_filter(signed char mask, uc hev, uc *op1,
uc *op0, uc *oq0, uc *oq1)
static void vp8_filter(signed char mask, uc hev, uc *op1, uc *op0, uc *oq0,
uc *oq1) {
signed char ps0, qs0;
signed char ps1, qs1;
signed char filter_value, Filter1, Filter2;
signed char u;
{
signed char ps0, qs0;
signed char ps1, qs1;
signed char filter_value, Filter1, Filter2;
signed char u;
ps1 = (signed char)*op1 ^ 0x80;
ps0 = (signed char)*op0 ^ 0x80;
qs0 = (signed char)*oq0 ^ 0x80;
qs1 = (signed char)*oq1 ^ 0x80;
ps1 = (signed char) * op1 ^ 0x80;
ps0 = (signed char) * op0 ^ 0x80;
qs0 = (signed char) * oq0 ^ 0x80;
qs1 = (signed char) * oq1 ^ 0x80;
/* add outer taps if we have high edge variance */
filter_value = vp8_signed_char_clamp(ps1 - qs1);
filter_value &= hev;
/* add outer taps if we have high edge variance */
filter_value = vp8_signed_char_clamp(ps1 - qs1);
filter_value &= hev;
/* inner taps */
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
filter_value &= mask;
/* inner taps */
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
filter_value &= mask;
/* save bottom 3 bits so that we round one side +4 and the other +3
* if it equals 4 we'll set to adjust by -1 to account for the fact
* we'd round 3 the other way
*/
Filter1 = vp8_signed_char_clamp(filter_value + 4);
Filter2 = vp8_signed_char_clamp(filter_value + 3);
Filter1 >>= 3;
Filter2 >>= 3;
u = vp8_signed_char_clamp(qs0 - Filter1);
*oq0 = u ^ 0x80;
u = vp8_signed_char_clamp(ps0 + Filter2);
*op0 = u ^ 0x80;
filter_value = Filter1;
/* save bottom 3 bits so that we round one side +4 and the other +3
* if it equals 4 we'll set to adjust by -1 to account for the fact
* we'd round 3 the other way
*/
Filter1 = vp8_signed_char_clamp(filter_value + 4);
Filter2 = vp8_signed_char_clamp(filter_value + 3);
Filter1 >>= 3;
Filter2 >>= 3;
u = vp8_signed_char_clamp(qs0 - Filter1);
*oq0 = u ^ 0x80;
u = vp8_signed_char_clamp(ps0 + Filter2);
*op0 = u ^ 0x80;
filter_value = Filter1;
/* outer tap adjustments */
filter_value += 1;
filter_value >>= 1;
filter_value &= ~hev;
u = vp8_signed_char_clamp(qs1 - filter_value);
*oq1 = u ^ 0x80;
u = vp8_signed_char_clamp(ps1 + filter_value);
*op1 = u ^ 0x80;
/* outer tap adjustments */
filter_value += 1;
filter_value >>= 1;
filter_value &= ~hev;
u = vp8_signed_char_clamp(qs1 - filter_value);
*oq1 = u ^ 0x80;
u = vp8_signed_char_clamp(ps1 + filter_value);
*op1 = u ^ 0x80;
}
void vp8_loop_filter_horizontal_edge_c
(
unsigned char *s,
int p, /* pitch */
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
void vp8_loop_filter_horizontal_edge_c(unsigned char *s, int p, /* pitch */
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh, int count) {
int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
s[0*p], s[1*p], s[2*p], s[3*p]);
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do {
mask = vp8_filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p],
s[-1 * p], s[0 * p], s[1 * p], s[2 * p], s[3 * p]);
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
hev = vp8_hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
}
while (++i < count * 8);
++s;
} while (++i < count * 8);
}
void vp8_loop_filter_vertical_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh, int count) {
int hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do {
mask = vp8_filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1],
s[0], s[1], s[2], s[3]);
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
s += p;
}
while (++i < count * 8);
s += p;
} while (++i < count * 8);
}
static void vp8_mbfilter(signed char mask, uc hev,
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
{
signed char s, u;
signed char filter_value, Filter1, Filter2;
signed char ps2 = (signed char) * op2 ^ 0x80;
signed char ps1 = (signed char) * op1 ^ 0x80;
signed char ps0 = (signed char) * op0 ^ 0x80;
signed char qs0 = (signed char) * oq0 ^ 0x80;
signed char qs1 = (signed char) * oq1 ^ 0x80;
signed char qs2 = (signed char) * oq2 ^ 0x80;
static void vp8_mbfilter(signed char mask, uc hev, uc *op2, uc *op1, uc *op0,
uc *oq0, uc *oq1, uc *oq2) {
signed char s, u;
signed char filter_value, Filter1, Filter2;
signed char ps2 = (signed char)*op2 ^ 0x80;
signed char ps1 = (signed char)*op1 ^ 0x80;
signed char ps0 = (signed char)*op0 ^ 0x80;
signed char qs0 = (signed char)*oq0 ^ 0x80;
signed char qs1 = (signed char)*oq1 ^ 0x80;
signed char qs2 = (signed char)*oq2 ^ 0x80;
/* add outer taps if we have high edge variance */
filter_value = vp8_signed_char_clamp(ps1 - qs1);
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
filter_value &= mask;
/* add outer taps if we have high edge variance */
filter_value = vp8_signed_char_clamp(ps1 - qs1);
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
filter_value &= mask;
Filter2 = filter_value;
Filter2 &= hev;
Filter2 = filter_value;
Filter2 &= hev;
/* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
Filter1 >>= 3;
Filter2 >>= 3;
qs0 = vp8_signed_char_clamp(qs0 - Filter1);
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
/* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
Filter1 >>= 3;
Filter2 >>= 3;
qs0 = vp8_signed_char_clamp(qs0 - Filter1);
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
/* only apply wider filter if not high edge variance */
filter_value &= ~hev;
Filter2 = filter_value;
/* only apply wider filter if not high edge variance */
filter_value &= ~hev;
Filter2 = filter_value;
/* roughly 3/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
s = vp8_signed_char_clamp(qs0 - u);
*oq0 = s ^ 0x80;
s = vp8_signed_char_clamp(ps0 + u);
*op0 = s ^ 0x80;
/* roughly 3/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
s = vp8_signed_char_clamp(qs0 - u);
*oq0 = s ^ 0x80;
s = vp8_signed_char_clamp(ps0 + u);
*op0 = s ^ 0x80;
/* roughly 2/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
s = vp8_signed_char_clamp(qs1 - u);
*oq1 = s ^ 0x80;
s = vp8_signed_char_clamp(ps1 + u);
*op1 = s ^ 0x80;
/* roughly 2/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
s = vp8_signed_char_clamp(qs1 - u);
*oq1 = s ^ 0x80;
s = vp8_signed_char_clamp(ps1 + u);
*op1 = s ^ 0x80;
/* roughly 1/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
s = vp8_signed_char_clamp(qs2 - u);
*oq2 = s ^ 0x80;
s = vp8_signed_char_clamp(ps2 + u);
*op2 = s ^ 0x80;
/* roughly 1/7th difference across boundary */
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
s = vp8_signed_char_clamp(qs2 - u);
*oq2 = s ^ 0x80;
s = vp8_signed_char_clamp(ps2 + u);
*op2 = s ^ 0x80;
}
void vp8_mbloop_filter_horizontal_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
void vp8_mbloop_filter_horizontal_edge_c(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count) {
signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do
{
/* loop filter designed to work using chars so that we can make maximum use
* of 8 bit simd instructions.
*/
do {
mask = vp8_filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p],
s[-1 * p], s[0 * p], s[1 * p], s[2 * p], s[3 * p]);
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
s[0*p], s[1*p], s[2*p], s[3*p]);
hev = vp8_hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
++s;
}
while (++i < count * 8);
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
s + 2 * p);
++s;
} while (++i < count * 8);
}
void vp8_mbloop_filter_vertical_edge_c(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh, int count) {
signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
void vp8_mbloop_filter_vertical_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
signed char hev = 0; /* high edge variance */
signed char mask = 0;
int i = 0;
do {
mask = vp8_filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1],
s[0], s[1], s[2], s[3]);
do
{
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
s += p;
}
while (++i < count * 8);
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
s += p;
} while (++i < count * 8);
}
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
{
/* Why does this cause problems for win32?
* error C2143: syntax error : missing ';' before 'type'
* (void) limit;
*/
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
return mask;
static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0,
uc q1) {
/* Why does this cause problems for win32?
* error C2143: syntax error : missing ';' before 'type'
* (void) limit;
*/
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
return mask;
}
static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
{
signed char filter_value, Filter1, Filter2;
signed char p1 = (signed char) * op1 ^ 0x80;
signed char p0 = (signed char) * op0 ^ 0x80;
signed char q0 = (signed char) * oq0 ^ 0x80;
signed char q1 = (signed char) * oq1 ^ 0x80;
signed char u;
static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0,
uc *oq1) {
signed char filter_value, Filter1, Filter2;
signed char p1 = (signed char)*op1 ^ 0x80;
signed char p0 = (signed char)*op0 ^ 0x80;
signed char q0 = (signed char)*oq0 ^ 0x80;
signed char q1 = (signed char)*oq1 ^ 0x80;
signed char u;
filter_value = vp8_signed_char_clamp(p1 - q1);
filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
filter_value &= mask;
filter_value = vp8_signed_char_clamp(p1 - q1);
filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
filter_value &= mask;
/* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(filter_value + 4);
Filter1 >>= 3;
u = vp8_signed_char_clamp(q0 - Filter1);
*oq0 = u ^ 0x80;
/* save bottom 3 bits so that we round one side +4 and the other +3 */
Filter1 = vp8_signed_char_clamp(filter_value + 4);
Filter1 >>= 3;
u = vp8_signed_char_clamp(q0 - Filter1);
*oq0 = u ^ 0x80;
Filter2 = vp8_signed_char_clamp(filter_value + 3);
Filter2 >>= 3;
u = vp8_signed_char_clamp(p0 + Filter2);
*op0 = u ^ 0x80;
Filter2 = vp8_signed_char_clamp(filter_value + 3);
Filter2 >>= 3;
u = vp8_signed_char_clamp(p0 + Filter2);
*op0 = u ^ 0x80;
}
void vp8_loop_filter_simple_horizontal_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit
)
{
signed char mask = 0;
int i = 0;
void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *s, int p,
const unsigned char *blimit) {
signed char mask = 0;
int i = 0;
do
{
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
}
while (++i < 16);
do {
mask = vp8_simple_filter_mask(blimit[0], s[-2 * p], s[-1 * p], s[0 * p],
s[1 * p]);
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
} while (++i < 16);
}
void vp8_loop_filter_simple_vertical_edge_c
(
unsigned char *s,
int p,
const unsigned char *blimit
)
{
signed char mask = 0;
int i = 0;
do
{
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
s += p;
}
while (++i < 16);
void vp8_loop_filter_simple_vertical_edge_c(unsigned char *s, int p,
const unsigned char *blimit) {
signed char mask = 0;
int i = 0;
do {
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
s += p;
} while (++i < 16);
}
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
loop_filter_info *lfi) {
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
loop_filter_info *lfi) {
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
loop_filter_info *lfi) {
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim,
lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim,
lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim,
lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
const unsigned char *blimit) {
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride,
blimit);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride,
blimit);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride,
blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
loop_filter_info *lfi) {
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
const unsigned char *blimit) {
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
}

View File

@ -8,61 +8,50 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "blockd.h"
void vp8_setup_block_dptrs(MACROBLOCKD *x)
{
int r, c;
void vp8_setup_block_dptrs(MACROBLOCKD *x) {
int r, c;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
}
for (r = 0; r < 4; r++) {
for (c = 0; c < 4; c++) {
x->block[r * 4 + c].predictor = x->predictor + r * 4 * 16 + c * 4;
}
}
for (r = 0; r < 2; r++)
{
for (c = 0; c < 2; c++)
{
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
}
for (r = 0; r < 2; r++) {
for (c = 0; c < 2; c++) {
x->block[16 + r * 2 + c].predictor =
x->predictor + 256 + r * 4 * 8 + c * 4;
}
}
for (r = 0; r < 2; r++)
{
for (c = 0; c < 2; c++)
{
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
}
for (r = 0; r < 2; r++) {
for (c = 0; c < 2; c++) {
x->block[20 + r * 2 + c].predictor =
x->predictor + 320 + r * 4 * 8 + c * 4;
}
}
for (r = 0; r < 25; r++)
{
x->block[r].qcoeff = x->qcoeff + r * 16;
x->block[r].dqcoeff = x->dqcoeff + r * 16;
x->block[r].eob = x->eobs + r;
}
for (r = 0; r < 25; r++) {
x->block[r].qcoeff = x->qcoeff + r * 16;
x->block[r].dqcoeff = x->dqcoeff + r * 16;
x->block[r].eob = x->eobs + r;
}
}
void vp8_build_block_doffsets(MACROBLOCKD *x)
{
int block;
void vp8_build_block_doffsets(MACROBLOCKD *x) {
int block;
for (block = 0; block < 16; block++) /* y blocks */
{
x->block[block].offset =
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
}
for (block = 0; block < 16; block++) /* y blocks */
{
x->block[block].offset =
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
}
for (block = 16; block < 20; block++) /* U and V blocks */
{
x->block[block+4].offset =
x->block[block].offset =
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
}
for (block = 16; block < 20; block++) /* U and V blocks */
{
x->block[block + 4].offset = x->block[block].offset =
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/* MFQE: Multiframe Quality Enhancement
* In rate limited situations keyframes may cause significant visual artifacts
* commonly referred to as "popping." This file implements a postproccesing
@ -28,359 +27,299 @@
#include <stdlib.h>
static void filter_by_weight(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride,
int block_size, int src_weight)
{
int dst_weight = (1 << MFQE_PRECISION) - src_weight;
int rounding_bit = 1 << (MFQE_PRECISION - 1);
int r, c;
unsigned char *dst, int dst_stride, int block_size,
int src_weight) {
int dst_weight = (1 << MFQE_PRECISION) - src_weight;
int rounding_bit = 1 << (MFQE_PRECISION - 1);
int r, c;
for (r = 0; r < block_size; r++)
{
for (c = 0; c < block_size; c++)
{
dst[c] = (src[c] * src_weight +
dst[c] * dst_weight +
rounding_bit) >> MFQE_PRECISION;
}
src += src_stride;
dst += dst_stride;
for (r = 0; r < block_size; r++) {
for (c = 0; c < block_size; c++) {
dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >>
MFQE_PRECISION;
}
src += src_stride;
dst += dst_stride;
}
}
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride,
int src_weight)
{
filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
int src_weight) {
filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
}
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride,
int src_weight)
{
filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
int src_weight) {
filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
}
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
unsigned char *dst, int dst_stride,
int src_weight)
{
filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
int src_weight) {
filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
}
static void apply_ifactor(unsigned char *y_src,
int y_src_stride,
unsigned char *y_dst,
int y_dst_stride,
unsigned char *u_src,
unsigned char *v_src,
int uv_src_stride,
unsigned char *u_dst,
unsigned char *v_dst,
int uv_dst_stride,
int block_size,
int src_weight)
{
if (block_size == 16)
{
vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
}
else /* if (block_size == 8) */
{
vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
}
static void apply_ifactor(unsigned char *y_src, int y_src_stride,
unsigned char *y_dst, int y_dst_stride,
unsigned char *u_src, unsigned char *v_src,
int uv_src_stride, unsigned char *u_dst,
unsigned char *v_dst, int uv_dst_stride,
int block_size, int src_weight) {
if (block_size == 16) {
vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride,
src_weight);
vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride,
src_weight);
vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride,
src_weight);
} else /* if (block_size == 8) */
{
vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride,
src_weight);
vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride,
src_weight);
vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride,
src_weight);
}
}
static unsigned int int_sqrt(unsigned int x)
{
unsigned int y = x;
unsigned int guess;
int p = 1;
while (y>>=1) p++;
p>>=1;
static unsigned int int_sqrt(unsigned int x) {
unsigned int y = x;
unsigned int guess;
int p = 1;
while (y >>= 1) p++;
p >>= 1;
guess=0;
while (p>=0)
{
guess |= (1<<p);
if (x<guess*guess)
guess -= (1<<p);
p--;
}
/* choose between guess or guess+1 */
return guess+(guess*guess+guess+1<=x);
guess = 0;
while (p >= 0) {
guess |= (1 << p);
if (x < guess * guess) guess -= (1 << p);
p--;
}
/* choose between guess or guess+1 */
return guess + (guess * guess + guess + 1 <= x);
}
#define USE_SSD
static void multiframe_quality_enhance_block
(
static void multiframe_quality_enhance_block(
int blksize, /* Currently only values supported are 16, 8 */
int qcurr,
int qprev,
unsigned char *y,
unsigned char *u,
unsigned char *v,
int y_stride,
int uv_stride,
unsigned char *yd,
unsigned char *ud,
unsigned char *vd,
int yd_stride,
int uvd_stride
)
{
static const unsigned char VP8_ZEROS[16]=
int qcurr, int qprev, unsigned char *y, unsigned char *u, unsigned char *v,
int y_stride, int uv_stride, unsigned char *yd, unsigned char *ud,
unsigned char *vd, int yd_stride, int uvd_stride) {
static const unsigned char VP8_ZEROS[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 };
int uvblksize = blksize >> 1;
int qdiff = qcurr - qprev;
int i;
unsigned char *up;
unsigned char *udp;
unsigned char *vp;
unsigned char *vdp;
unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;
if (blksize == 16) {
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse) + 128) >> 8;
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse) + 128) >> 8;
#ifdef USE_SSD
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 128) >> 8;
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 32) >> 6;
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 32) >> 6;
#else
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride) + 32) >> 6;
#endif
} else /* if (blksize == 8) */
{
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse) + 32) >> 6;
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse) + 32) >> 6;
#ifdef USE_SSD
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 32) >> 6;
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 8) >> 4;
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 8) >> 4;
#else
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
#endif
}
actrisk = (actd > act * 5);
/* thr = qdiff/16 + log2(act) + log4(qprev) */
thr = (qdiff >> 4);
while (actd >>= 1) thr++;
while (qprev >>= 2) thr++;
#ifdef USE_SSD
thrsq = thr * thr;
if (sad < thrsq &&
/* additional checks for color mismatch and excessive addition of
* high-frequencies */
4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
#else
if (sad < thr &&
/* additional checks for color mismatch and excessive addition of
* high-frequencies */
2 * usad < thr && 2 * vsad < thr && !actrisk)
#endif
{
int ifactor;
#ifdef USE_SSD
/* TODO: optimize this later to not need sqr root */
sad = int_sqrt(sad);
#endif
ifactor = (sad << MFQE_PRECISION) / thr;
ifactor >>= (qdiff >> 5);
if (ifactor) {
apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
uvd_stride, blksize, ifactor);
}
} else /* else implicitly copy from previous frame */
{
if (blksize == 16) {
vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
} else /* if (blksize == 8) */
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
for (up = u, udp = ud, i = 0; i < uvblksize;
++i, up += uv_stride, udp += uvd_stride)
memcpy(udp, up, uvblksize);
for (vp = v, vdp = vd, i = 0; i < uvblksize;
++i, vp += uv_stride, vdp += uvd_stride)
memcpy(vdp, vp, uvblksize);
}
}
}
static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map) {
if (mode_info_context->mbmi.mb_skip_coeff)
map[0] = map[1] = map[2] = map[3] = 1;
else if (mode_info_context->mbmi.mode == SPLITMV) {
static int ndx[4][4] = {
{ 0, 1, 4, 5 }, { 2, 3, 6, 7 }, { 8, 9, 12, 13 }, { 10, 11, 14, 15 }
};
int uvblksize = blksize >> 1;
int qdiff = qcurr - qprev;
int i;
unsigned char *up;
unsigned char *udp;
unsigned char *vp;
unsigned char *vdp;
unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;
if (blksize == 16)
{
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
#ifdef USE_SSD
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 128)>>8;
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 32)>>6;
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 32)>>6;
#else
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
#endif
}
else /* if (blksize == 8) */
{
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
#ifdef USE_SSD
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 32)>>6;
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 8)>>4;
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 8)>>4;
#else
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
#endif
}
actrisk = (actd > act * 5);
/* thr = qdiff/16 + log2(act) + log4(qprev) */
thr = (qdiff >> 4);
while (actd >>= 1) thr++;
while (qprev >>= 2) thr++;
#ifdef USE_SSD
thrsq = thr * thr;
if (sad < thrsq &&
/* additional checks for color mismatch and excessive addition of
* high-frequencies */
4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
#else
if (sad < thr &&
/* additional checks for color mismatch and excessive addition of
* high-frequencies */
2 * usad < thr && 2 * vsad < thr && !actrisk)
#endif
{
int ifactor;
#ifdef USE_SSD
/* TODO: optimize this later to not need sqr root */
sad = int_sqrt(sad);
#endif
ifactor = (sad << MFQE_PRECISION) / thr;
ifactor >>= (qdiff >> 5);
if (ifactor)
{
apply_ifactor(y, y_stride, yd, yd_stride,
u, v, uv_stride,
ud, vd, uvd_stride,
blksize, ifactor);
}
}
else /* else implicitly copy from previous frame */
{
if (blksize == 16)
{
vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
}
else /* if (blksize == 8) */
{
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
memcpy(udp, up, uvblksize);
for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
memcpy(vdp, vp, uvblksize);
}
int i, j;
for (i = 0; i < 4; ++i) {
map[i] = 1;
for (j = 0; j < 4 && map[j]; ++j)
map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
}
} else {
map[0] = map[1] = map[2] = map[3] =
(mode_info_context->mbmi.mode > B_PRED &&
abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
}
return (map[0] + map[1] + map[2] + map[3]);
}
static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map)
{
if (mode_info_context->mbmi.mb_skip_coeff)
map[0] = map[1] = map[2] = map[3] = 1;
else if (mode_info_context->mbmi.mode==SPLITMV)
{
static int ndx[4][4] =
{
{0, 1, 4, 5},
{2, 3, 6, 7},
{8, 9, 12, 13},
{10, 11, 14, 15}
};
int i, j;
for (i=0; i<4; ++i)
{
map[i] = 1;
for (j=0; j<4 && map[j]; ++j)
map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
}
}
else
{
map[0] = map[1] = map[2] = map[3] =
(mode_info_context->mbmi.mode > B_PRED &&
abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
}
return (map[0]+map[1]+map[2]+map[3]);
}
void vp8_multiframe_quality_enhance(VP8_COMMON *cm) {
YV12_BUFFER_CONFIG *show = cm->frame_to_show;
YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
void vp8_multiframe_quality_enhance
(
VP8_COMMON *cm
)
{
YV12_BUFFER_CONFIG *show = cm->frame_to_show;
YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
FRAME_TYPE frame_type = cm->frame_type;
/* Point at base of Mb MODE_INFO list has motion vectors etc */
const MODE_INFO *mode_info_context = cm->show_frame_mi;
int mb_row;
int mb_col;
int totmap, map[4];
int qcurr = cm->base_qindex;
int qprev = cm->postproc_state.last_base_qindex;
FRAME_TYPE frame_type = cm->frame_type;
/* Point at base of Mb MODE_INFO list has motion vectors etc */
const MODE_INFO *mode_info_context = cm->show_frame_mi;
int mb_row;
int mb_col;
int totmap, map[4];
int qcurr = cm->base_qindex;
int qprev = cm->postproc_state.last_base_qindex;
unsigned char *y_ptr, *u_ptr, *v_ptr;
unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
unsigned char *y_ptr, *u_ptr, *v_ptr;
unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
/* Set up the buffer pointers */
y_ptr = show->y_buffer;
u_ptr = show->u_buffer;
v_ptr = show->v_buffer;
yd_ptr = dest->y_buffer;
ud_ptr = dest->u_buffer;
vd_ptr = dest->v_buffer;
/* Set up the buffer pointers */
y_ptr = show->y_buffer;
u_ptr = show->u_buffer;
v_ptr = show->v_buffer;
yd_ptr = dest->y_buffer;
ud_ptr = dest->u_buffer;
vd_ptr = dest->v_buffer;
/* postprocess each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
/* if motion is high there will likely be no benefit */
if (frame_type == INTER_FRAME) totmap = qualify_inter_mb(mode_info_context, map);
else totmap = (frame_type == KEY_FRAME ? 4 : 0);
if (totmap)
{
if (totmap < 4)
{
int i, j;
for (i=0; i<2; ++i)
for (j=0; j<2; ++j)
{
if (map[i*2+j])
{
multiframe_quality_enhance_block(8, qcurr, qprev,
y_ptr + 8*(i*show->y_stride+j),
u_ptr + 4*(i*show->uv_stride+j),
v_ptr + 4*(i*show->uv_stride+j),
show->y_stride,
show->uv_stride,
yd_ptr + 8*(i*dest->y_stride+j),
ud_ptr + 4*(i*dest->uv_stride+j),
vd_ptr + 4*(i*dest->uv_stride+j),
dest->y_stride,
dest->uv_stride);
}
else
{
/* copy a 8x8 block */
int k;
unsigned char *up = u_ptr + 4*(i*show->uv_stride+j);
unsigned char *udp = ud_ptr + 4*(i*dest->uv_stride+j);
unsigned char *vp = v_ptr + 4*(i*show->uv_stride+j);
unsigned char *vdp = vd_ptr + 4*(i*dest->uv_stride+j);
vp8_copy_mem8x8(y_ptr + 8*(i*show->y_stride+j), show->y_stride,
yd_ptr + 8*(i*dest->y_stride+j), dest->y_stride);
for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
vp += show->uv_stride, vdp += dest->uv_stride)
{
memcpy(udp, up, 4);
memcpy(vdp, vp, 4);
}
}
}
}
else /* totmap = 4 */
{
multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
u_ptr, v_ptr,
show->y_stride,
show->uv_stride,
yd_ptr, ud_ptr, vd_ptr,
dest->y_stride,
dest->uv_stride);
/* postprocess each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
/* if motion is high there will likely be no benefit */
if (frame_type == INTER_FRAME)
totmap = qualify_inter_mb(mode_info_context, map);
else
totmap = (frame_type == KEY_FRAME ? 4 : 0);
if (totmap) {
if (totmap < 4) {
int i, j;
for (i = 0; i < 2; ++i)
for (j = 0; j < 2; ++j) {
if (map[i * 2 + j]) {
multiframe_quality_enhance_block(
8, qcurr, qprev, y_ptr + 8 * (i * show->y_stride + j),
u_ptr + 4 * (i * show->uv_stride + j),
v_ptr + 4 * (i * show->uv_stride + j), show->y_stride,
show->uv_stride, yd_ptr + 8 * (i * dest->y_stride + j),
ud_ptr + 4 * (i * dest->uv_stride + j),
vd_ptr + 4 * (i * dest->uv_stride + j), dest->y_stride,
dest->uv_stride);
} else {
/* copy a 8x8 block */
int k;
unsigned char *up = u_ptr + 4 * (i * show->uv_stride + j);
unsigned char *udp = ud_ptr + 4 * (i * dest->uv_stride + j);
unsigned char *vp = v_ptr + 4 * (i * show->uv_stride + j);
unsigned char *vdp = vd_ptr + 4 * (i * dest->uv_stride + j);
vp8_copy_mem8x8(
y_ptr + 8 * (i * show->y_stride + j), show->y_stride,
yd_ptr + 8 * (i * dest->y_stride + j), dest->y_stride);
for (k = 0; k < 4; ++k, up += show->uv_stride,
udp += dest->uv_stride, vp += show->uv_stride,
vdp += dest->uv_stride) {
memcpy(udp, up, 4);
memcpy(vdp, vp, 4);
}
}
}
else
{
vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
}
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
yd_ptr += 16;
ud_ptr += 8;
vd_ptr += 8;
mode_info_context++; /* step to next MB */
} else /* totmap = 4 */
{
multiframe_quality_enhance_block(
16, qcurr, qprev, y_ptr, u_ptr, v_ptr, show->y_stride,
show->uv_stride, yd_ptr, ud_ptr, vd_ptr, dest->y_stride,
dest->uv_stride);
}
y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
mode_info_context++; /* Skip border mb */
} else {
vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
}
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
yd_ptr += 16;
ud_ptr += 8;
vd_ptr += 8;
mode_info_context++; /* step to next MB */
}
y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
mode_info_context++; /* Skip border mb */
}
}

View File

@ -8,26 +8,22 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
void vp8_dequant_idct_add_dspr2(short *input, short *dq,
unsigned char *dest, int stride)
{
int i;
void vp8_dequant_idct_add_dspr2(short *input, short *dq, unsigned char *dest,
int stride) {
int i;
for (i = 0; i < 16; i++)
{
input[i] = dq[i] * input[i];
}
for (i = 0; i < 16; i++) {
input[i] = dq[i] * input[i];
}
vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
memset(input, 0, 32);
vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
memset(input, 0, 32);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -13,76 +13,64 @@
#if HAVE_DSPR2
void vp8_dequant_idct_add_y_block_dspr2
(short *q, short *dq,
unsigned char *dst, int stride, char *eobs)
{
int i, j;
void vp8_dequant_idct_add_y_block_dspr2(short *q, short *dq, unsigned char *dst,
int stride, char *eobs) {
int i, j;
for (i = 0; i < 4; i++)
{
for (j = 0; j < 4; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
else
{
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride);
((int *)q)[0] = 0;
}
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
else {
vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dst, stride, dst, stride);
((int *)q)[0] = 0;
}
q += 16;
dst += 4;
}
dst += 4 * stride - 16;
q += 16;
dst += 4;
}
dst += 4 * stride - 16;
}
}
void vp8_dequant_idct_add_uv_block_dspr2
(short *q, short *dq,
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
{
int i, j;
void vp8_dequant_idct_add_uv_block_dspr2(short *q, short *dq,
unsigned char *dstu,
unsigned char *dstv, int stride,
char *eobs) {
int i, j;
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
else
{
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride);
((int *)q)[0] = 0;
}
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
else {
vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dstu, stride, dstu, stride);
((int *)q)[0] = 0;
}
q += 16;
dstu += 4;
}
dstu += 4 * stride - 8;
q += 16;
dstu += 4;
}
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
else
{
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride);
((int *)q)[0] = 0;
}
dstu += 4 * stride - 8;
}
q += 16;
dstv += 4;
}
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (*eobs++ > 1)
vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
else {
vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dstv, stride, dstv, stride);
((int *)q)[0] = 0;
}
dstv += 4 * stride - 8;
q += 16;
dstv += 4;
}
dstv += 4 * stride - 8;
}
}
#endif

View File

@ -28,342 +28,319 @@
****************************************************************************/
extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
static const int sinpi8sqrt2 = 35468;
inline void prefetch_load_short(short *src)
{
__asm__ __volatile__ (
"pref 0, 0(%[src]) \n\t"
:
: [src] "r" (src)
);
inline void prefetch_load_short(short *src) {
__asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src));
}
void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride)
{
int r, c;
int a1, b1, c1, d1;
short output[16];
short *ip = input;
short *op = output;
int temp1, temp2;
int shortpitch = 4;
int dst_stride) {
int r, c;
int a1, b1, c1, d1;
short output[16];
short *ip = input;
short *op = output;
int temp1, temp2;
int shortpitch = 4;
int c2, d2;
int temp3, temp4;
unsigned char *cm = ff_cropTbl + CROP_WIDTH;
int c2, d2;
int temp3, temp4;
unsigned char *cm = ff_cropTbl + CROP_WIDTH;
/* prepare data for load */
prefetch_load_short(ip + 8);
/* prepare data for load */
prefetch_load_short(ip + 8);
/* first loop is unrolled */
a1 = ip[0] + ip[8];
b1 = ip[0] - ip[8];
/* first loop is unrolled */
a1 = ip[0] + ip[8];
b1 = ip[0] - ip[8];
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[13] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[13] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
op[0] = a1 + d1;
op[12] = a1 - d1;
op[4] = b1 + c1;
op[8] = b1 - c1;
op[0] = a1 + d1;
op[12] = a1 - d1;
op[4] = b1 + c1;
op[8] = b1 - c1;
a1 = ip[1] + ip[9];
b1 = ip[1] - ip[9];
a1 = ip[1] + ip[9];
b1 = ip[1] - ip[9];
op[1] = a1 + d2;
op[13] = a1 - d2;
op[5] = b1 + c2;
op[9] = b1 - c2;
op[1] = a1 + d2;
op[13] = a1 - d2;
op[5] = b1 + c2;
op[9] = b1 - c2;
a1 = ip[2] + ip[10];
b1 = ip[2] - ip[10];
a1 = ip[2] + ip[10];
b1 = ip[2] - ip[10];
temp1 = (ip[6] * sinpi8sqrt2) >> 16;
temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = (ip[6] * sinpi8sqrt2) >> 16;
temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[14] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[14] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp3 = (ip[7] * sinpi8sqrt2) >> 16;
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = (ip[7] * sinpi8sqrt2) >> 16;
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
op[2] = a1 + d1;
op[14] = a1 - d1;
op[6] = b1 + c1;
op[10] = b1 - c1;
op[2] = a1 + d1;
op[14] = a1 - d1;
op[6] = b1 + c1;
op[10] = b1 - c1;
a1 = ip[3] + ip[11];
b1 = ip[3] - ip[11];
a1 = ip[3] + ip[11];
b1 = ip[3] - ip[11];
op[3] = a1 + d2;
op[15] = a1 - d2;
op[7] = b1 + c2;
op[11] = b1 - c2;
op[3] = a1 + d2;
op[15] = a1 - d2;
op[7] = b1 + c2;
op[11] = b1 - c2;
ip = output;
ip = output;
/* prepare data for load */
prefetch_load_short(ip + shortpitch);
/* prepare data for load */
prefetch_load_short(ip + shortpitch);
/* second loop is unrolled */
a1 = ip[0] + ip[2];
b1 = ip[0] - ip[2];
/* second loop is unrolled */
a1 = ip[0] + ip[2];
b1 = ip[0] - ip[2];
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[7] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[7] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
op[0] = (a1 + d1 + 4) >> 3;
op[3] = (a1 - d1 + 4) >> 3;
op[1] = (b1 + c1 + 4) >> 3;
op[2] = (b1 - c1 + 4) >> 3;
op[0] = (a1 + d1 + 4) >> 3;
op[3] = (a1 - d1 + 4) >> 3;
op[1] = (b1 + c1 + 4) >> 3;
op[2] = (b1 - c1 + 4) >> 3;
a1 = ip[4] + ip[6];
b1 = ip[4] - ip[6];
a1 = ip[4] + ip[6];
b1 = ip[4] - ip[6];
op[4] = (a1 + d2 + 4) >> 3;
op[7] = (a1 - d2 + 4) >> 3;
op[5] = (b1 + c2 + 4) >> 3;
op[6] = (b1 - c2 + 4) >> 3;
op[4] = (a1 + d2 + 4) >> 3;
op[7] = (a1 - d2 + 4) >> 3;
op[5] = (b1 + c2 + 4) >> 3;
op[6] = (b1 - c2 + 4) >> 3;
a1 = ip[8] + ip[10];
b1 = ip[8] - ip[10];
a1 = ip[8] + ip[10];
b1 = ip[8] - ip[10];
temp1 = (ip[9] * sinpi8sqrt2) >> 16;
temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = (ip[9] * sinpi8sqrt2) >> 16;
temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
c1 = temp1 - temp2;
temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[11] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
temp2 = (ip[11] * sinpi8sqrt2) >> 16;
d1 = temp1 + temp2;
temp3 = (ip[13] * sinpi8sqrt2) >> 16;
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = (ip[13] * sinpi8sqrt2) >> 16;
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
c2 = temp3 - temp4;
temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
d2 = temp3 + temp4;
op[8] = (a1 + d1 + 4) >> 3;
op[11] = (a1 - d1 + 4) >> 3;
op[9] = (b1 + c1 + 4) >> 3;
op[10] = (b1 - c1 + 4) >> 3;
op[8] = (a1 + d1 + 4) >> 3;
op[11] = (a1 - d1 + 4) >> 3;
op[9] = (b1 + c1 + 4) >> 3;
op[10] = (b1 - c1 + 4) >> 3;
a1 = ip[12] + ip[14];
b1 = ip[12] - ip[14];
a1 = ip[12] + ip[14];
b1 = ip[12] - ip[14];
op[12] = (a1 + d2 + 4) >> 3;
op[15] = (a1 - d2 + 4) >> 3;
op[13] = (b1 + c2 + 4) >> 3;
op[14] = (b1 - c2 + 4) >> 3;
op[12] = (a1 + d2 + 4) >> 3;
op[15] = (a1 - d2 + 4) >> 3;
op[13] = (b1 + c2 + 4) >> 3;
op[14] = (b1 - c2 + 4) >> 3;
ip = output;
ip = output;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
short a = ip[c] + pred_ptr[c] ;
dst_ptr[c] = cm[a] ;
}
ip += 4;
dst_ptr += dst_stride;
pred_ptr += pred_stride;
for (r = 0; r < 4; r++) {
for (c = 0; c < 4; c++) {
short a = ip[c] + pred_ptr[c];
dst_ptr[c] = cm[a];
}
ip += 4;
dst_ptr += dst_stride;
pred_ptr += pred_stride;
}
}
void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride)
{
int a1;
int i, absa1;
int t2, vector_a1, vector_a;
void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride) {
int a1;
int i, absa1;
int t2, vector_a1, vector_a;
/* a1 = ((input_dc + 4) >> 3); */
__asm__ __volatile__ (
"addi %[a1], %[input_dc], 4 \n\t"
"sra %[a1], %[a1], 3 \n\t"
: [a1] "=r" (a1)
: [input_dc] "r" (input_dc)
);
/* a1 = ((input_dc + 4) >> 3); */
__asm__ __volatile__(
"addi %[a1], %[input_dc], 4 \n\t"
"sra %[a1], %[a1], 3 \n\t"
: [a1] "=r"(a1)
: [input_dc] "r"(input_dc));
if (a1 < 0)
{
/* use quad-byte
* input and output memory are four byte aligned
*/
__asm__ __volatile__ (
"abs %[absa1], %[a1] \n\t"
"replv.qb %[vector_a1], %[absa1] \n\t"
: [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
: [a1] "r" (a1)
);
if (a1 < 0) {
/* use quad-byte
* input and output memory are four byte aligned
*/
__asm__ __volatile__(
"abs %[absa1], %[a1] \n\t"
"replv.qb %[vector_a1], %[absa1] \n\t"
: [absa1] "=r"(absa1), [vector_a1] "=r"(vector_a1)
: [a1] "r"(a1));
/* use (a1 - predptr[c]) instead a1 + predptr[c] */
for (i = 4; i--;)
{
__asm__ __volatile__ (
"lw %[t2], 0(%[pred_ptr]) \n\t"
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
"subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t"
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
);
}
/* use (a1 - predptr[c]) instead a1 + predptr[c] */
for (i = 4; i--;) {
__asm__ __volatile__(
"lw %[t2], 0(%[pred_ptr]) \n\t"
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
"subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t"
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
: [t2] "=&r"(t2), [vector_a] "=&r"(vector_a),
[dst_ptr] "+&r"(dst_ptr), [pred_ptr] "+&r"(pred_ptr)
: [dst_stride] "r"(dst_stride), [pred_stride] "r"(pred_stride),
[vector_a1] "r"(vector_a1));
}
else
{
/* use quad-byte
* input and output memory are four byte aligned
*/
__asm__ __volatile__ (
"replv.qb %[vector_a1], %[a1] \n\t"
: [vector_a1] "=r" (vector_a1)
: [a1] "r" (a1)
);
} else {
/* use quad-byte
* input and output memory are four byte aligned
*/
__asm__ __volatile__("replv.qb %[vector_a1], %[a1] \n\t"
: [vector_a1] "=r"(vector_a1)
: [a1] "r"(a1));
for (i = 4; i--;)
{
__asm__ __volatile__ (
"lw %[t2], 0(%[pred_ptr]) \n\t"
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
"addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t"
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
);
}
for (i = 4; i--;) {
__asm__ __volatile__(
"lw %[t2], 0(%[pred_ptr]) \n\t"
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
"addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t"
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
: [t2] "=&r"(t2), [vector_a] "=&r"(vector_a),
[dst_ptr] "+&r"(dst_ptr), [pred_ptr] "+&r"(pred_ptr)
: [dst_stride] "r"(dst_stride), [pred_stride] "r"(pred_stride),
[vector_a1] "r"(vector_a1));
}
}
}
void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff)
{
short output[16];
int i;
int a1, b1, c1, d1;
int a2, b2, c2, d2;
short *ip = input;
short *op = output;
void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff) {
short output[16];
int i;
int a1, b1, c1, d1;
int a2, b2, c2, d2;
short *ip = input;
short *op = output;
prefetch_load_short(ip);
prefetch_load_short(ip);
for (i = 4; i--;)
{
a1 = ip[0] + ip[12];
b1 = ip[4] + ip[8];
c1 = ip[4] - ip[8];
d1 = ip[0] - ip[12];
for (i = 4; i--;) {
a1 = ip[0] + ip[12];
b1 = ip[4] + ip[8];
c1 = ip[4] - ip[8];
d1 = ip[0] - ip[12];
op[0] = a1 + b1;
op[4] = c1 + d1;
op[8] = a1 - b1;
op[12] = d1 - c1;
op[0] = a1 + b1;
op[4] = c1 + d1;
op[8] = a1 - b1;
op[12] = d1 - c1;
ip++;
op++;
}
ip++;
op++;
}
ip = output;
op = output;
ip = output;
op = output;
prefetch_load_short(ip);
prefetch_load_short(ip);
for (i = 4; i--;)
{
a1 = ip[0] + ip[3] + 3;
b1 = ip[1] + ip[2];
c1 = ip[1] - ip[2];
d1 = ip[0] - ip[3] + 3;
for (i = 4; i--;) {
a1 = ip[0] + ip[3] + 3;
b1 = ip[1] + ip[2];
c1 = ip[1] - ip[2];
d1 = ip[0] - ip[3] + 3;
a2 = a1 + b1;
b2 = d1 + c1;
c2 = a1 - b1;
d2 = d1 - c1;
a2 = a1 + b1;
b2 = d1 + c1;
c2 = a1 - b1;
d2 = d1 - c1;
op[0] = a2 >> 3;
op[1] = b2 >> 3;
op[2] = c2 >> 3;
op[3] = d2 >> 3;
op[0] = a2 >> 3;
op[1] = b2 >> 3;
op[2] = c2 >> 3;
op[3] = d2 >> 3;
ip += 4;
op += 4;
}
ip += 4;
op += 4;
}
for (i = 0; i < 16; i++)
{
mb_dqcoeff[i * 16] = output[i];
}
for (i = 0; i < 16; i++) {
mb_dqcoeff[i * 16] = output[i];
}
}
void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff)
{
int a1;
void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff) {
int a1;
a1 = ((input[0] + 3) >> 3);
a1 = ((input[0] + 3) >> 3);
__asm__ __volatile__ (
"sh %[a1], 0(%[mb_dqcoeff]) \n\t"
"sh %[a1], 32(%[mb_dqcoeff]) \n\t"
"sh %[a1], 64(%[mb_dqcoeff]) \n\t"
"sh %[a1], 96(%[mb_dqcoeff]) \n\t"
"sh %[a1], 128(%[mb_dqcoeff]) \n\t"
"sh %[a1], 160(%[mb_dqcoeff]) \n\t"
"sh %[a1], 192(%[mb_dqcoeff]) \n\t"
"sh %[a1], 224(%[mb_dqcoeff]) \n\t"
"sh %[a1], 256(%[mb_dqcoeff]) \n\t"
"sh %[a1], 288(%[mb_dqcoeff]) \n\t"
"sh %[a1], 320(%[mb_dqcoeff]) \n\t"
"sh %[a1], 352(%[mb_dqcoeff]) \n\t"
"sh %[a1], 384(%[mb_dqcoeff]) \n\t"
"sh %[a1], 416(%[mb_dqcoeff]) \n\t"
"sh %[a1], 448(%[mb_dqcoeff]) \n\t"
"sh %[a1], 480(%[mb_dqcoeff]) \n\t"
__asm__ __volatile__(
"sh %[a1], 0(%[mb_dqcoeff]) \n\t"
"sh %[a1], 32(%[mb_dqcoeff]) \n\t"
"sh %[a1], 64(%[mb_dqcoeff]) \n\t"
"sh %[a1], 96(%[mb_dqcoeff]) \n\t"
"sh %[a1], 128(%[mb_dqcoeff]) \n\t"
"sh %[a1], 160(%[mb_dqcoeff]) \n\t"
"sh %[a1], 192(%[mb_dqcoeff]) \n\t"
"sh %[a1], 224(%[mb_dqcoeff]) \n\t"
"sh %[a1], 256(%[mb_dqcoeff]) \n\t"
"sh %[a1], 288(%[mb_dqcoeff]) \n\t"
"sh %[a1], 320(%[mb_dqcoeff]) \n\t"
"sh %[a1], 352(%[mb_dqcoeff]) \n\t"
"sh %[a1], 384(%[mb_dqcoeff]) \n\t"
"sh %[a1], 416(%[mb_dqcoeff]) \n\t"
"sh %[a1], 448(%[mb_dqcoeff]) \n\t"
"sh %[a1], 480(%[mb_dqcoeff]) \n\t"
:
: [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff)
);
:
: [a1] "r"(a1), [mb_dqcoeff] "r"(mb_dqcoeff));
}
#endif

View File

@ -8,114 +8,90 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#if HAVE_DSPR2
inline void prefetch_load_int(unsigned char *src)
{
__asm__ __volatile__ (
"pref 0, 0(%[src]) \n\t"
:
: [src] "r" (src)
);
inline void prefetch_load_int(unsigned char *src) {
__asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src));
}
__inline void vp8_copy_mem16x16_dspr2(unsigned char *RESTRICT src,
int src_stride,
unsigned char *RESTRICT dst,
int dst_stride) {
int r;
unsigned int a0, a1, a2, a3;
__inline void vp8_copy_mem16x16_dspr2(
unsigned char *RESTRICT src,
int src_stride,
unsigned char *RESTRICT dst,
int dst_stride)
{
int r;
unsigned int a0, a1, a2, a3;
for (r = 16; r--;)
{
/* load src data in cache memory */
prefetch_load_int(src + src_stride);
/* use unaligned memory load and store */
__asm__ __volatile__ (
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"ulw %[a2], 8(%[src]) \n\t"
"ulw %[a3], 12(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
"sw %[a2], 8(%[dst]) \n\t"
"sw %[a3], 12(%[dst]) \n\t"
: [a0] "=&r" (a0), [a1] "=&r" (a1),
[a2] "=&r" (a2), [a3] "=&r" (a3)
: [src] "r" (src), [dst] "r" (dst)
);
src += src_stride;
dst += dst_stride;
}
}
__inline void vp8_copy_mem8x8_dspr2(
unsigned char *RESTRICT src,
int src_stride,
unsigned char *RESTRICT dst,
int dst_stride)
{
int r;
unsigned int a0, a1;
for (r = 16; r--;) {
/* load src data in cache memory */
prefetch_load_int(src + src_stride);
for (r = 8; r--;)
{
/* use unaligned memory load and store */
__asm__ __volatile__ (
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
: [a0] "=&r" (a0), [a1] "=&r" (a1)
: [src] "r" (src), [dst] "r" (dst)
);
/* use unaligned memory load and store */
__asm__ __volatile__(
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"ulw %[a2], 8(%[src]) \n\t"
"ulw %[a3], 12(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
"sw %[a2], 8(%[dst]) \n\t"
"sw %[a3], 12(%[dst]) \n\t"
: [a0] "=&r"(a0), [a1] "=&r"(a1), [a2] "=&r"(a2), [a3] "=&r"(a3)
: [src] "r"(src), [dst] "r"(dst));
src += src_stride;
dst += dst_stride;
}
src += src_stride;
dst += dst_stride;
}
}
__inline void vp8_copy_mem8x8_dspr2(unsigned char *RESTRICT src, int src_stride,
unsigned char *RESTRICT dst,
int dst_stride) {
int r;
unsigned int a0, a1;
__inline void vp8_copy_mem8x4_dspr2(
unsigned char *RESTRICT src,
int src_stride,
unsigned char *RESTRICT dst,
int dst_stride)
{
int r;
unsigned int a0, a1;
/* load src data in cache memory */
prefetch_load_int(src + src_stride);
/* load src data in cache memory */
prefetch_load_int(src + src_stride);
for (r = 8; r--;) {
/* use unaligned memory load and store */
__asm__ __volatile__(
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
: [a0] "=&r"(a0), [a1] "=&r"(a1)
: [src] "r"(src), [dst] "r"(dst));
for (r = 4; r--;)
{
/* use unaligned memory load and store */
__asm__ __volatile__ (
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
: [a0] "=&r" (a0), [a1] "=&r" (a1)
: [src] "r" (src), [dst] "r" (dst)
);
src += src_stride;
dst += dst_stride;
}
}
src += src_stride;
dst += dst_stride;
}
__inline void vp8_copy_mem8x4_dspr2(unsigned char *RESTRICT src, int src_stride,
unsigned char *RESTRICT dst,
int dst_stride) {
int r;
unsigned int a0, a1;
/* load src data in cache memory */
prefetch_load_int(src + src_stride);
for (r = 4; r--;) {
/* use unaligned memory load and store */
__asm__ __volatile__(
"ulw %[a0], 0(%[src]) \n\t"
"ulw %[a1], 4(%[src]) \n\t"
"sw %[a0], 0(%[dst]) \n\t"
"sw %[a1], 4(%[dst]) \n\t"
: [a0] "=&r"(a0), [a1] "=&r"(a1)
: [src] "r"(src), [dst] "r"(dst));
src += src_stride;
dst += dst_stride;
}
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -11,60 +11,52 @@
#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static void copy_8x4_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
uint64_t src0, src1, src2, src3;
static void copy_8x4_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
int32_t dst_stride) {
uint64_t src0, src1, src2, src3;
LD4(src, src_stride, src0, src1, src2, src3);
SD4(src0, src1, src2, src3, dst, dst_stride);
LD4(src, src_stride, src0, src1, src2, src3);
SD4(src0, src1, src2, src3, dst, dst_stride);
}
static void copy_8x8_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
uint64_t src0, src1, src2, src3;
static void copy_8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
int32_t dst_stride) {
uint64_t src0, src1, src2, src3;
LD4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
SD4(src0, src1, src2, src3, dst, dst_stride);
dst += (4 * dst_stride);
LD4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
SD4(src0, src1, src2, src3, dst, dst_stride);
dst += (4 * dst_stride);
LD4(src, src_stride, src0, src1, src2, src3);
SD4(src0, src1, src2, src3, dst, dst_stride);
LD4(src, src_stride, src0, src1, src2, src3);
SD4(src0, src1, src2, src3, dst, dst_stride);
}
static void copy_16x16_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
static void copy_16x16_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
int32_t dst_stride) {
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
LD_UB8(src, src_stride, src8, src9, src10, src11, src12, src13, src14,
src15);
LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
LD_UB8(src, src_stride, src8, src9, src10, src11, src12, src13, src14, src15);
ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
dst += (8 * dst_stride);
ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, dst,
dst_stride);
ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
dst += (8 * dst_stride);
ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, dst, dst_stride);
}
void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
copy_16x16_msa(src, src_stride, dst, dst_stride);
void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
int32_t dst_stride) {
copy_16x16_msa(src, src_stride, dst, dst_stride);
}
void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
copy_8x8_msa(src, src_stride, dst, dst_stride);
void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
int32_t dst_stride) {
copy_8x8_msa(src, src_stride, dst, dst_stride);
}
void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
copy_8x4_msa(src, src_stride, dst, dst_stride);
void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
int32_t dst_stride) {
copy_8x4_msa(src, src_stride, dst, dst_stride);
}

View File

@ -15,443 +15,401 @@
static const int32_t cospi8sqrt2minus1 = 20091;
static const int32_t sinpi8sqrt2 = 35468;
#define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v8i16 s4_m, s5_m, s6_m, s7_m; \
\
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \
ILVR_D2_SH(s6_m, s4_m, s7_m, s5_m, out0, out2); \
out1 = (v8i16)__msa_ilvl_d((v2i64)s6_m, (v2i64)s4_m); \
out3 = (v8i16)__msa_ilvl_d((v2i64)s7_m, (v2i64)s5_m); \
}
#define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v8i16 s4_m, s5_m, s6_m, s7_m; \
\
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \
ILVR_D2_SH(s6_m, s4_m, s7_m, s5_m, out0, out2); \
out1 = (v8i16)__msa_ilvl_d((v2i64)s6_m, (v2i64)s4_m); \
out3 = (v8i16)__msa_ilvl_d((v2i64)s7_m, (v2i64)s5_m); \
}
#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \
({ \
v8i16 out_m; \
v8i16 zero_m = { 0 }; \
v4i32 tmp1_m, tmp2_m; \
v4i32 sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
\
ILVRL_H2_SW(in, zero_m, tmp1_m, tmp2_m); \
tmp1_m >>= 16; \
tmp2_m >>= 16; \
tmp1_m = (tmp1_m * sinpi8_sqrt2_m) >> 16; \
tmp2_m = (tmp2_m * sinpi8_sqrt2_m) >> 16; \
out_m = __msa_pckev_h((v8i16)tmp2_m, (v8i16)tmp1_m); \
\
out_m; \
})
#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \
({ \
v8i16 out_m; \
v8i16 zero_m = { 0 }; \
v4i32 tmp1_m, tmp2_m; \
v4i32 sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
\
ILVRL_H2_SW(in, zero_m, tmp1_m, tmp2_m); \
tmp1_m >>= 16; \
tmp2_m >>= 16; \
tmp1_m = (tmp1_m * sinpi8_sqrt2_m) >> 16; \
tmp2_m = (tmp2_m * sinpi8_sqrt2_m) >> 16; \
out_m = __msa_pckev_h((v8i16)tmp2_m, (v8i16)tmp1_m); \
\
out_m; \
})
#define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v8i16 a1_m, b1_m, c1_m, d1_m; \
v8i16 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
v8i16 const_cospi8sqrt2minus1_m; \
\
const_cospi8sqrt2minus1_m = __msa_fill_h(cospi8sqrt2minus1); \
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \
c_tmp2_m = __msa_mul_q_h(in3, const_cospi8sqrt2minus1_m); \
c_tmp2_m = c_tmp2_m >> 1; \
c_tmp2_m = in3 + c_tmp2_m; \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = __msa_mul_q_h(in1, const_cospi8sqrt2minus1_m); \
d_tmp1_m = d_tmp1_m >> 1; \
d_tmp1_m = in1 + d_tmp1_m; \
d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
#define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v8i16 a1_m, b1_m, c1_m, d1_m; \
v8i16 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
v8i16 const_cospi8sqrt2minus1_m; \
\
const_cospi8sqrt2minus1_m = __msa_fill_h(cospi8sqrt2minus1); \
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \
c_tmp2_m = __msa_mul_q_h(in3, const_cospi8sqrt2minus1_m); \
c_tmp2_m = c_tmp2_m >> 1; \
c_tmp2_m = in3 + c_tmp2_m; \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = __msa_mul_q_h(in1, const_cospi8sqrt2minus1_m); \
d_tmp1_m = d_tmp1_m >> 1; \
d_tmp1_m = in1 + d_tmp1_m; \
d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v4i32 a1_m, b1_m, c1_m, d1_m; \
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \
\
const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1); \
sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = (in1 * sinpi8_sqrt2_m) >> 16; \
c_tmp2_m = in3 + ((in3 * const_cospi8sqrt2minus1_m) >> 16); \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = in1 + ((in1 * const_cospi8sqrt2minus1_m) >> 16); \
d_tmp2_m = (in3 * sinpi8_sqrt2_m) >> 16; \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v4i32 a1_m, b1_m, c1_m, d1_m; \
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \
\
const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1); \
sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = (in1 * sinpi8_sqrt2_m) >> 16; \
c_tmp2_m = in3 + ((in3 * const_cospi8sqrt2minus1_m) >> 16); \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = in1 + ((in1 * const_cospi8sqrt2minus1_m) >> 16); \
d_tmp2_m = (in3 * sinpi8_sqrt2_m) >> 16; \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
static void idct4x4_addblk_msa(int16_t *input, uint8_t *pred,
int32_t pred_stride,
uint8_t *dest, int32_t dest_stride)
{
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v4i32 res0, res1, res2, res3;
v16i8 zero = { 0 };
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31 };
int32_t pred_stride, uint8_t *dest,
int32_t dest_stride) {
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v4i32 res0, res1, res2, res3;
v16i8 zero = { 0 };
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
LD_SH2(input, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
UNPCK_SH_SW(input1, in2, in3);
VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
res0 = CLIP_SW_0_255(res0);
res1 = CLIP_SW_0_255(res1);
res2 = CLIP_SW_0_255(res2);
res3 = CLIP_SW_0_255(res3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
LD_SH2(input, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
UNPCK_SH_SW(input1, in2, in3);
VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
res0 = CLIP_SW_0_255(res0);
res1 = CLIP_SW_0_255(res1);
res2 = CLIP_SW_0_255(res2);
res3 = CLIP_SW_0_255(res3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
}
static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred,
int32_t pred_stride,
uint8_t *dest, int32_t dest_stride)
{
v8i16 vec;
v8i16 res0, res1, res2, res3;
v16i8 zero = { 0 };
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
v16i8 mask = { 0, 2, 4, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
int32_t pred_stride, uint8_t *dest,
int32_t dest_stride) {
v8i16 vec;
v8i16 res0, res1, res2, res3;
v16i8 zero = { 0 };
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
v16i8 mask = { 0, 2, 4, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
vec = __msa_fill_h(in_dc);
vec = __msa_srari_h(vec, 3);
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
res2, res3);
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
vec = __msa_fill_h(in_dc);
vec = __msa_srari_h(vec, 3);
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
res2, res3);
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
}
void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff)
{
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff) {
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
LD_SH2(input, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
UNPCK_SH_SW(input1, in2, in3);
BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
SRA_4V(vt0, vt1, vt2, vt3, 3);
mb_dq_coeff[0] = __msa_copy_s_h((v8i16)vt0, 0);
mb_dq_coeff[16] = __msa_copy_s_h((v8i16)vt1, 0);
mb_dq_coeff[32] = __msa_copy_s_h((v8i16)vt2, 0);
mb_dq_coeff[48] = __msa_copy_s_h((v8i16)vt3, 0);
mb_dq_coeff[64] = __msa_copy_s_h((v8i16)vt0, 2);
mb_dq_coeff[80] = __msa_copy_s_h((v8i16)vt1, 2);
mb_dq_coeff[96] = __msa_copy_s_h((v8i16)vt2, 2);
mb_dq_coeff[112] = __msa_copy_s_h((v8i16)vt3, 2);
mb_dq_coeff[128] = __msa_copy_s_h((v8i16)vt0, 4);
mb_dq_coeff[144] = __msa_copy_s_h((v8i16)vt1, 4);
mb_dq_coeff[160] = __msa_copy_s_h((v8i16)vt2, 4);
mb_dq_coeff[176] = __msa_copy_s_h((v8i16)vt3, 4);
mb_dq_coeff[192] = __msa_copy_s_h((v8i16)vt0, 6);
mb_dq_coeff[208] = __msa_copy_s_h((v8i16)vt1, 6);
mb_dq_coeff[224] = __msa_copy_s_h((v8i16)vt2, 6);
mb_dq_coeff[240] = __msa_copy_s_h((v8i16)vt3, 6);
LD_SH2(input, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
UNPCK_SH_SW(input1, in2, in3);
BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
SRA_4V(vt0, vt1, vt2, vt3, 3);
mb_dq_coeff[0] = __msa_copy_s_h((v8i16)vt0, 0);
mb_dq_coeff[16] = __msa_copy_s_h((v8i16)vt1, 0);
mb_dq_coeff[32] = __msa_copy_s_h((v8i16)vt2, 0);
mb_dq_coeff[48] = __msa_copy_s_h((v8i16)vt3, 0);
mb_dq_coeff[64] = __msa_copy_s_h((v8i16)vt0, 2);
mb_dq_coeff[80] = __msa_copy_s_h((v8i16)vt1, 2);
mb_dq_coeff[96] = __msa_copy_s_h((v8i16)vt2, 2);
mb_dq_coeff[112] = __msa_copy_s_h((v8i16)vt3, 2);
mb_dq_coeff[128] = __msa_copy_s_h((v8i16)vt0, 4);
mb_dq_coeff[144] = __msa_copy_s_h((v8i16)vt1, 4);
mb_dq_coeff[160] = __msa_copy_s_h((v8i16)vt2, 4);
mb_dq_coeff[176] = __msa_copy_s_h((v8i16)vt3, 4);
mb_dq_coeff[192] = __msa_copy_s_h((v8i16)vt0, 6);
mb_dq_coeff[208] = __msa_copy_s_h((v8i16)vt1, 6);
mb_dq_coeff[224] = __msa_copy_s_h((v8i16)vt2, 6);
mb_dq_coeff[240] = __msa_copy_s_h((v8i16)vt3, 6);
}
static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input,
uint8_t *dest, int32_t dest_stride)
{
v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1;
v8i16 in0, in1, in2, in3;
v8i16 hz0_h, hz1_h, hz2_h, hz3_h;
v16i8 dest0, dest1, dest2, dest3;
v4i32 hz0_w, hz1_w, hz2_w, hz3_w;
v4i32 vt0, vt1, vt2, vt3, res0, res1, res2, res3;
v2i64 zero = { 0 };
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31 };
uint8_t *dest, int32_t dest_stride) {
v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1;
v8i16 in0, in1, in2, in3;
v8i16 hz0_h, hz1_h, hz2_h, hz3_h;
v16i8 dest0, dest1, dest2, dest3;
v4i32 hz0_w, hz1_w, hz2_w, hz3_w;
v4i32 vt0, vt1, vt2, vt3, res0, res1, res2, res3;
v2i64 zero = { 0 };
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
LD_SH2(input, 8, input0, input1);
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
MUL2(input0, dequant_in0, input1, dequant_in1, mul0, mul1);
PCKEV_D2_SH(zero, mul0, zero, mul1, in0, in2);
PCKOD_D2_SH(zero, mul0, zero, mul1, in1, in3);
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h);
PCKEV_D2_SH(hz1_h, hz0_h, hz3_h, hz2_h, mul0, mul1);
UNPCK_SH_SW(mul0, hz0_w, hz1_w);
UNPCK_SH_SW(mul1, hz2_w, hz3_w);
TRANSPOSE4x4_SW_SW(hz0_w, hz1_w, hz2_w, hz3_w, hz0_w, hz1_w, hz2_w, hz3_w);
VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
res0 = CLIP_SW_0_255(res0);
res1 = CLIP_SW_0_255(res1);
res2 = CLIP_SW_0_255(res2);
res3 = CLIP_SW_0_255(res3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
LD_SH2(input, 8, input0, input1);
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
MUL2(input0, dequant_in0, input1, dequant_in1, mul0, mul1);
PCKEV_D2_SH(zero, mul0, zero, mul1, in0, in2);
PCKOD_D2_SH(zero, mul0, zero, mul1, in1, in3);
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h);
PCKEV_D2_SH(hz1_h, hz0_h, hz3_h, hz2_h, mul0, mul1);
UNPCK_SH_SW(mul0, hz0_w, hz1_w);
UNPCK_SH_SW(mul1, hz2_w, hz3_w);
TRANSPOSE4x4_SW_SW(hz0_w, hz1_w, hz2_w, hz3_w, hz0_w, hz1_w, hz2_w, hz3_w);
VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
res0 = CLIP_SW_0_255(res0);
res1 = CLIP_SW_0_255(res1);
res2 = CLIP_SW_0_255(res2);
res3 = CLIP_SW_0_255(res3);
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
}
static void dequant_idct4x4_addblk_2x_msa(int16_t *input,
int16_t *dequant_input,
uint8_t *dest, int32_t dest_stride)
{
v16u8 dest0, dest1, dest2, dest3;
v8i16 in0, in1, in2, in3;
v8i16 mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v8i16 res0, res1, res2, res3;
v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r;
v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r;
v16i8 zero = { 0 };
int16_t *dequant_input, uint8_t *dest,
int32_t dest_stride) {
v16u8 dest0, dest1, dest2, dest3;
v8i16 in0, in1, in2, in3;
v8i16 mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v8i16 res0, res1, res2, res3;
v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r;
v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r;
v16i8 zero = { 0 };
LD_SH4(input, 8, in0, in1, in2, in3);
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
MUL4(in0, dequant_in0, in1, dequant_in1, in2, dequant_in0, in3, dequant_in1,
mul0, mul1, mul2, mul3);
PCKEV_D2_SH(mul2, mul0, mul3, mul1, in0, in2);
PCKOD_D2_SH(mul2, mul0, mul3, mul1, in1, in3);
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
TRANSPOSE_TWO_4x4_H(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
UNPCK_SH_SW(hz0, hz0r, hz0l);
UNPCK_SH_SW(hz1, hz1r, hz1l);
UNPCK_SH_SW(hz2, hz2r, hz2l);
UNPCK_SH_SW(hz3, hz3r, hz3l);
VP8_IDCT_1D_W(hz0l, hz1l, hz2l, hz3l, vt0l, vt1l, vt2l, vt3l);
SRARI_W4_SW(vt0l, vt1l, vt2l, vt3l, 3);
VP8_IDCT_1D_W(hz0r, hz1r, hz2r, hz3r, vt0r, vt1r, vt2r, vt3r);
SRARI_W4_SW(vt0r, vt1r, vt2r, vt3r, 3);
PCKEV_H4_SH(vt0l, vt0r, vt1l, vt1r, vt2l, vt2r, vt3l, vt3r, vt0, vt1, vt2,
vt3);
TRANSPOSE_TWO_4x4_H(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
res2, res3);
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
LD_SH4(input, 8, in0, in1, in2, in3);
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
MUL4(in0, dequant_in0, in1, dequant_in1, in2, dequant_in0, in3, dequant_in1,
mul0, mul1, mul2, mul3);
PCKEV_D2_SH(mul2, mul0, mul3, mul1, in0, in2);
PCKOD_D2_SH(mul2, mul0, mul3, mul1, in1, in3);
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
TRANSPOSE_TWO_4x4_H(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
UNPCK_SH_SW(hz0, hz0r, hz0l);
UNPCK_SH_SW(hz1, hz1r, hz1l);
UNPCK_SH_SW(hz2, hz2r, hz2l);
UNPCK_SH_SW(hz3, hz3r, hz3l);
VP8_IDCT_1D_W(hz0l, hz1l, hz2l, hz3l, vt0l, vt1l, vt2l, vt3l);
SRARI_W4_SW(vt0l, vt1l, vt2l, vt3l, 3);
VP8_IDCT_1D_W(hz0r, hz1r, hz2r, hz3r, vt0r, vt1r, vt2r, vt3r);
SRARI_W4_SW(vt0r, vt1r, vt2r, vt3r, 3);
PCKEV_H4_SH(vt0l, vt0r, vt1l, vt1r, vt2l, vt2r, vt3l, vt3r, vt0, vt1, vt2,
vt3);
TRANSPOSE_TWO_4x4_H(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1, res2,
res3);
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
__asm__ __volatile__(
"sw $zero, 0(%[input]) \n\t"
"sw $zero, 4(%[input]) \n\t"
"sw $zero, 8(%[input]) \n\t"
"sw $zero, 12(%[input]) \n\t"
"sw $zero, 16(%[input]) \n\t"
"sw $zero, 20(%[input]) \n\t"
"sw $zero, 24(%[input]) \n\t"
"sw $zero, 28(%[input]) \n\t"
"sw $zero, 32(%[input]) \n\t"
"sw $zero, 36(%[input]) \n\t"
"sw $zero, 40(%[input]) \n\t"
"sw $zero, 44(%[input]) \n\t"
"sw $zero, 48(%[input]) \n\t"
"sw $zero, 52(%[input]) \n\t"
"sw $zero, 56(%[input]) \n\t"
"sw $zero, 60(%[input]) \n\t"::
__asm__ __volatile__(
"sw $zero, 0(%[input]) \n\t"
"sw $zero, 4(%[input]) \n\t"
"sw $zero, 8(%[input]) \n\t"
"sw $zero, 12(%[input]) \n\t"
"sw $zero, 16(%[input]) \n\t"
"sw $zero, 20(%[input]) \n\t"
"sw $zero, 24(%[input]) \n\t"
"sw $zero, 28(%[input]) \n\t"
"sw $zero, 32(%[input]) \n\t"
"sw $zero, 36(%[input]) \n\t"
"sw $zero, 40(%[input]) \n\t"
"sw $zero, 44(%[input]) \n\t"
"sw $zero, 48(%[input]) \n\t"
"sw $zero, 52(%[input]) \n\t"
"sw $zero, 56(%[input]) \n\t"
"sw $zero, 60(%[input]) \n\t" ::
[input] "r"(input)
);
[input] "r"(input));
}
static void dequant_idct_addconst_2x_msa(int16_t *input, int16_t *dequant_input,
uint8_t *dest, int32_t dest_stride)
{
v8i16 input_dc0, input_dc1, vec;
v16u8 dest0, dest1, dest2, dest3;
v16i8 zero = { 0 };
v8i16 res0, res1, res2, res3;
uint8_t *dest, int32_t dest_stride) {
v8i16 input_dc0, input_dc1, vec;
v16u8 dest0, dest1, dest2, dest3;
v16i8 zero = { 0 };
v8i16 res0, res1, res2, res3;
input_dc0 = __msa_fill_h(input[0] * dequant_input[0]);
input_dc1 = __msa_fill_h(input[16] * dequant_input[0]);
SRARI_H2_SH(input_dc0, input_dc1, 3);
vec = (v8i16)__msa_pckev_d((v2i64)input_dc1, (v2i64)input_dc0);
input[0] = 0;
input[16] = 0;
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0,
res1, res2, res3);
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
res2, res3);
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
input_dc0 = __msa_fill_h(input[0] * dequant_input[0]);
input_dc1 = __msa_fill_h(input[16] * dequant_input[0]);
SRARI_H2_SH(input_dc0, input_dc1, 3);
vec = (v8i16)__msa_pckev_d((v2i64)input_dc1, (v2i64)input_dc0);
input[0] = 0;
input[16] = 0;
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1, res2,
res3);
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
}
void vp8_short_idct4x4llm_msa(int16_t *input, uint8_t *pred_ptr,
int32_t pred_stride, uint8_t *dst_ptr,
int32_t dst_stride)
{
idct4x4_addblk_msa(input, pred_ptr, pred_stride, dst_ptr, dst_stride);
int32_t dst_stride) {
idct4x4_addblk_msa(input, pred_ptr, pred_stride, dst_ptr, dst_stride);
}
void vp8_dc_only_idct_add_msa(int16_t input_dc, uint8_t *pred_ptr,
int32_t pred_stride, uint8_t *dst_ptr,
int32_t dst_stride)
{
idct4x4_addconst_msa(input_dc, pred_ptr, pred_stride, dst_ptr, dst_stride);
int32_t dst_stride) {
idct4x4_addconst_msa(input_dc, pred_ptr, pred_stride, dst_ptr, dst_stride);
}
void vp8_dequantize_b_msa(BLOCKD *d, int16_t *DQC)
{
v8i16 dqc0, dqc1, q0, q1, dq0, dq1;
void vp8_dequantize_b_msa(BLOCKD *d, int16_t *DQC) {
v8i16 dqc0, dqc1, q0, q1, dq0, dq1;
LD_SH2(DQC, 8, dqc0, dqc1);
LD_SH2(d->qcoeff, 8, q0, q1);
MUL2(dqc0, q0, dqc1, q1, dq0, dq1);
ST_SH2(dq0, dq1, d->dqcoeff, 8);
LD_SH2(DQC, 8, dqc0, dqc1);
LD_SH2(d->qcoeff, 8, q0, q1);
MUL2(dqc0, q0, dqc1, q1, dq0, dq1);
ST_SH2(dq0, dq1, d->dqcoeff, 8);
}
void vp8_dequant_idct_add_msa(int16_t *input, int16_t *dq,
uint8_t *dest, int32_t stride)
{
dequant_idct4x4_addblk_msa(input, dq, dest, stride);
void vp8_dequant_idct_add_msa(int16_t *input, int16_t *dq, uint8_t *dest,
int32_t stride) {
dequant_idct4x4_addblk_msa(input, dq, dest, stride);
__asm__ __volatile__ (
"sw $zero, 0(%[input]) \n\t"
"sw $zero, 4(%[input]) \n\t"
"sw $zero, 8(%[input]) \n\t"
"sw $zero, 12(%[input]) \n\t"
"sw $zero, 16(%[input]) \n\t"
"sw $zero, 20(%[input]) \n\t"
"sw $zero, 24(%[input]) \n\t"
"sw $zero, 28(%[input]) \n\t"
__asm__ __volatile__(
"sw $zero, 0(%[input]) \n\t"
"sw $zero, 4(%[input]) \n\t"
"sw $zero, 8(%[input]) \n\t"
"sw $zero, 12(%[input]) \n\t"
"sw $zero, 16(%[input]) \n\t"
"sw $zero, 20(%[input]) \n\t"
"sw $zero, 24(%[input]) \n\t"
"sw $zero, 28(%[input]) \n\t"
:
: [input] "r" (input)
);
:
: [input] "r"(input));
}
void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq,
uint8_t *dst, int32_t stride,
char *eobs)
{
int16_t *eobs_h = (int16_t *)eobs;
uint8_t i;
void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq, uint8_t *dst,
int32_t stride, char *eobs) {
int16_t *eobs_h = (int16_t *)eobs;
uint8_t i;
for (i = 4; i--;)
{
if (eobs_h[0])
{
if (eobs_h[0] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dst, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dst, stride);
}
}
q += 32;
if (eobs_h[1])
{
if (eobs_h[1] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dst + 8, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dst + 8, stride);
}
}
q += 32;
dst += (4 * stride);
eobs_h += 2;
}
}
void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq,
uint8_t *dstu, uint8_t *dstv,
int32_t stride, char *eobs)
{
int16_t *eobs_h = (int16_t *)eobs;
if (eobs_h[0])
{
if (eobs_h[0] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
}
}
q += 32;
dstu += (stride * 4);
if (eobs_h[1])
{
if (eobs_h[1] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
}
for (i = 4; i--;) {
if (eobs_h[0]) {
if (eobs_h[0] & 0xfefe) {
dequant_idct4x4_addblk_2x_msa(q, dq, dst, stride);
} else {
dequant_idct_addconst_2x_msa(q, dq, dst, stride);
}
}
q += 32;
if (eobs_h[2])
{
if (eobs_h[2] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
}
if (eobs_h[1]) {
if (eobs_h[1] & 0xfefe) {
dequant_idct4x4_addblk_2x_msa(q, dq, dst + 8, stride);
} else {
dequant_idct_addconst_2x_msa(q, dq, dst + 8, stride);
}
}
q += 32;
dstv += (stride * 4);
if (eobs_h[3])
{
if (eobs_h[3] & 0xfefe)
{
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
}
else
{
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
}
}
dst += (4 * stride);
eobs_h += 2;
}
}
void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq, uint8_t *dstu,
uint8_t *dstv, int32_t stride,
char *eobs) {
int16_t *eobs_h = (int16_t *)eobs;
if (eobs_h[0]) {
if (eobs_h[0] & 0xfefe) {
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
} else {
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
}
}
q += 32;
dstu += (stride * 4);
if (eobs_h[1]) {
if (eobs_h[1] & 0xfefe) {
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
} else {
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
}
}
q += 32;
if (eobs_h[2]) {
if (eobs_h[2] & 0xfefe) {
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
} else {
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
}
}
q += 32;
dstv += (stride * 4);
if (eobs_h[3]) {
if (eobs_h[3] & 0xfefe) {
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
} else {
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -14,133 +14,126 @@
static void filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight)
{
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
int32_t row;
uint64_t src0_d, src1_d, dst0_d, dst1_d;
v16i8 src0 = { 0 };
v16i8 src1 = { 0 };
v16i8 dst0 = { 0 };
v16i8 dst1 = { 0 };
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
int32_t src_weight) {
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
int32_t row;
uint64_t src0_d, src1_d, dst0_d, dst1_d;
v16i8 src0 = { 0 };
v16i8 src1 = { 0 };
v16i8 dst0 = { 0 };
v16i8 dst1 = { 0 };
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
src_wt = __msa_fill_h(src_weight);
dst_wt = __msa_fill_h(dst_weight);
src_wt = __msa_fill_h(src_weight);
dst_wt = __msa_fill_h(dst_weight);
for (row = 2; row--;)
{
LD2(src_ptr, src_stride, src0_d, src1_d);
src_ptr += (2 * src_stride);
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
INSERT_D2_SB(src0_d, src1_d, src0);
INSERT_D2_SB(dst0_d, dst1_d, dst0);
for (row = 2; row--;) {
LD2(src_ptr, src_stride, src0_d, src1_d);
src_ptr += (2 * src_stride);
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
INSERT_D2_SB(src0_d, src1_d, src0);
INSERT_D2_SB(dst0_d, dst1_d, dst0);
LD2(src_ptr, src_stride, src0_d, src1_d);
src_ptr += (2 * src_stride);
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
INSERT_D2_SB(src0_d, src1_d, src1);
INSERT_D2_SB(dst0_d, dst1_d, dst1);
LD2(src_ptr, src_stride, src0_d, src1_d);
src_ptr += (2 * src_stride);
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
INSERT_D2_SB(src0_d, src1_d, src1);
INSERT_D2_SB(dst0_d, dst1_d, dst1);
UNPCK_UB_SH(src0, src_r, src_l);
UNPCK_UB_SH(dst0, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
ST8x2_UB(dst0, dst_ptr, dst_stride);
dst_ptr += (2 * dst_stride);
UNPCK_UB_SH(src0, src_r, src_l);
UNPCK_UB_SH(dst0, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
ST8x2_UB(dst0, dst_ptr, dst_stride);
dst_ptr += (2 * dst_stride);
UNPCK_UB_SH(src1, src_r, src_l);
UNPCK_UB_SH(dst1, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
ST8x2_UB(dst1, dst_ptr, dst_stride);
dst_ptr += (2 * dst_stride);
}
UNPCK_UB_SH(src1, src_r, src_l);
UNPCK_UB_SH(dst1, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
ST8x2_UB(dst1, dst_ptr, dst_stride);
dst_ptr += (2 * dst_stride);
}
}
static void filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight)
{
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
int32_t row;
v16i8 src0, src1, src2, src3;
v16i8 dst0, dst1, dst2, dst3;
v8i16 src_wt, dst_wt;
v8i16 res_h_r, res_h_l;
v8i16 src_r, src_l, dst_r, dst_l;
int32_t src_weight) {
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
int32_t row;
v16i8 src0, src1, src2, src3;
v16i8 dst0, dst1, dst2, dst3;
v8i16 src_wt, dst_wt;
v8i16 res_h_r, res_h_l;
v8i16 src_r, src_l, dst_r, dst_l;
src_wt = __msa_fill_h(src_weight);
dst_wt = __msa_fill_h(dst_weight);
src_wt = __msa_fill_h(src_weight);
dst_wt = __msa_fill_h(dst_weight);
for (row = 4; row--;)
{
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
src_ptr += (4 * src_stride);
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
for (row = 4; row--;) {
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
src_ptr += (4 * src_stride);
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
UNPCK_UB_SH(src0, src_r, src_l);
UNPCK_UB_SH(dst0, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src0, src_r, src_l);
UNPCK_UB_SH(dst0, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src1, src_r, src_l);
UNPCK_UB_SH(dst1, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src1, src_r, src_l);
UNPCK_UB_SH(dst1, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src2, src_r, src_l);
UNPCK_UB_SH(dst2, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src2, src_r, src_l);
UNPCK_UB_SH(dst2, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
UNPCK_UB_SH(src3, src_r, src_l);
UNPCK_UB_SH(dst3, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
}
UNPCK_UB_SH(src3, src_r, src_l);
UNPCK_UB_SH(dst3, dst_r, dst_l);
res_h_r = (src_r * src_wt);
res_h_r += (dst_r * dst_wt);
res_h_l = (src_l * src_wt);
res_h_l += (dst_l * dst_wt);
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
dst_ptr += dst_stride;
}
}
void vp8_filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight)
{
filter_by_weight16x16_msa(src_ptr, src_stride, dst_ptr, dst_stride,
src_weight);
int32_t src_weight) {
filter_by_weight16x16_msa(src_ptr, src_stride, dst_ptr, dst_stride,
src_weight);
}
void vp8_filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
uint8_t *dst_ptr, int32_t dst_stride,
int32_t src_weight)
{
filter_by_weight8x8_msa(src_ptr, src_stride, dst_ptr, dst_stride,
src_weight);
int32_t src_weight) {
filter_by_weight8x8_msa(src_ptr, src_stride, dst_ptr, dst_stride, src_weight);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -8,33 +8,31 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "entropy.h"
const int vp8_mode_contexts[6][4] =
{
{
/* 0 */
7, 1, 1, 143,
},
{
/* 1 */
14, 18, 14, 107,
},
{
/* 2 */
135, 64, 57, 68,
},
{
/* 3 */
60, 56, 128, 65,
},
{
/* 4 */
159, 134, 128, 34,
},
{
/* 5 */
234, 188, 128, 28,
},
const int vp8_mode_contexts[6][4] = {
{
/* 0 */
7, 1, 1, 143,
},
{
/* 1 */
14, 18, 14, 107,
},
{
/* 2 */
135, 64, 57, 68,
},
{
/* 3 */
60, 56, 128, 65,
},
{
/* 4 */
159, 134, 128, 34,
},
{
/* 5 */
234, 188, 128, 28,
},
};

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_MODECONT_H_
#define VP8_COMMON_MODECONT_H_

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_MV_H_
#define VP8_COMMON_MV_H_
#include "vpx/vpx_integer.h"
@ -17,17 +16,15 @@
extern "C" {
#endif
typedef struct
{
short row;
short col;
typedef struct {
short row;
short col;
} MV;
typedef union int_mv
{
uint32_t as_int;
MV as_mv;
} int_mv; /* facilitates faster equality tests and copies */
typedef union int_mv {
uint32_t as_int;
MV as_mv;
} int_mv; /* facilitates faster equality tests and copies */
#ifdef __cplusplus
} // extern "C"

View File

@ -8,13 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ONYX_H_
#define VP8_COMMON_ONYX_H_
#ifdef __cplusplus
extern "C"
{
extern "C" {
#endif
#include "vpx_config.h"
@ -24,256 +22,258 @@ extern "C"
#include "vpx_scale/yv12config.h"
#include "ppflags.h"
struct VP8_COMP;
struct VP8_COMP;
/* Create/destroy static data structures. */
/* Create/destroy static data structures. */
typedef enum
{
NORMAL = 0,
FOURFIVE = 1,
THREEFIVE = 2,
ONETWO = 3
typedef enum {
NORMAL = 0,
FOURFIVE = 1,
THREEFIVE = 2,
ONETWO = 3
} VPX_SCALING;
} VPX_SCALING;
typedef enum {
USAGE_LOCAL_FILE_PLAYBACK = 0x0,
USAGE_STREAM_FROM_SERVER = 0x1,
USAGE_CONSTRAINED_QUALITY = 0x2,
USAGE_CONSTANT_QUALITY = 0x3
} END_USAGE;
typedef enum
{
USAGE_LOCAL_FILE_PLAYBACK = 0x0,
USAGE_STREAM_FROM_SERVER = 0x1,
USAGE_CONSTRAINED_QUALITY = 0x2,
USAGE_CONSTANT_QUALITY = 0x3
} END_USAGE;
typedef enum
{
MODE_REALTIME = 0x0,
MODE_GOODQUALITY = 0x1,
MODE_BESTQUALITY = 0x2,
MODE_FIRSTPASS = 0x3,
MODE_SECONDPASS = 0x4,
MODE_SECONDPASS_BEST = 0x5
} MODE;
typedef enum
{
FRAMEFLAGS_KEY = 1,
FRAMEFLAGS_GOLDEN = 2,
FRAMEFLAGS_ALTREF = 4
} FRAMETYPE_FLAGS;
typedef enum {
MODE_REALTIME = 0x0,
MODE_GOODQUALITY = 0x1,
MODE_BESTQUALITY = 0x2,
MODE_FIRSTPASS = 0x3,
MODE_SECONDPASS = 0x4,
MODE_SECONDPASS_BEST = 0x5
} MODE;
typedef enum {
FRAMEFLAGS_KEY = 1,
FRAMEFLAGS_GOLDEN = 2,
FRAMEFLAGS_ALTREF = 4
} FRAMETYPE_FLAGS;
#include <assert.h>
static INLINE void Scale2Ratio(int mode, int *hr, int *hs)
{
switch (mode)
{
case NORMAL:
*hr = 1;
*hs = 1;
break;
case FOURFIVE:
*hr = 4;
*hs = 5;
break;
case THREEFIVE:
*hr = 3;
*hs = 5;
break;
case ONETWO:
*hr = 1;
*hs = 2;
break;
default:
*hr = 1;
*hs = 1;
assert(0);
break;
}
}
static INLINE void Scale2Ratio(int mode, int *hr, int *hs) {
switch (mode) {
case NORMAL:
*hr = 1;
*hs = 1;
break;
case FOURFIVE:
*hr = 4;
*hs = 5;
break;
case THREEFIVE:
*hr = 3;
*hs = 5;
break;
case ONETWO:
*hr = 1;
*hs = 2;
break;
default:
*hr = 1;
*hs = 1;
assert(0);
break;
}
}
typedef struct
{
/* 4 versions of bitstream defined:
* 0 best quality/slowest decode, 3 lowest quality/fastest decode
*/
int Version;
int Width;
int Height;
struct vpx_rational timebase;
unsigned int target_bandwidth; /* kilobits per second */
typedef struct {
/* 4 versions of bitstream defined:
* 0 best quality/slowest decode, 3 lowest quality/fastest decode
*/
int Version;
int Width;
int Height;
struct vpx_rational timebase;
unsigned int target_bandwidth; /* kilobits per second */
/* Parameter used for applying denoiser.
* For temporal denoiser: noise_sensitivity = 0 means off,
* noise_sensitivity = 1 means temporal denoiser on for Y channel only,
* noise_sensitivity = 2 means temporal denoiser on for all channels.
* noise_sensitivity = 3 means aggressive denoising mode.
* noise_sensitivity >= 4 means adaptive denoising mode.
* Temporal denoiser is enabled via the configuration option:
* CONFIG_TEMPORAL_DENOISING.
* For spatial denoiser: noise_sensitivity controls the amount of
* pre-processing blur: noise_sensitivity = 0 means off.
* Spatial denoiser invoked under !CONFIG_TEMPORAL_DENOISING.
*/
int noise_sensitivity;
/* Parameter used for applying denoiser.
* For temporal denoiser: noise_sensitivity = 0 means off,
* noise_sensitivity = 1 means temporal denoiser on for Y channel only,
* noise_sensitivity = 2 means temporal denoiser on for all channels.
* noise_sensitivity = 3 means aggressive denoising mode.
* noise_sensitivity >= 4 means adaptive denoising mode.
* Temporal denoiser is enabled via the configuration option:
* CONFIG_TEMPORAL_DENOISING.
* For spatial denoiser: noise_sensitivity controls the amount of
* pre-processing blur: noise_sensitivity = 0 means off.
* Spatial denoiser invoked under !CONFIG_TEMPORAL_DENOISING.
*/
int noise_sensitivity;
/* parameter used for sharpening output: recommendation 0: */
int Sharpness;
int cpu_used;
unsigned int rc_max_intra_bitrate_pct;
unsigned int screen_content_mode;
/* parameter used for sharpening output: recommendation 0: */
int Sharpness;
int cpu_used;
unsigned int rc_max_intra_bitrate_pct;
unsigned int screen_content_mode;
/* mode ->
*(0)=Realtime/Live Encoding. This mode is optimized for realtim
* encoding (for example, capturing a television signal or feed
* from a live camera). ( speed setting controls how fast )
*(1)=Good Quality Fast Encoding. The encoder balances quality with
* the amount of time it takes to encode the output. ( speed
* setting controls how fast )
*(2)=One Pass - Best Quality. The encoder places priority on the
* quality of the output over encoding speed. The output is
* compressed at the highest possible quality. This option takes
* the longest amount of time to encode. ( speed setting ignored
* )
*(3)=Two Pass - First Pass. The encoder generates a file of
* statistics for use in the second encoding pass. ( speed
* setting controls how fast )
*(4)=Two Pass - Second Pass. The encoder uses the statistics that
* were generated in the first encoding pass to create the
* compressed output. ( speed setting controls how fast )
*(5)=Two Pass - Second Pass Best. The encoder uses the statistics
* that were generated in the first encoding pass to create the
* compressed output using the highest possible quality, and
* taking a longer amount of time to encode.. ( speed setting
* ignored )
*/
int Mode;
/* mode ->
*(0)=Realtime/Live Encoding. This mode is optimized for realtim
* encoding (for example, capturing a television signal or feed
* from a live camera). ( speed setting controls how fast )
*(1)=Good Quality Fast Encoding. The encoder balances quality with
* the amount of time it takes to encode the output. ( speed
* setting controls how fast )
*(2)=One Pass - Best Quality. The encoder places priority on the
* quality of the output over encoding speed. The output is
* compressed at the highest possible quality. This option takes
* the longest amount of time to encode. ( speed setting ignored
* )
*(3)=Two Pass - First Pass. The encoder generates a file of
* statistics for use in the second encoding pass. ( speed
* setting controls how fast )
*(4)=Two Pass - Second Pass. The encoder uses the statistics that
* were generated in the first encoding pass to create the
* compressed output. ( speed setting controls how fast )
*(5)=Two Pass - Second Pass Best. The encoder uses the statistics
* that were generated in the first encoding pass to create the
* compressed output using the highest possible quality, and
* taking a longer amount of time to encode.. ( speed setting
* ignored )
*/
int Mode;
/* Key Framing Operations */
int auto_key; /* automatically detect cut scenes */
int key_freq; /* maximum distance to key frame. */
/* Key Framing Operations */
int auto_key; /* automatically detect cut scenes */
int key_freq; /* maximum distance to key frame. */
/* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */
int allow_lag;
int lag_in_frames; /* how many frames lag before we start encoding */
/* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */
int allow_lag;
int lag_in_frames; /* how many frames lag before we start encoding */
/*
* DATARATE CONTROL OPTIONS
*/
/*
* DATARATE CONTROL OPTIONS
*/
int end_usage; /* vbr or cbr */
int end_usage; /* vbr or cbr */
/* buffer targeting aggressiveness */
int under_shoot_pct;
int over_shoot_pct;
/* buffer targeting aggressiveness */
int under_shoot_pct;
int over_shoot_pct;
/* buffering parameters */
int64_t starting_buffer_level;
int64_t optimal_buffer_level;
int64_t maximum_buffer_size;
/* buffering parameters */
int64_t starting_buffer_level;
int64_t optimal_buffer_level;
int64_t maximum_buffer_size;
int64_t starting_buffer_level_in_ms;
int64_t optimal_buffer_level_in_ms;
int64_t maximum_buffer_size_in_ms;
int64_t starting_buffer_level_in_ms;
int64_t optimal_buffer_level_in_ms;
int64_t maximum_buffer_size_in_ms;
/* controlling quality */
int fixed_q;
int worst_allowed_q;
int best_allowed_q;
int cq_level;
/* controlling quality */
int fixed_q;
int worst_allowed_q;
int best_allowed_q;
int cq_level;
/* allow internal resizing */
int allow_spatial_resampling;
int resample_down_water_mark;
int resample_up_water_mark;
/* allow internal resizing */
int allow_spatial_resampling;
int resample_down_water_mark;
int resample_up_water_mark;
/* allow internal frame rate alterations */
int allow_df;
int drop_frames_water_mark;
/* allow internal frame rate alterations */
int allow_df;
int drop_frames_water_mark;
/* two pass datarate control */
int two_pass_vbrbias;
int two_pass_vbrmin_section;
int two_pass_vbrmax_section;
/* two pass datarate control */
int two_pass_vbrbias;
int two_pass_vbrmin_section;
int two_pass_vbrmax_section;
/*
* END DATARATE CONTROL OPTIONS
*/
/*
* END DATARATE CONTROL OPTIONS
*/
/* these parameters aren't to be used in final build don't use!!! */
int play_alternate;
int alt_freq;
int alt_q;
int key_q;
int gold_q;
/* these parameters aren't to be used in final build don't use!!! */
int play_alternate;
int alt_freq;
int alt_q;
int key_q;
int gold_q;
int multi_threaded; /* how many threads to run the encoder on */
int token_partitions; /* how many token partitions to create */
int multi_threaded; /* how many threads to run the encoder on */
int token_partitions; /* how many token partitions to create */
/* early breakout threshold: for video conf recommend 800 */
int encode_breakout;
/* early breakout threshold: for video conf recommend 800 */
int encode_breakout;
/* Bitfield defining the error resiliency features to enable.
* Can provide decodable frames after losses in previous
* frames and decodable partitions after losses in the same frame.
*/
unsigned int error_resilient_mode;
/* Bitfield defining the error resiliency features to enable.
* Can provide decodable frames after losses in previous
* frames and decodable partitions after losses in the same frame.
*/
unsigned int error_resilient_mode;
int arnr_max_frames;
int arnr_strength;
int arnr_type;
int arnr_max_frames;
int arnr_strength;
int arnr_type;
vpx_fixed_buf_t two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
vpx_fixed_buf_t two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
vp8e_tuning tuning;
vp8e_tuning tuning;
/* Temporal scaling parameters */
unsigned int number_of_layers;
unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY];
unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY];
unsigned int periodicity;
unsigned int layer_id[VPX_TS_MAX_PERIODICITY];
/* Temporal scaling parameters */
unsigned int number_of_layers;
unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY];
unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY];
unsigned int periodicity;
unsigned int layer_id[VPX_TS_MAX_PERIODICITY];
#if CONFIG_MULTI_RES_ENCODING
/* Number of total resolutions encoded */
unsigned int mr_total_resolutions;
/* Number of total resolutions encoded */
unsigned int mr_total_resolutions;
/* Current encoder ID */
unsigned int mr_encoder_id;
/* Current encoder ID */
unsigned int mr_encoder_id;
/* Down-sampling factor */
vpx_rational_t mr_down_sampling_factor;
/* Down-sampling factor */
vpx_rational_t mr_down_sampling_factor;
/* Memory location to store low-resolution encoder's mode info */
void* mr_low_res_mode_info;
/* Memory location to store low-resolution encoder's mode info */
void *mr_low_res_mode_info;
#endif
} VP8_CONFIG;
} VP8_CONFIG;
void vp8_initialize();
void vp8_initialize();
struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf);
void vp8_remove_compressor(struct VP8_COMP **comp);
struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf);
void vp8_remove_compressor(struct VP8_COMP* *comp);
void vp8_init_config(struct VP8_COMP *onyx, VP8_CONFIG *oxcf);
void vp8_change_config(struct VP8_COMP *onyx, VP8_CONFIG *oxcf);
void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
int vp8_receive_raw_frame(struct VP8_COMP *comp, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time_stamp);
int vp8_get_compressed_data(struct VP8_COMP *comp, unsigned int *frame_flags,
unsigned long *size, unsigned char *dest,
unsigned char *dest_end, int64_t *time_stamp,
int64_t *time_end, int flush);
int vp8_get_preview_raw_frame(struct VP8_COMP *comp, YV12_BUFFER_CONFIG *dest,
vp8_ppflags_t *flags);
int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags);
int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags);
int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
int vp8_update_entropy(struct VP8_COMP* comp, int update);
int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols);
int vp8_set_internal_size(struct VP8_COMP* comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
int vp8_get_quantizer(struct VP8_COMP* c);
int vp8_use_as_reference(struct VP8_COMP *comp, int ref_frame_flags);
int vp8_update_reference(struct VP8_COMP *comp, int ref_frame_flags);
int vp8_get_reference(struct VP8_COMP *comp,
enum vpx_ref_frame_type ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
int vp8_set_reference(struct VP8_COMP *comp,
enum vpx_ref_frame_type ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
int vp8_update_entropy(struct VP8_COMP *comp, int update);
int vp8_set_roimap(struct VP8_COMP *comp, unsigned char *map, unsigned int rows,
unsigned int cols, int delta_q[4], int delta_lf[4],
unsigned int threshold[4]);
int vp8_set_active_map(struct VP8_COMP *comp, unsigned char *map,
unsigned int rows, unsigned int cols);
int vp8_set_internal_size(struct VP8_COMP *comp, VPX_SCALING horiz_mode,
VPX_SCALING vert_mode);
int vp8_get_quantizer(struct VP8_COMP *c);
#ifdef __cplusplus
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ONYXC_INT_H_
#define VP8_COMMON_ONYXC_INT_H_
@ -38,144 +37,143 @@ extern "C" {
#define MAX_PARTITIONS 9
typedef struct frame_contexts
{
vp8_prob bmode_prob [VP8_BINTRAMODES-1];
vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */
vp8_prob uv_mode_prob [VP8_UV_MODES-1];
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
MV_CONTEXT mvc[2];
typedef struct frame_contexts {
vp8_prob bmode_prob[VP8_BINTRAMODES - 1];
vp8_prob ymode_prob[VP8_YMODES - 1]; /* interframe intra mode probs */
vp8_prob uv_mode_prob[VP8_UV_MODES - 1];
vp8_prob sub_mv_ref_prob[VP8_SUBMVREFS - 1];
vp8_prob
coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
MV_CONTEXT mvc[2];
} FRAME_CONTEXT;
typedef enum
{
ONE_PARTITION = 0,
TWO_PARTITION = 1,
FOUR_PARTITION = 2,
EIGHT_PARTITION = 3
typedef enum {
ONE_PARTITION = 0,
TWO_PARTITION = 1,
FOUR_PARTITION = 2,
EIGHT_PARTITION = 3
} TOKEN_PARTITION;
typedef enum
{
RECON_CLAMP_REQUIRED = 0,
RECON_CLAMP_NOTREQUIRED = 1
typedef enum {
RECON_CLAMP_REQUIRED = 0,
RECON_CLAMP_NOTREQUIRED = 1
} CLAMP_TYPE;
typedef struct VP8Common
{
struct vpx_internal_error_info error;
{
struct vpx_internal_error_info error;
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
int Width;
int Height;
int horiz_scale;
int vert_scale;
int Width;
int Height;
int horiz_scale;
int vert_scale;
CLAMP_TYPE clamp_type;
CLAMP_TYPE clamp_type;
YV12_BUFFER_CONFIG *frame_to_show;
YV12_BUFFER_CONFIG *frame_to_show;
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
YV12_BUFFER_CONFIG temp_scale_frame;
YV12_BUFFER_CONFIG temp_scale_frame;
#if CONFIG_POSTPROC
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG post_proc_buffer_int;
int post_proc_buffer_int_used;
unsigned char *pp_limits_buffer; /* post-processing filter coefficients */
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG post_proc_buffer_int;
int post_proc_buffer_int_used;
unsigned char *pp_limits_buffer; /* post-processing filter coefficients */
#endif
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
FRAME_TYPE frame_type;
FRAME_TYPE
last_frame_type; /* Save last frame's frame type for motion search. */
FRAME_TYPE frame_type;
int show_frame;
int show_frame;
int frame_flags;
int MBs;
int mb_rows;
int mb_cols;
int mode_info_stride;
int frame_flags;
int MBs;
int mb_rows;
int mb_cols;
int mode_info_stride;
/* profile settings */
int mb_no_coeff_skip;
int no_lpf;
int use_bilinear_mc_filter;
int full_pixel;
/* profile settings */
int mb_no_coeff_skip;
int no_lpf;
int use_bilinear_mc_filter;
int full_pixel;
int base_qindex;
int base_qindex;
int y1dc_delta_q;
int y2dc_delta_q;
int y2ac_delta_q;
int uvdc_delta_q;
int uvac_delta_q;
int y1dc_delta_q;
int y2dc_delta_q;
int y2ac_delta_q;
int uvdc_delta_q;
int uvac_delta_q;
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
MODE_INFO *mip; /* Base of allocated array */
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
MODE_INFO *mip; /* Base of allocated array */
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
#if CONFIG_ERROR_CONCEALMENT
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
#endif
MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame
to show */
LOOPFILTERTYPE filter_type;
/* MODE_INFO for the last decoded frame to show */
MODE_INFO *show_frame_mi;
LOOPFILTERTYPE filter_type;
loop_filter_info_n lf_info;
loop_filter_info_n lf_info;
int filter_level;
int last_sharpness_level;
int sharpness_level;
int filter_level;
int last_sharpness_level;
int sharpness_level;
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
/* Y,U,V,Y2 */
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
FRAME_CONTEXT lfc; /* last frame entropy */
FRAME_CONTEXT fc; /* this frame entropy */
FRAME_CONTEXT lfc; /* last frame entropy */
FRAME_CONTEXT fc; /* this frame entropy */
unsigned int current_video_frame;
unsigned int current_video_frame;
int version;
int version;
TOKEN_PARTITION multi_token_partition;
TOKEN_PARTITION multi_token_partition;
#ifdef PACKET_TESTING
VP8_HEADER oh;
VP8_HEADER oh;
#endif
#if CONFIG_POSTPROC_VISUALIZER
double bitrate;
double framerate;
double bitrate;
double framerate;
#endif
#if CONFIG_MULTITHREAD
int processor_core_count;
int processor_core_count;
#endif
#if CONFIG_POSTPROC
struct postproc_state postproc_state;
struct postproc_state postproc_state;
#endif
int cpu_caps;
int cpu_caps;
} VP8_COMMON;
#ifdef __cplusplus

View File

@ -8,15 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_ONYXD_H_
#define VP8_COMMON_ONYXD_H_
/* Create/destroy static data structures. */
#ifdef __cplusplus
extern "C"
{
extern "C" {
#endif
#include "vpx_scale/yv12config.h"
#include "ppflags.h"
@ -24,40 +21,40 @@ extern "C"
#include "vpx/vpx_codec.h"
#include "vpx/vp8.h"
struct VP8D_COMP;
struct VP8D_COMP;
typedef struct
{
int Width;
int Height;
int Version;
int postprocess;
int max_threads;
int error_concealment;
} VP8D_CONFIG;
typedef struct {
int Width;
int Height;
int Version;
int postprocess;
int max_threads;
int error_concealment;
} VP8D_CONFIG;
typedef enum
{
VP8D_OK = 0
} VP8D_SETTING;
typedef enum { VP8D_OK = 0 } VP8D_SETTING;
void vp8dx_initialize(void);
void vp8dx_initialize(void);
void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x);
void vp8dx_set_setting(struct VP8D_COMP *comp, VP8D_SETTING oxst, int x);
int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
int vp8dx_get_setting(struct VP8D_COMP *comp, VP8D_SETTING oxst);
int vp8dx_receive_compressed_data(struct VP8D_COMP* comp,
size_t size, const uint8_t *dest,
int64_t time_stamp);
int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
int vp8dx_receive_compressed_data(struct VP8D_COMP *comp, size_t size,
const uint8_t *dest, int64_t time_stamp);
int vp8dx_get_raw_frame(struct VP8D_COMP *comp, YV12_BUFFER_CONFIG *sd,
int64_t *time_stamp, int64_t *time_end_stamp,
vp8_ppflags_t *flags);
vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP *comp,
enum vpx_ref_frame_type ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP *comp,
enum vpx_ref_frame_type ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
#ifdef __cplusplus
}
#endif
#endif // VP8_COMMON_ONYXD_H_

File diff suppressed because it is too large Load Diff

View File

@ -8,19 +8,17 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_POSTPROC_H_
#define VP8_COMMON_POSTPROC_H_
#include "vpx_ports/mem.h"
struct postproc_state
{
int last_q;
int last_noise;
int last_base_qindex;
int last_frame_valid;
int clamp;
int8_t *generated_noise;
struct postproc_state {
int last_q;
int last_noise;
int last_base_qindex;
int last_frame_valid;
int clamp;
int8_t *generated_noise;
};
#include "onyxc_int.h"
#include "ppflags.h"
@ -31,21 +29,12 @@ extern "C" {
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
vp8_ppflags_t *flags);
void vp8_de_noise(struct VP8Common *oci, YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag,
int uvfilter);
void vp8_de_noise(struct VP8Common *oci,
YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
int low_var_thresh,
int flag,
int uvfilter);
void vp8_deblock(struct VP8Common *oci,
YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post,
int q,
int low_var_thresh,
int flag);
void vp8_deblock(struct VP8Common *oci, YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag);
#define MFQE_PRECISION 4

View File

@ -8,38 +8,35 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_PPFLAGS_H_
#define VP8_COMMON_PPFLAGS_H_
#ifdef __cplusplus
extern "C" {
#endif
enum
{
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1<<0,
VP8D_DEMACROBLOCK = 1<<1,
VP8D_ADDNOISE = 1<<2,
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
VP8D_DEBUG_DRAW_MV = 1<<7,
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9,
VP8D_MFQE = 1<<10
enum {
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1 << 0,
VP8D_DEMACROBLOCK = 1 << 1,
VP8D_ADDNOISE = 1 << 2,
VP8D_DEBUG_TXT_FRAME_INFO = 1 << 3,
VP8D_DEBUG_TXT_MBLK_MODES = 1 << 4,
VP8D_DEBUG_TXT_DC_DIFF = 1 << 5,
VP8D_DEBUG_TXT_RATE_INFO = 1 << 6,
VP8D_DEBUG_DRAW_MV = 1 << 7,
VP8D_DEBUG_CLR_BLK_MODES = 1 << 8,
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9,
VP8D_MFQE = 1 << 10
};
typedef struct
{
int post_proc_flag;
int deblocking_level;
int noise_level;
int display_ref_frame_flag;
int display_mb_modes_flag;
int display_b_modes_flag;
int display_mv_flag;
typedef struct {
int post_proc_flag;
int deblocking_level;
int noise_level;
int display_ref_frame_flag;
int display_mb_modes_flag;
int display_b_modes_flag;
int display_mv_flag;
} vp8_ppflags_t;
#ifdef __cplusplus

View File

@ -8,128 +8,117 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "quant_common.h"
static const int dc_qlookup[QINDEX_RANGE] =
{
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
static const int dc_qlookup[QINDEX_RANGE] = {
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17,
17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54,
55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
70, 71, 72, 73, 74, 75, 76, 76, 77, 78, 79, 80, 81, 82, 83,
84, 85, 86, 87, 88, 89, 91, 93, 95, 96, 98, 100, 101, 102, 104,
106, 108, 110, 112, 114, 116, 118, 122, 124, 126, 128, 130, 132, 134, 136,
138, 140, 143, 145, 148, 151, 154, 157,
};
static const int ac_qlookup[QINDEX_RANGE] =
{
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
static const int ac_qlookup[QINDEX_RANGE] = {
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68,
70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98,
100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128, 131, 134,
137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181,
185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 234, 239, 245,
249, 254, 259, 264, 269, 274, 279, 284,
};
int vp8_dc_quant(int QIndex, int Delta) {
int retval;
int vp8_dc_quant(int QIndex, int Delta)
{
int retval;
QIndex = QIndex + Delta;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[ QIndex ];
return retval;
retval = dc_qlookup[QIndex];
return retval;
}
int vp8_dc2quant(int QIndex, int Delta)
{
int retval;
int vp8_dc2quant(int QIndex, int Delta) {
int retval;
QIndex = QIndex + Delta;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[ QIndex ] * 2;
return retval;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[QIndex] * 2;
return retval;
}
int vp8_dc_uv_quant(int QIndex, int Delta)
{
int retval;
int vp8_dc_uv_quant(int QIndex, int Delta) {
int retval;
QIndex = QIndex + Delta;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = dc_qlookup[ QIndex ];
retval = dc_qlookup[QIndex];
if (retval > 132)
retval = 132;
if (retval > 132) retval = 132;
return retval;
return retval;
}
int vp8_ac_yquant(int QIndex)
{
int retval;
int vp8_ac_yquant(int QIndex) {
int retval;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = ac_qlookup[ QIndex ];
return retval;
retval = ac_qlookup[QIndex];
return retval;
}
int vp8_ac2quant(int QIndex, int Delta)
{
int retval;
int vp8_ac2quant(int QIndex, int Delta) {
int retval;
QIndex = QIndex + Delta;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
/* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
* The smallest precision for that is '(x*6349) >> 12' but 16 is a good
* word size. */
retval = (ac_qlookup[ QIndex ] * 101581) >> 16;
/* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
* The smallest precision for that is '(x*6349) >> 12' but 16 is a good
* word size. */
retval = (ac_qlookup[QIndex] * 101581) >> 16;
if (retval < 8)
retval = 8;
if (retval < 8) retval = 8;
return retval;
return retval;
}
int vp8_ac_uv_quant(int QIndex, int Delta)
{
int retval;
int vp8_ac_uv_quant(int QIndex, int Delta) {
int retval;
QIndex = QIndex + Delta;
QIndex = QIndex + Delta;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
if (QIndex > 127)
QIndex = 127;
else if (QIndex < 0)
QIndex = 0;
retval = ac_qlookup[ QIndex ];
return retval;
retval = ac_qlookup[QIndex];
return retval;
}

View File

@ -11,7 +11,6 @@
#ifndef VP8_COMMON_QUANT_COMMON_H_
#define VP8_COMMON_QUANT_COMMON_H_
#include "string.h"
#include "blockd.h"
#include "onyxc_int.h"

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits.h>
#include <string.h>
@ -21,524 +20,474 @@
#include "onyxc_int.h"
#endif
void vp8_copy_mem16x16_c(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride)
{
void vp8_copy_mem16x16_c(unsigned char *src, int src_stride, unsigned char *dst,
int dst_stride) {
int r;
int r;
for (r = 0; r < 16; r++)
{
memcpy(dst, src, 16);
src += src_stride;
dst += dst_stride;
}
for (r = 0; r < 16; r++) {
memcpy(dst, src, 16);
src += src_stride;
dst += dst_stride;
}
}
void vp8_copy_mem8x8_c(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride)
{
int r;
void vp8_copy_mem8x8_c(unsigned char *src, int src_stride, unsigned char *dst,
int dst_stride) {
int r;
for (r = 0; r < 8; r++)
{
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
}
for (r = 0; r < 8; r++) {
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
}
}
void vp8_copy_mem8x4_c(
unsigned char *src,
int src_stride,
unsigned char *dst,
int dst_stride)
{
int r;
void vp8_copy_mem8x4_c(unsigned char *src, int src_stride, unsigned char *dst,
int dst_stride) {
int r;
for (r = 0; r < 4; r++)
{
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
}
for (r = 0; r < 4; r++) {
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
}
}
void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre,
int pre_stride, vp8_subpix_fn_t sppf) {
int r;
unsigned char *pred_ptr = d->predictor;
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
(d->bmi.mv.as_mv.col >> 3);
void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
{
int r;
unsigned char *pred_ptr = d->predictor;
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
}
else
{
for (r = 0; r < 4; r++)
{
pred_ptr[0] = ptr[0];
pred_ptr[1] = ptr[1];
pred_ptr[2] = ptr[2];
pred_ptr[3] = ptr[3];
pred_ptr += pitch;
ptr += pre_stride;
}
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7,
pred_ptr, pitch);
} else {
for (r = 0; r < 4; r++) {
pred_ptr[0] = ptr[0];
pred_ptr[1] = ptr[1];
pred_ptr[2] = ptr[2];
pred_ptr[3] = ptr[3];
pred_ptr += pitch;
ptr += pre_stride;
}
}
}
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
{
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d,
unsigned char *dst, int dst_stride,
unsigned char *base_pre, int pre_stride) {
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
(d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
}
else
{
vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
}
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7,
d->bmi.mv.as_mv.row & 7, dst, dst_stride);
} else {
vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
}
}
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
{
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d,
unsigned char *dst, int dst_stride,
unsigned char *base_pre, int pre_stride) {
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
(d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
}
else
{
vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
}
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7,
d->bmi.mv.as_mv.row & 7, dst, dst_stride);
} else {
vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
}
}
static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
{
int r;
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst,
int dst_stride, unsigned char *base_pre,
int pre_stride, vp8_subpix_fn_t sppf) {
int r;
unsigned char *ptr;
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
(d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
}
else
{
for (r = 0; r < 4; r++)
{
dst[0] = ptr[0];
dst[1] = ptr[1];
dst[2] = ptr[2];
dst[3] = ptr[3];
dst += dst_stride;
ptr += pre_stride;
}
}
}
/*encoder only*/
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
{
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320];
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int offset;
int pre_stride = x->pre.uv_stride;
/* calc uv motion vectors */
mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
mv_row /= 2;
mv_col /= 2;
mv_row &= x->fullpixel_mask;
mv_col &= x->fullpixel_mask;
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
}
else
{
vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst,
dst_stride);
} else {
for (r = 0; r < 4; r++) {
dst[0] = ptr[0];
dst[1] = ptr[1];
dst[2] = ptr[2];
dst[3] = ptr[3];
dst += dst_stride;
ptr += pre_stride;
}
}
}
/*encoder only*/
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
{
int i, j;
int pre_stride = x->pre.uv_stride;
unsigned char *base_pre;
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) {
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320];
/* build uv mvs */
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
int yoffset = i * 8 + j * 2;
int uoffset = 16 + i * 2 + j;
int voffset = 20 + i * 2 + j;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int offset;
int pre_stride = x->pre.uv_stride;
int temp;
/* calc uv motion vectors */
mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
mv_row /= 2;
mv_col /= 2;
mv_row &= x->fullpixel_mask;
mv_col &= x->fullpixel_mask;
temp = x->block[yoffset ].bmi.mv.as_mv.row
+ x->block[yoffset+1].bmi.mv.as_mv.row
+ x->block[yoffset+4].bmi.mv.as_mv.row
+ x->block[yoffset+5].bmi.mv.as_mv.row;
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
temp = x->block[yoffset ].bmi.mv.as_mv.col
+ x->block[yoffset+1].bmi.mv.as_mv.col
+ x->block[yoffset+4].bmi.mv.as_mv.col
+ x->block[yoffset+5].bmi.mv.as_mv.col;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
}
}
base_pre = x->pre.u_buffer;
for (i = 16; i < 20; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
else
{
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
}
}
base_pre = x->pre.v_buffer;
for (i = 20; i < 24; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
else
{
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
}
}
if ((mv_row | mv_col) & 7) {
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr,
8);
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr,
8);
} else {
vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
}
}
/*encoder only*/
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
unsigned char *dst_y,
int dst_ystride)
{
unsigned char *ptr_base;
unsigned char *ptr;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->pre.y_stride;
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) {
int i, j;
int pre_stride = x->pre.uv_stride;
unsigned char *base_pre;
ptr_base = x->pre.y_buffer;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
/* build uv mvs */
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
int yoffset = i * 8 + j * 2;
int uoffset = 16 + i * 2 + j;
int voffset = 20 + i * 2 + j;
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
dst_y, dst_ystride);
int temp;
temp = x->block[yoffset].bmi.mv.as_mv.row +
x->block[yoffset + 1].bmi.mv.as_mv.row +
x->block[yoffset + 4].bmi.mv.as_mv.row +
x->block[yoffset + 5].bmi.mv.as_mv.row;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
temp = x->block[yoffset].bmi.mv.as_mv.col +
x->block[yoffset + 1].bmi.mv.as_mv.col +
x->block[yoffset + 4].bmi.mv.as_mv.col +
x->block[yoffset + 5].bmi.mv.as_mv.col;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
}
else
{
vp8_copy_mem16x16(ptr, pre_stride, dst_y,
dst_ystride);
}
base_pre = x->pre.u_buffer;
for (i = 16; i < 20; i += 2) {
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i + 1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
else {
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride,
x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride,
x->subpixel_predict);
}
}
base_pre = x->pre.v_buffer;
for (i = 20; i < 24; i += 2) {
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i + 1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
else {
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride,
x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride,
x->subpixel_predict);
}
}
}
static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
{
/* If the MV points so far into the UMV border that no visible pixels
* are used for reconstruction, the subpel part of the MV can be
* discarded and the MV limited to 16 pixels with equivalent results.
*
* This limit kicks in at 19 pixels for the top and left edges, for
* the 16 pixels plus 3 taps right of the central pixel when subpel
* filtering. The bottom and right edges use 16 pixels plus 2 pixels
* left of the central pixel when filtering.
*/
if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
mv->col = xd->mb_to_left_edge - (16 << 3);
else if (mv->col > xd->mb_to_right_edge + (18 << 3))
mv->col = xd->mb_to_right_edge + (16 << 3);
/*encoder only*/
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, unsigned char *dst_y,
int dst_ystride) {
unsigned char *ptr_base;
unsigned char *ptr;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->pre.y_stride;
if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
mv->row = xd->mb_to_top_edge - (16 << 3);
else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
mv->row = xd->mb_to_bottom_edge + (16 << 3);
ptr_base = x->pre.y_buffer;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7) {
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y,
dst_ystride);
} else {
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
}
}
static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
/* If the MV points so far into the UMV border that no visible pixels
* are used for reconstruction, the subpel part of the MV can be
* discarded and the MV limited to 16 pixels with equivalent results.
*
* This limit kicks in at 19 pixels for the top and left edges, for
* the 16 pixels plus 3 taps right of the central pixel when subpel
* filtering. The bottom and right edges use 16 pixels plus 2 pixels
* left of the central pixel when filtering.
*/
if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
mv->col = xd->mb_to_left_edge - (16 << 3);
else if (mv->col > xd->mb_to_right_edge + (18 << 3))
mv->col = xd->mb_to_right_edge + (16 << 3);
if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
mv->row = xd->mb_to_top_edge - (16 << 3);
else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
mv->row = xd->mb_to_bottom_edge + (16 << 3);
}
/* A version of the above function for chroma block MVs.*/
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
{
mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ?
(xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ?
(xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
mv->col = (2 * mv->col < (xd->mb_to_left_edge - (19 << 3)))
? (xd->mb_to_left_edge - (16 << 3)) >> 1
: mv->col;
mv->col = (2 * mv->col > xd->mb_to_right_edge + (18 << 3))
? (xd->mb_to_right_edge + (16 << 3)) >> 1
: mv->col;
mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ?
(xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ?
(xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
mv->row = (2 * mv->row < (xd->mb_to_top_edge - (19 << 3)))
? (xd->mb_to_top_edge - (16 << 3)) >> 1
: mv->row;
mv->row = (2 * mv->row > xd->mb_to_bottom_edge + (18 << 3))
? (xd->mb_to_bottom_edge + (16 << 3)) >> 1
: mv->row;
}
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
unsigned char *dst_y,
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, unsigned char *dst_y,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_ystride,
int dst_uvstride)
{
int offset;
unsigned char *ptr;
unsigned char *uptr, *vptr;
unsigned char *dst_v, int dst_ystride,
int dst_uvstride) {
int offset;
unsigned char *ptr;
unsigned char *uptr, *vptr;
int_mv _16x16mv;
int_mv _16x16mv;
unsigned char *ptr_base = x->pre.y_buffer;
int pre_stride = x->pre.y_stride;
unsigned char *ptr_base = x->pre.y_buffer;
int pre_stride = x->pre.y_stride;
_16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;
_16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
{
clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
}
if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
}
ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
ptr = ptr_base + (_16x16mv.as_mv.row >> 3) * pre_stride +
(_16x16mv.as_mv.col >> 3);
if ( _16x16mv.as_int & 0x00070007)
{
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
}
else
{
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
}
if (_16x16mv.as_int & 0x00070007) {
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7,
_16x16mv.as_mv.row & 7, dst_y, dst_ystride);
} else {
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
}
/* calc uv motion vectors */
_16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
_16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
_16x16mv.as_mv.row /= 2;
_16x16mv.as_mv.col /= 2;
_16x16mv.as_mv.row &= x->fullpixel_mask;
_16x16mv.as_mv.col &= x->fullpixel_mask;
/* calc uv motion vectors */
_16x16mv.as_mv.row +=
1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
_16x16mv.as_mv.col +=
1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
_16x16mv.as_mv.row /= 2;
_16x16mv.as_mv.col /= 2;
_16x16mv.as_mv.row &= x->fullpixel_mask;
_16x16mv.as_mv.col &= x->fullpixel_mask;
pre_stride >>= 1;
offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
pre_stride >>= 1;
offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
if ( _16x16mv.as_int & 0x00070007)
{
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
}
else
{
vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
}
if (_16x16mv.as_int & 0x00070007) {
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7,
_16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7,
_16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
} else {
vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
}
}
static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
{
int i;
unsigned char *base_dst = x->dst.y_buffer;
unsigned char *base_pre = x->pre.y_buffer;
static void build_inter4x4_predictors_mb(MACROBLOCKD *x) {
int i;
unsigned char *base_dst = x->dst.y_buffer;
unsigned char *base_pre = x->pre.y_buffer;
if (x->mode_info_context->mbmi.partitioning < 3)
{
BLOCKD *b;
int dst_stride = x->dst.y_stride;
if (x->mode_info_context->mbmi.partitioning < 3) {
BLOCKD *b;
int dst_stride = x->dst.y_stride;
x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
x->block[10].bmi = x->mode_info_context->bmi[10];
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
{
clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
}
b = &x->block[ 0];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
b = &x->block[ 2];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
b = &x->block[ 8];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
b = &x->block[10];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
}
else
{
for (i = 0; i < 16; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
int dst_stride = x->dst.y_stride;
x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
{
clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x);
}
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
else
{
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
}
}
}
base_dst = x->dst.u_buffer;
base_pre = x->pre.u_buffer;
for (i = 16; i < 20; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
int dst_stride = x->dst.uv_stride;
/* Note: uv mvs already clamped in build_4x4uvmvs() */
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
else
{
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
}
x->block[0].bmi = x->mode_info_context->bmi[0];
x->block[2].bmi = x->mode_info_context->bmi[2];
x->block[8].bmi = x->mode_info_context->bmi[8];
x->block[10].bmi = x->mode_info_context->bmi[10];
if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
clamp_mv_to_umv_border(&x->block[0].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[2].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[8].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
}
base_dst = x->dst.v_buffer;
base_pre = x->pre.v_buffer;
for (i = 20; i < 24; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
int dst_stride = x->dst.uv_stride;
b = &x->block[0];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
dst_stride);
b = &x->block[2];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
dst_stride);
b = &x->block[8];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
dst_stride);
b = &x->block[10];
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
dst_stride);
} else {
for (i = 0; i < 16; i += 2) {
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i + 1];
int dst_stride = x->dst.y_stride;
/* Note: uv mvs already clamped in build_4x4uvmvs() */
x->block[i + 0].bmi = x->mode_info_context->bmi[i + 0];
x->block[i + 1].bmi = x->mode_info_context->bmi[i + 1];
if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
clamp_mv_to_umv_border(&x->block[i + 0].bmi.mv.as_mv, x);
clamp_mv_to_umv_border(&x->block[i + 1].bmi.mv.as_mv, x);
}
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
else
{
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
}
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
base_pre, dst_stride);
else {
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride,
base_pre, dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride,
base_pre, dst_stride, x->subpixel_predict);
}
}
}
base_dst = x->dst.u_buffer;
base_pre = x->pre.u_buffer;
for (i = 16; i < 20; i += 2) {
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i + 1];
int dst_stride = x->dst.uv_stride;
/* Note: uv mvs already clamped in build_4x4uvmvs() */
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
base_pre, dst_stride);
else {
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre,
dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre,
dst_stride, x->subpixel_predict);
}
}
base_dst = x->dst.v_buffer;
base_pre = x->pre.v_buffer;
for (i = 20; i < 24; i += 2) {
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i + 1];
int dst_stride = x->dst.uv_stride;
/* Note: uv mvs already clamped in build_4x4uvmvs() */
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
base_pre, dst_stride);
else {
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre,
dst_stride, x->subpixel_predict);
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre,
dst_stride, x->subpixel_predict);
}
}
}
static
void build_4x4uvmvs(MACROBLOCKD *x)
{
int i, j;
static void build_4x4uvmvs(MACROBLOCKD *x) {
int i, j;
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
int yoffset = i * 8 + j * 2;
int uoffset = 16 + i * 2 + j;
int voffset = 20 + i * 2 + j;
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
int yoffset = i * 8 + j * 2;
int uoffset = 16 + i * 2 + j;
int voffset = 20 + i * 2 + j;
int temp;
int temp;
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row +
x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row +
x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row +
x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col +
x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col +
x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col +
x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
}
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
}
}
}
void vp8_build_inter_predictors_mb(MACROBLOCKD *xd)
{
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.y_stride, xd->dst.uv_stride);
}
else
{
build_4x4uvmvs(xd);
build_inter4x4_predictors_mb(xd);
}
void vp8_build_inter_predictors_mb(MACROBLOCKD *xd) {
if (xd->mode_info_context->mbmi.mode != SPLITMV) {
vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer,
xd->dst.v_buffer, xd->dst.y_stride,
xd->dst.uv_stride);
} else {
build_4x4uvmvs(xd);
build_inter4x4_predictors_mb(xd);
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_RECONINTER_H_
#define VP8_COMMON_RECONINTER_H_
@ -17,21 +16,16 @@ extern "C" {
#endif
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
unsigned char *dst_y,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_ystride,
int dst_uvstride);
extern void vp8_build_inter16x16_predictors_mb(
MACROBLOCKD *x, unsigned char *dst_y, unsigned char *dst_u,
unsigned char *dst_v, int dst_ystride, int dst_uvstride);
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
unsigned char *dst_y,
int dst_ystride);
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
unsigned char *base_pre,
int pre_stride,
vp8_subpix_fn_t sppf);
int pre_stride, vp8_subpix_fn_t sppf);
extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp8_rtcd.h"
@ -19,9 +18,9 @@
#include "vp8/common/reconintra4x4.h"
enum {
SIZE_16,
SIZE_8,
NUM_SIZES,
SIZE_16,
SIZE_8,
NUM_SIZES,
};
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
@ -30,88 +29,68 @@ typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
static intra_pred_fn pred[4][NUM_SIZES];
static intra_pred_fn dc_pred[2][2][NUM_SIZES];
static void vp8_init_intra_predictors_internal(void)
{
#define INIT_SIZE(sz) \
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
\
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
static void vp8_init_intra_predictors_internal(void) {
#define INIT_SIZE(sz) \
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
\
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
INIT_SIZE(16);
INIT_SIZE(8);
vp8_init_intra4x4_predictors_internal();
INIT_SIZE(16);
INIT_SIZE(8);
vp8_init_intra4x4_predictors_internal();
}
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char * yabove_row,
unsigned char * yleft,
int left_stride,
unsigned char * ypred_ptr,
int y_stride)
{
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
int i;
intra_pred_fn fn;
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, unsigned char *yabove_row,
unsigned char *yleft, int left_stride,
unsigned char *ypred_ptr, int y_stride) {
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
int i;
intra_pred_fn fn;
for (i = 0; i < 16; i++)
{
yleft_col[i] = yleft[i* left_stride];
}
for (i = 0; i < 16; i++) {
yleft_col[i] = yleft[i * left_stride];
}
if (mode == DC_PRED)
{
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
}
else
{
fn = pred[mode][SIZE_16];
}
if (mode == DC_PRED) {
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
} else {
fn = pred[mode][SIZE_16];
}
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
}
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride)
{
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
unsigned char uleft_col[8];
unsigned char vleft_col[8];
int i;
intra_pred_fn fn;
void vp8_build_intra_predictors_mbuv_s(
MACROBLOCKD *x, unsigned char *uabove_row, unsigned char *vabove_row,
unsigned char *uleft, unsigned char *vleft, int left_stride,
unsigned char *upred_ptr, unsigned char *vpred_ptr, int pred_stride) {
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
unsigned char uleft_col[8];
unsigned char vleft_col[8];
int i;
intra_pred_fn fn;
for (i = 0; i < 8; i++)
{
uleft_col[i] = uleft[i * left_stride];
vleft_col[i] = vleft[i * left_stride];
}
for (i = 0; i < 8; i++) {
uleft_col[i] = uleft[i * left_stride];
vleft_col[i] = vleft[i * left_stride];
}
if (uvmode == DC_PRED)
{
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
}
else
{
fn = pred[uvmode][SIZE_8];
}
if (uvmode == DC_PRED) {
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
} else {
fn = pred[uvmode][SIZE_8];
}
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
}
void vp8_init_intra_predictors(void)
{
once(vp8_init_intra_predictors_internal);
void vp8_init_intra_predictors(void) {
once(vp8_init_intra_predictors_internal);
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_RECONINTRA_H_
#define VP8_COMMON_RECONINTRA_H_
@ -18,22 +17,14 @@
extern "C" {
#endif
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char *yabove_row,
unsigned char *yleft,
int left_stride,
unsigned char *ypred_ptr,
int y_stride);
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, unsigned char *yabove_row,
unsigned char *yleft, int left_stride,
unsigned char *ypred_ptr, int y_stride);
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride);
void vp8_build_intra_predictors_mbuv_s(
MACROBLOCKD *x, unsigned char *uabove_row, unsigned char *vabove_row,
unsigned char *uleft, unsigned char *vleft, int left_stride,
unsigned char *upred_ptr, unsigned char *vpred_ptr, int pred_stride);
void vp8_init_intra_predictors(void);

View File

@ -21,35 +21,32 @@ typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
static intra_pred_fn pred[10];
void vp8_init_intra4x4_predictors_internal(void)
{
pred[B_DC_PRED] = vpx_dc_predictor_4x4;
pred[B_TM_PRED] = vpx_tm_predictor_4x4;
pred[B_VE_PRED] = vpx_ve_predictor_4x4;
pred[B_HE_PRED] = vpx_he_predictor_4x4;
pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
pred[B_RD_PRED] = vpx_d135_predictor_4x4;
pred[B_VR_PRED] = vpx_d117_predictor_4x4;
pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
pred[B_HD_PRED] = vpx_d153_predictor_4x4;
pred[B_HU_PRED] = vpx_d207_predictor_4x4;
void vp8_init_intra4x4_predictors_internal(void) {
pred[B_DC_PRED] = vpx_dc_predictor_4x4;
pred[B_TM_PRED] = vpx_tm_predictor_4x4;
pred[B_VE_PRED] = vpx_ve_predictor_4x4;
pred[B_HE_PRED] = vpx_he_predictor_4x4;
pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
pred[B_RD_PRED] = vpx_d135_predictor_4x4;
pred[B_VR_PRED] = vpx_d117_predictor_4x4;
pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
pred[B_HD_PRED] = vpx_d153_predictor_4x4;
pred[B_HU_PRED] = vpx_d207_predictor_4x4;
}
void vp8_intra4x4_predict(unsigned char *above,
unsigned char *yleft, int left_stride,
B_PREDICTION_MODE b_mode,
void vp8_intra4x4_predict(unsigned char *above, unsigned char *yleft,
int left_stride, B_PREDICTION_MODE b_mode,
unsigned char *dst, int dst_stride,
unsigned char top_left)
{
unsigned char Left[4];
unsigned char Aboveb[12], *Above = Aboveb + 4;
unsigned char top_left) {
unsigned char Left[4];
unsigned char Aboveb[12], *Above = Aboveb + 4;
Left[0] = yleft[0];
Left[1] = yleft[left_stride];
Left[2] = yleft[2 * left_stride];
Left[3] = yleft[3 * left_stride];
memcpy(Above, above, 8);
Above[-1] = top_left;
Left[0] = yleft[0];
Left[1] = yleft[left_stride];
Left[2] = yleft[2 * left_stride];
Left[3] = yleft[3 * left_stride];
memcpy(Above, above, 8);
Above[-1] = top_left;
pred[b_mode](dst, dst_stride, Above, Left);
pred[b_mode](dst, dst_stride, Above, Left);
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_RECONINTRA4X4_H_
#define VP8_COMMON_RECONINTRA4X4_H_
#include "vp8/common/blockd.h"
@ -18,24 +17,22 @@ extern "C" {
#endif
static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd,
unsigned char *above_right_src)
{
int dst_stride = xd->dst.y_stride;
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
unsigned char *above_right_src) {
int dst_stride = xd->dst.y_stride;
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
unsigned int *src_ptr = (unsigned int *)above_right_src;
unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride);
unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride);
unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride);
unsigned int *src_ptr = (unsigned int *)above_right_src;
unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride);
unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride);
unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride);
*dst_ptr0 = *src_ptr;
*dst_ptr1 = *src_ptr;
*dst_ptr2 = *src_ptr;
*dst_ptr0 = *src_ptr;
*dst_ptr1 = *src_ptr;
*dst_ptr2 = *src_ptr;
}
void vp8_intra4x4_predict(unsigned char *Above,
unsigned char *yleft, int left_stride,
B_PREDICTION_MODE b_mode,
void vp8_intra4x4_predict(unsigned char *Above, unsigned char *yleft,
int left_stride, B_PREDICTION_MODE b_mode,
unsigned char *dst, int dst_stride,
unsigned char top_left);

View File

@ -12,8 +12,4 @@
#include "./vp8_rtcd.h"
#include "vpx_ports/vpx_once.h"
void vp8_rtcd()
{
once(setup_rtcd_internal);
}
void vp8_rtcd() { once(setup_rtcd_internal); }

View File

@ -8,32 +8,28 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "setupintrarecon.h"
#include "vpx_mem/vpx_mem.h"
void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
{
int i;
void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) {
int i;
/* set up frame new frame for intra coded blocks */
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
for (i = 0; i < ybf->y_height; i++)
ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
/* set up frame new frame for intra coded blocks */
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
for (i = 0; i < ybf->y_height; i++)
ybf->y_buffer[ybf->y_stride * i - 1] = (unsigned char)129;
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
for (i = 0; i < ybf->uv_height; i++)
ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
for (i = 0; i < ybf->uv_height; i++)
ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
for (i = 0; i < ybf->uv_height; i++)
ybf->u_buffer[ybf->uv_stride * i - 1] = (unsigned char)129;
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
for (i = 0; i < ybf->uv_height; i++)
ybf->v_buffer[ybf->uv_stride * i - 1] = (unsigned char)129;
}
void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf)
{
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) {
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
}

View File

@ -22,20 +22,15 @@ extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf);
static INLINE void setup_intra_recon_left(unsigned char *y_buffer,
unsigned char *u_buffer,
unsigned char *v_buffer,
int y_stride,
int uv_stride)
{
int i;
unsigned char *v_buffer, int y_stride,
int uv_stride) {
int i;
for (i = 0; i < 16; i++)
y_buffer[y_stride *i] = (unsigned char) 129;
for (i = 0; i < 16; i++) y_buffer[y_stride * i] = (unsigned char)129;
for (i = 0; i < 8; i++)
u_buffer[uv_stride *i] = (unsigned char) 129;
for (i = 0; i < 8; i++) u_buffer[uv_stride * i] = (unsigned char)129;
for (i = 0; i < 8; i++)
v_buffer[uv_stride *i] = (unsigned char) 129;
for (i = 0; i < 8; i++) v_buffer[uv_stride * i] = (unsigned char)129;
}
#ifdef __cplusplus

View File

@ -8,27 +8,25 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "swapyv12buffer.h"
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame)
{
unsigned char *temp;
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame,
YV12_BUFFER_CONFIG *last_frame) {
unsigned char *temp;
temp = last_frame->buffer_alloc;
last_frame->buffer_alloc = new_frame->buffer_alloc;
new_frame->buffer_alloc = temp;
temp = last_frame->buffer_alloc;
last_frame->buffer_alloc = new_frame->buffer_alloc;
new_frame->buffer_alloc = temp;
temp = last_frame->y_buffer;
last_frame->y_buffer = new_frame->y_buffer;
new_frame->y_buffer = temp;
temp = last_frame->y_buffer;
last_frame->y_buffer = new_frame->y_buffer;
new_frame->y_buffer = temp;
temp = last_frame->u_buffer;
last_frame->u_buffer = new_frame->u_buffer;
new_frame->u_buffer = temp;
temp = last_frame->v_buffer;
last_frame->v_buffer = new_frame->v_buffer;
new_frame->v_buffer = temp;
temp = last_frame->u_buffer;
last_frame->u_buffer = new_frame->u_buffer;
new_frame->u_buffer = temp;
temp = last_frame->v_buffer;
last_frame->v_buffer = new_frame->v_buffer;
new_frame->v_buffer = temp;
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_SWAPYV12BUFFER_H_
#define VP8_COMMON_SWAPYV12BUFFER_H_
@ -18,7 +17,8 @@
extern "C" {
#endif
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame);
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame,
YV12_BUFFER_CONFIG *last_frame);
#ifdef __cplusplus
} // extern "C"

View File

@ -10,121 +10,109 @@
#include <stdlib.h>
void vp8_blit_text(const char *msg, unsigned char *address, const int pitch) {
int letter_bitmap;
unsigned char *output_pos = address;
int colpos;
const int font[] = {
0x0, 0x5C00, 0x8020, 0xAFABEA, 0xD7EC0, 0x1111111, 0x1855740,
0x18000, 0x45C0, 0x74400, 0x51140, 0x23880, 0xC4000, 0x21080,
0x80000, 0x111110, 0xE9D72E, 0x87E40, 0x12AD732, 0xAAD62A, 0x4F94C4,
0x4D6B7, 0x456AA, 0x3E8423, 0xAAD6AA, 0xAAD6A2, 0x2800, 0x2A00,
0x8A880, 0x52940, 0x22A20, 0x15422, 0x6AD62E, 0x1E4A53E, 0xAAD6BF,
0x8C62E, 0xE8C63F, 0x118D6BF, 0x1094BF, 0xCAC62E, 0x1F2109F, 0x118FE31,
0xF8C628, 0x8A89F, 0x108421F, 0x1F1105F, 0x1F4105F, 0xE8C62E, 0x2294BF,
0x164C62E, 0x12694BF, 0x8AD6A2, 0x10FC21, 0x1F8421F, 0x744107, 0xF8220F,
0x1151151, 0x117041, 0x119D731, 0x47E0, 0x1041041, 0xFC400, 0x10440,
0x1084210, 0x820
};
colpos = 0;
void vp8_blit_text(const char *msg, unsigned char *address, const int pitch)
{
int letter_bitmap;
unsigned char *output_pos = address;
int colpos;
const int font[] =
{
0x0, 0x5C00, 0x8020, 0xAFABEA, 0xD7EC0, 0x1111111, 0x1855740, 0x18000,
0x45C0, 0x74400, 0x51140, 0x23880, 0xC4000, 0x21080, 0x80000, 0x111110,
0xE9D72E, 0x87E40, 0x12AD732, 0xAAD62A, 0x4F94C4, 0x4D6B7, 0x456AA,
0x3E8423, 0xAAD6AA, 0xAAD6A2, 0x2800, 0x2A00, 0x8A880, 0x52940, 0x22A20,
0x15422, 0x6AD62E, 0x1E4A53E, 0xAAD6BF, 0x8C62E, 0xE8C63F, 0x118D6BF,
0x1094BF, 0xCAC62E, 0x1F2109F, 0x118FE31, 0xF8C628, 0x8A89F, 0x108421F,
0x1F1105F, 0x1F4105F, 0xE8C62E, 0x2294BF, 0x164C62E, 0x12694BF, 0x8AD6A2,
0x10FC21, 0x1F8421F, 0x744107, 0xF8220F, 0x1151151, 0x117041, 0x119D731,
0x47E0, 0x1041041, 0xFC400, 0x10440, 0x1084210, 0x820
};
colpos = 0;
while (msg[colpos] != 0) {
char letter = msg[colpos];
int fontcol, fontrow;
while (msg[colpos] != 0)
{
char letter = msg[colpos];
int fontcol, fontrow;
if (letter <= 'Z' && letter >= ' ')
letter_bitmap = font[letter - ' '];
else if (letter <= 'z' && letter >= 'a')
letter_bitmap = font[letter - 'a' + 'A' - ' '];
else
letter_bitmap = font[0];
if (letter <= 'Z' && letter >= ' ')
letter_bitmap = font[letter-' '];
else if (letter <= 'z' && letter >= 'a')
letter_bitmap = font[letter-'a'+'A' - ' '];
else
letter_bitmap = font[0];
for (fontcol = 6; fontcol >= 0; fontcol--)
for (fontrow = 0; fontrow < 5; fontrow++)
output_pos[fontrow * pitch + fontcol] =
((letter_bitmap >> (fontcol * 5)) & (1 << fontrow) ? 255 : 0);
for (fontcol = 6; fontcol >= 0 ; fontcol--)
for (fontrow = 0; fontrow < 5; fontrow++)
output_pos[fontrow *pitch + fontcol] =
((letter_bitmap >> (fontcol * 5)) & (1 << fontrow) ? 255 : 0);
output_pos += 7;
colpos++;
}
output_pos += 7;
colpos++;
}
}
static void plot (const int x, const int y, unsigned char *image, const int pitch)
{
image [x+y*pitch] ^= 255;
static void plot(const int x, const int y, unsigned char *image,
const int pitch) {
image[x + y * pitch] ^= 255;
}
/* Bresenham line algorithm */
void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch)
{
int steep = abs(y1 - y0) > abs(x1 - x0);
int deltax, deltay;
int error, ystep, y, x;
void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image,
const int pitch) {
int steep = abs(y1 - y0) > abs(x1 - x0);
int deltax, deltay;
int error, ystep, y, x;
if (steep)
{
int t;
t = x0;
x0 = y0;
y0 = t;
if (steep) {
int t;
t = x0;
x0 = y0;
y0 = t;
t = x1;
x1 = y1;
y1 = t;
t = x1;
x1 = y1;
y1 = t;
}
if (x0 > x1) {
int t;
t = x0;
x0 = x1;
x1 = t;
t = y0;
y0 = y1;
y1 = t;
}
deltax = x1 - x0;
deltay = abs(y1 - y0);
error = deltax / 2;
y = y0;
if (y0 < y1)
ystep = 1;
else
ystep = -1;
if (steep) {
for (x = x0; x <= x1; x++) {
plot(y, x, image, pitch);
error = error - deltay;
if (error < 0) {
y = y + ystep;
error = error + deltax;
}
}
} else {
for (x = x0; x <= x1; x++) {
plot(x, y, image, pitch);
if (x0 > x1)
{
int t;
t = x0;
x0 = x1;
x1 = t;
t = y0;
y0 = y1;
y1 = t;
}
deltax = x1 - x0;
deltay = abs(y1 - y0);
error = deltax / 2;
y = y0;
if (y0 < y1)
ystep = 1;
else
ystep = -1;
if (steep)
{
for (x = x0; x <= x1; x++)
{
plot(y,x, image, pitch);
error = error - deltay;
if (error < 0)
{
y = y + ystep;
error = error + deltax;
}
}
}
else
{
for (x = x0; x <= x1; x++)
{
plot(x,y, image, pitch);
error = error - deltay;
if (error < 0)
{
y = y + ystep;
error = error + deltax;
}
}
error = error - deltay;
if (error < 0) {
y = y + ystep;
error = error + deltax;
}
}
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_THREADING_H_
#define VP8_COMMON_THREADING_H_
@ -30,10 +29,12 @@ extern "C" {
#define THREAD_SPECIFIC_INDEX DWORD
#define pthread_t HANDLE
#define pthread_attr_t DWORD
#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
#define pthread_detach(thread) \
if (thread != NULL) CloseHandle(thread)
#define thread_sleep(nms) Sleep(nms)
#define pthread_cancel(thread) terminate_thread(thread,0)
#define ts_key_create(ts_key, destructor) {ts_key = TlsAlloc();};
#define pthread_cancel(thread) terminate_thread(thread, 0)
#define ts_key_create(ts_key, destructor) \
{ ts_key = TlsAlloc(); };
#define pthread_getspecific(ts_key) TlsGetValue(ts_key)
#define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value)
#define pthread_self() GetCurrentThreadId()
@ -53,9 +54,9 @@ extern "C" {
#define thread_sleep(nms) DosSleep(nms)
#define pthread_cancel(thread) DosKillThread(thread)
#define ts_key_create(ts_key, destructor) \
DosAllocThreadLocalMemory(1, &(ts_key));
DosAllocThreadLocalMemory(1, &(ts_key));
#define pthread_getspecific(ts_key) ((void *)(*(ts_key)))
#define pthread_setspecific(ts_key, value) (*(ts_key)=(ULONG)(value))
#define pthread_setspecific(ts_key, value) (*(ts_key) = (ULONG)(value))
#define pthread_self() _gettid()
#else
#ifdef __APPLE__
@ -75,85 +76,82 @@ extern "C" {
#define THREAD_FUNCTION void *
#define THREAD_FUNCTION_RETURN void *
#define THREAD_SPECIFIC_INDEX pthread_key_t
#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor);
#define ts_key_create(ts_key, destructor) \
pthread_key_create(&(ts_key), destructor);
#endif
/* Synchronization macros: Win32 and Pthreads */
#if defined(_WIN32) && !HAVE_PTHREAD_H
#define sem_t HANDLE
#define pause(voidpara) __asm PAUSE
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL)
#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
#define sem_init(sem, sem_attr1, sem_init_value) \
(int)((*sem = CreateSemaphore(NULL, 0, 32768, NULL)) == NULL)
#define sem_wait(sem) \
(int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem, INFINITE))
#define sem_post(sem) ReleaseSemaphore(*sem, 1, NULL)
#define sem_destroy(sem) \
if (*sem) ((int)(CloseHandle(*sem)) == TRUE)
#define thread_sleep(nms) Sleep(nms)
#elif defined(__OS2__)
typedef struct
{
HEV event;
HMTX wait_mutex;
HMTX count_mutex;
int count;
typedef struct {
HEV event;
HMTX wait_mutex;
HMTX count_mutex;
int count;
} sem_t;
static inline int sem_init(sem_t *sem, int pshared, unsigned int value)
{
DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0,
value > 0 ? TRUE : FALSE);
DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE);
DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE);
static inline int sem_init(sem_t *sem, int pshared, unsigned int value) {
DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0,
value > 0 ? TRUE : FALSE);
DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE);
DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE);
sem->count = value;
sem->count = value;
return 0;
return 0;
}
static inline int sem_wait(sem_t * sem)
{
DosRequestMutexSem(sem->wait_mutex, -1);
static inline int sem_wait(sem_t *sem) {
DosRequestMutexSem(sem->wait_mutex, -1);
DosWaitEventSem(sem->event, -1);
DosWaitEventSem(sem->event, -1);
DosRequestMutexSem(sem->count_mutex, -1);
DosRequestMutexSem(sem->count_mutex, -1);
sem->count--;
if (sem->count == 0)
{
ULONG post_count;
sem->count--;
if (sem->count == 0) {
ULONG post_count;
DosResetEventSem(sem->event, &post_count);
}
DosResetEventSem(sem->event, &post_count);
}
DosReleaseMutexSem(sem->count_mutex);
DosReleaseMutexSem(sem->count_mutex);
DosReleaseMutexSem(sem->wait_mutex);
DosReleaseMutexSem(sem->wait_mutex);
return 0;
return 0;
}
static inline int sem_post(sem_t * sem)
{
DosRequestMutexSem(sem->count_mutex, -1);
static inline int sem_post(sem_t *sem) {
DosRequestMutexSem(sem->count_mutex, -1);
if (sem->count < 32768)
{
sem->count++;
DosPostEventSem(sem->event);
}
if (sem->count < 32768) {
sem->count++;
DosPostEventSem(sem->event);
}
DosReleaseMutexSem(sem->count_mutex);
DosReleaseMutexSem(sem->count_mutex);
return 0;
return 0;
}
static inline int sem_destroy(sem_t * sem)
{
DosCloseEventSem(sem->event);
DosCloseMutexSem(sem->wait_mutex);
DosCloseMutexSem(sem->count_mutex);
static inline int sem_destroy(sem_t *sem) {
DosCloseEventSem(sem->event);
DosCloseMutexSem(sem->wait_mutex);
DosCloseMutexSem(sem->count_mutex);
return 0;
return 0;
}
#define thread_sleep(nms) DosSleep(nms)
@ -162,15 +160,20 @@ static inline int sem_destroy(sem_t * sem)
#ifdef __APPLE__
#define sem_t semaphore_t
#define sem_init(X,Y,Z) semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z)
#define sem_wait(sem) (semaphore_wait(*sem) )
#define sem_init(X, Y, Z) \
semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z)
#define sem_wait(sem) (semaphore_wait(*sem))
#define sem_post(sem) semaphore_signal(*sem)
#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem)
#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
#define sem_destroy(sem) semaphore_destroy(mach_task_self(), *sem)
#define thread_sleep(nms)
/* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec =
1000*nms;nanosleep(&ts, NULL);} */
#else
#include <unistd.h>
#include <sched.h>
#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
#define thread_sleep(nms) sched_yield();
/* {struct timespec ts;ts.tv_sec=0;
ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
#endif
/* Not Windows. Assume pthreads */
@ -185,42 +188,41 @@ static inline int sem_destroy(sem_t * sem)
#include "vpx_util/vpx_thread.h"
static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
const int kMaxTryLocks = 4000;
int locked = 0;
int i;
const int kMaxTryLocks = 4000;
int locked = 0;
int i;
for (i = 0; i < kMaxTryLocks; ++i) {
if (!pthread_mutex_trylock(mutex)) {
locked = 1;
break;
}
for (i = 0; i < kMaxTryLocks; ++i) {
if (!pthread_mutex_trylock(mutex)) {
locked = 1;
break;
}
}
if (!locked)
pthread_mutex_lock(mutex);
if (!locked) pthread_mutex_lock(mutex);
}
static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) {
int ret;
mutex_lock(mutex);
ret = *p;
pthread_mutex_unlock(mutex);
return ret;
int ret;
mutex_lock(mutex);
ret = *p;
pthread_mutex_unlock(mutex);
return ret;
}
static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col,
const int *last_row_current_mb_col,
const int nsync) {
while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {
x86_pause_hint();
thread_sleep(0);
}
while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {
x86_pause_hint();
thread_sleep(0);
}
}
static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) {
mutex_lock(mutex);
*p = v;
pthread_mutex_unlock(mutex);
mutex_lock(mutex);
*p = v;
pthread_mutex_unlock(mutex);
}
#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#if CONFIG_DEBUG
#include <assert.h>
#endif
@ -16,128 +15,95 @@
#include "treecoder.h"
static void tree2tok(
struct vp8_token_struct *const p,
vp8_tree t,
int i,
int v,
int L
)
{
v += v;
++L;
static void tree2tok(struct vp8_token_struct *const p, vp8_tree t, int i, int v,
int L) {
v += v;
++L;
do
{
const vp8_tree_index j = t[i++];
do {
const vp8_tree_index j = t[i++];
if (j <= 0)
{
p[-j].value = v;
p[-j].Len = L;
}
else
tree2tok(p, t, j, v, L);
}
while (++v & 1);
if (j <= 0) {
p[-j].value = v;
p[-j].Len = L;
} else
tree2tok(p, t, j, v, L);
} while (++v & 1);
}
void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t)
{
tree2tok(p, t, 0, 0, 0);
void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t) {
tree2tok(p, t, 0, 0, 0);
}
void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t,
int offset)
{
tree2tok(p - offset, t, 0, 0, 0);
int offset) {
tree2tok(p - offset, t, 0, 0, 0);
}
static void branch_counts(
int n, /* n = size of alphabet */
vp8_token tok [ /* n */ ],
vp8_tree tree,
unsigned int branch_ct [ /* n-1 */ ] [2],
const unsigned int num_events[ /* n */ ]
)
{
const int tree_len = n - 1;
int t = 0;
static void branch_counts(int n, /* n = size of alphabet */
vp8_token tok[/* n */], vp8_tree tree,
unsigned int branch_ct[/* n-1 */][2],
const unsigned int num_events[/* n */]) {
const int tree_len = n - 1;
int t = 0;
#if CONFIG_DEBUG
assert(tree_len);
assert(tree_len);
#endif
do
{
branch_ct[t][0] = branch_ct[t][1] = 0;
}
while (++t < tree_len);
do {
branch_ct[t][0] = branch_ct[t][1] = 0;
} while (++t < tree_len);
t = 0;
t = 0;
do
{
int L = tok[t].Len;
const int enc = tok[t].value;
const unsigned int ct = num_events[t];
do {
int L = tok[t].Len;
const int enc = tok[t].value;
const unsigned int ct = num_events[t];
vp8_tree_index i = 0;
vp8_tree_index i = 0;
do
{
const int b = (enc >> --L) & 1;
const int j = i >> 1;
do {
const int b = (enc >> --L) & 1;
const int j = i >> 1;
#if CONFIG_DEBUG
assert(j < tree_len && 0 <= L);
assert(j < tree_len && 0 <= L);
#endif
branch_ct [j] [b] += ct;
i = tree[ i + b];
}
while (i > 0);
branch_ct[j][b] += ct;
i = tree[i + b];
} while (i > 0);
#if CONFIG_DEBUG
assert(!L);
assert(!L);
#endif
}
while (++t < n);
} while (++t < n);
}
void vp8_tree_probs_from_distribution(int n, /* n = size of alphabet */
vp8_token tok[/* n */], vp8_tree tree,
vp8_prob probs[/* n-1 */],
unsigned int branch_ct[/* n-1 */][2],
const unsigned int num_events[/* n */],
unsigned int Pfac, int rd) {
const int tree_len = n - 1;
int t = 0;
void vp8_tree_probs_from_distribution(
int n, /* n = size of alphabet */
vp8_token tok [ /* n */ ],
vp8_tree tree,
vp8_prob probs [ /* n-1 */ ],
unsigned int branch_ct [ /* n-1 */ ] [2],
const unsigned int num_events[ /* n */ ],
unsigned int Pfac,
int rd
)
{
const int tree_len = n - 1;
int t = 0;
branch_counts(n, tok, tree, branch_ct, num_events);
branch_counts(n, tok, tree, branch_ct, num_events);
do
{
const unsigned int *const c = branch_ct[t];
const unsigned int tot = c[0] + c[1];
do {
const unsigned int *const c = branch_ct[t];
const unsigned int tot = c[0] + c[1];
#if CONFIG_DEBUG
assert(tot < (1 << 24)); /* no overflow below */
assert(tot < (1 << 24)); /* no overflow below */
#endif
if (tot)
{
const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot;
probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */
}
else
probs[t] = vp8_prob_half;
}
while (++t < tree_len);
if (tot) {
const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot;
probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */
} else
probs[t] = vp8_prob_half;
} while (++t < tree_len);
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_TREECODER_H_
#define VP8_COMMON_TREECODER_H_
@ -18,10 +17,9 @@ extern "C" {
typedef unsigned char vp8bc_index_t; /* probability index */
typedef unsigned char vp8_prob;
#define vp8_prob_half ( (vp8_prob) 128)
#define vp8_prob_half ((vp8_prob)128)
typedef signed char vp8_tree_index;
struct bool_coder_spec;
@ -34,10 +32,7 @@ typedef const bool_coder_spec c_bool_coder_spec;
typedef const bool_writer c_bool_writer;
typedef const bool_reader c_bool_reader;
# define vp8_complement( x) (255 - x)
#define vp8_complement(x) (255 - x)
/* We build coding trees compactly in arrays.
Each node of the tree is a pair of vp8_tree_indices.
@ -48,11 +43,9 @@ typedef const bool_reader c_bool_reader;
typedef const vp8_tree_index vp8_tree[], *vp8_tree_p;
typedef const struct vp8_token_struct
{
int value;
int Len;
typedef const struct vp8_token_struct {
int value;
int Len;
} vp8_token;
/* Construct encoding array from tree. */
@ -61,35 +54,26 @@ void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree);
void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree,
int offset);
/* Convert array of token occurrence counts into a table of probabilities
for the associated binary encoding tree. Also writes count of branches
taken for each node on the tree; this facilitiates decisions as to
probability updates. */
void vp8_tree_probs_from_distribution(
int n, /* n = size of alphabet */
vp8_token tok [ /* n */ ],
vp8_tree tree,
vp8_prob probs [ /* n-1 */ ],
unsigned int branch_ct [ /* n-1 */ ] [2],
const unsigned int num_events[ /* n */ ],
unsigned int Pfactor,
int Round
);
void vp8_tree_probs_from_distribution(int n, /* n = size of alphabet */
vp8_token tok[/* n */], vp8_tree tree,
vp8_prob probs[/* n-1 */],
unsigned int branch_ct[/* n-1 */][2],
const unsigned int num_events[/* n */],
unsigned int Pfactor, int Round);
/* Variant of above using coder spec rather than hardwired 8-bit probs. */
void vp8bc_tree_probs_from_distribution(
int n, /* n = size of alphabet */
vp8_token tok [ /* n */ ],
vp8_tree tree,
vp8_prob probs [ /* n-1 */ ],
unsigned int branch_ct [ /* n-1 */ ] [2],
const unsigned int num_events[ /* n */ ],
c_bool_coder_spec *s
);
void vp8bc_tree_probs_from_distribution(int n, /* n = size of alphabet */
vp8_token tok[/* n */], vp8_tree tree,
vp8_prob probs[/* n-1 */],
unsigned int branch_ct[/* n-1 */][2],
const unsigned int num_events[/* n */],
c_bool_coder_spec *s);
#ifdef __cplusplus
} // extern "C"

View File

@ -17,235 +17,153 @@ extern "C" {
/*Generated file, included by entropymode.c*/
const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] =
{
{ 0, 1 },
{ 2, 2 },
{ 6, 3 },
{ 28, 5 },
{ 30, 5 },
{ 58, 6 },
{ 59, 6 },
{ 62, 6 },
{ 126, 7 },
{ 127, 7 }
const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] = {
{ 0, 1 }, { 2, 2 }, { 6, 3 }, { 28, 5 }, { 30, 5 },
{ 58, 6 }, { 59, 6 }, { 62, 6 }, { 126, 7 }, { 127, 7 }
};
const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] =
{
{ 0, 1 },
{ 4, 3 },
{ 5, 3 },
{ 6, 3 },
{ 7, 3 }
const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] = {
{ 0, 1 }, { 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 }
};
const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] =
{
{ 4, 3 },
{ 5, 3 },
{ 6, 3 },
{ 7, 3 },
{ 0, 1 }
const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] = {
{ 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 }, { 0, 1 }
};
const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] =
{
{ 0, 1 },
{ 2, 2 },
{ 6, 3 },
{ 7, 3 }
const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] = {
{ 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
};
const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] =
{
{ 6, 3 },
{ 7, 3 },
{ 2, 2 },
{ 0, 1 }
const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] = {
{ 6, 3 }, { 7, 3 }, { 2, 2 }, { 0, 1 }
};
const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] =
{
{ 2, 2 },
{ 6, 3 },
{ 0, 1 },
{ 14, 4 },
{ 15, 4 }
const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] = {
{ 2, 2 }, { 6, 3 }, { 0, 1 }, { 14, 4 }, { 15, 4 }
};
const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] =
{
{ 0, 1 },
{ 2, 2 },
{ 6, 3 },
{ 7, 3 }
const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] = {
{ 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
};
const struct vp8_token_struct vp8_small_mvencodings[8] =
{
{ 0, 3 },
{ 1, 3 },
{ 2, 3 },
{ 3, 3 },
{ 4, 3 },
{ 5, 3 },
{ 6, 3 },
{ 7, 3 }
const struct vp8_token_struct vp8_small_mvencodings[8] = {
{ 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }, { 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 }
};
const vp8_prob vp8_ymode_prob[VP8_YMODES-1] =
{
112, 86, 140, 37
};
const vp8_prob vp8_ymode_prob[VP8_YMODES - 1] = { 112, 86, 140, 37 };
const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1] =
{
145, 156, 163, 128
};
const vp8_prob vp8_kf_ymode_prob[VP8_YMODES - 1] = { 145, 156, 163, 128 };
const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES-1] =
{
162, 101, 204
};
const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES - 1] = { 162, 101, 204 };
const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1] =
{
142, 114, 183
};
const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES - 1] = { 142, 114, 183 };
const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES-1] =
{
120, 90, 79, 133, 87, 85, 80, 111, 151
};
const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES - 1] = { 120, 90, 79, 133, 87,
85, 80, 111, 151 };
const vp8_prob vp8_kf_bmode_prob
[VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1] =
{
{
{ 231, 120, 48, 89, 115, 113, 120, 152, 112 },
{ 152, 179, 64, 126, 170, 118, 46, 70, 95 },
{ 175, 69, 143, 80, 85, 82, 72, 155, 103 },
{ 56, 58, 10, 171, 218, 189, 17, 13, 152 },
{ 144, 71, 10, 38, 171, 213, 144, 34, 26 },
{ 114, 26, 17, 163, 44, 195, 21, 10, 173 },
{ 121, 24, 80, 195, 26, 62, 44, 64, 85 },
{ 170, 46, 55, 19, 136, 160, 33, 206, 71 },
{ 63, 20, 8, 114, 114, 208, 12, 9, 226 },
{ 81, 40, 11, 96, 182, 84, 29, 16, 36 }
},
{
{ 134, 183, 89, 137, 98, 101, 106, 165, 148 },
{ 72, 187, 100, 130, 157, 111, 32, 75, 80 },
{ 66, 102, 167, 99, 74, 62, 40, 234, 128 },
{ 41, 53, 9, 178, 241, 141, 26, 8, 107 },
{ 104, 79, 12, 27, 217, 255, 87, 17, 7 },
{ 74, 43, 26, 146, 73, 166, 49, 23, 157 },
{ 65, 38, 105, 160, 51, 52, 31, 115, 128 },
{ 87, 68, 71, 44, 114, 51, 15, 186, 23 },
{ 47, 41, 14, 110, 182, 183, 21, 17, 194 },
{ 66, 45, 25, 102, 197, 189, 23, 18, 22 }
},
{
{ 88, 88, 147, 150, 42, 46, 45, 196, 205 },
{ 43, 97, 183, 117, 85, 38, 35, 179, 61 },
{ 39, 53, 200, 87, 26, 21, 43, 232, 171 },
{ 56, 34, 51, 104, 114, 102, 29, 93, 77 },
{ 107, 54, 32, 26, 51, 1, 81, 43, 31 },
{ 39, 28, 85, 171, 58, 165, 90, 98, 64 },
{ 34, 22, 116, 206, 23, 34, 43, 166, 73 },
{ 68, 25, 106, 22, 64, 171, 36, 225, 114 },
{ 34, 19, 21, 102, 132, 188, 16, 76, 124 },
{ 62, 18, 78, 95, 85, 57, 50, 48, 51 }
},
{
{ 193, 101, 35, 159, 215, 111, 89, 46, 111 },
{ 60, 148, 31, 172, 219, 228, 21, 18, 111 },
{ 112, 113, 77, 85, 179, 255, 38, 120, 114 },
{ 40, 42, 1, 196, 245, 209, 10, 25, 109 },
{ 100, 80, 8, 43, 154, 1, 51, 26, 71 },
{ 88, 43, 29, 140, 166, 213, 37, 43, 154 },
{ 61, 63, 30, 155, 67, 45, 68, 1, 209 },
{ 142, 78, 78, 16, 255, 128, 34, 197, 171 },
{ 41, 40, 5, 102, 211, 183, 4, 1, 221 },
{ 51, 50, 17, 168, 209, 192, 23, 25, 82 }
},
{
{ 125, 98, 42, 88, 104, 85, 117, 175, 82 },
{ 95, 84, 53, 89, 128, 100, 113, 101, 45 },
{ 75, 79, 123, 47, 51, 128, 81, 171, 1 },
{ 57, 17, 5, 71, 102, 57, 53, 41, 49 },
{ 115, 21, 2, 10, 102, 255, 166, 23, 6 },
{ 38, 33, 13, 121, 57, 73, 26, 1, 85 },
{ 41, 10, 67, 138, 77, 110, 90, 47, 114 },
{ 101, 29, 16, 10, 85, 128, 101, 196, 26 },
{ 57, 18, 10, 102, 102, 213, 34, 20, 43 },
{ 117, 20, 15, 36, 163, 128, 68, 1, 26 }
},
{
{ 138, 31, 36, 171, 27, 166, 38, 44, 229 },
{ 67, 87, 58, 169, 82, 115, 26, 59, 179 },
{ 63, 59, 90, 180, 59, 166, 93, 73, 154 },
{ 40, 40, 21, 116, 143, 209, 34, 39, 175 },
{ 57, 46, 22, 24, 128, 1, 54, 17, 37 },
{ 47, 15, 16, 183, 34, 223, 49, 45, 183 },
{ 46, 17, 33, 183, 6, 98, 15, 32, 183 },
{ 65, 32, 73, 115, 28, 128, 23, 128, 205 },
{ 40, 3, 9, 115, 51, 192, 18, 6, 223 },
{ 87, 37, 9, 115, 59, 77, 64, 21, 47 }
},
{
{ 104, 55, 44, 218, 9, 54, 53, 130, 226 },
{ 64, 90, 70, 205, 40, 41, 23, 26, 57 },
{ 54, 57, 112, 184, 5, 41, 38, 166, 213 },
{ 30, 34, 26, 133, 152, 116, 10, 32, 134 },
{ 75, 32, 12, 51, 192, 255, 160, 43, 51 },
{ 39, 19, 53, 221, 26, 114, 32, 73, 255 },
{ 31, 9, 65, 234, 2, 15, 1, 118, 73 },
{ 88, 31, 35, 67, 102, 85, 55, 186, 85 },
{ 56, 21, 23, 111, 59, 205, 45, 37, 192 },
{ 55, 38, 70, 124, 73, 102, 1, 34, 98 }
},
{
{ 102, 61, 71, 37, 34, 53, 31, 243, 192 },
{ 69, 60, 71, 38, 73, 119, 28, 222, 37 },
{ 68, 45, 128, 34, 1, 47, 11, 245, 171 },
{ 62, 17, 19, 70, 146, 85, 55, 62, 70 },
{ 75, 15, 9, 9, 64, 255, 184, 119, 16 },
{ 37, 43, 37, 154, 100, 163, 85, 160, 1 },
{ 63, 9, 92, 136, 28, 64, 32, 201, 85 },
{ 86, 6, 28, 5, 64, 255, 25, 248, 1 },
{ 56, 8, 17, 132, 137, 255, 55, 116, 128 },
{ 58, 15, 20, 82, 135, 57, 26, 121, 40 }
},
{
{ 164, 50, 31, 137, 154, 133, 25, 35, 218 },
{ 51, 103, 44, 131, 131, 123, 31, 6, 158 },
{ 86, 40, 64, 135, 148, 224, 45, 183, 128 },
{ 22, 26, 17, 131, 240, 154, 14, 1, 209 },
{ 83, 12, 13, 54, 192, 255, 68, 47, 28 },
{ 45, 16, 21, 91, 64, 222, 7, 1, 197 },
{ 56, 21, 39, 155, 60, 138, 23, 102, 213 },
{ 85, 26, 85, 85, 128, 128, 32, 146, 171 },
{ 18, 11, 7, 63, 144, 171, 4, 4, 246 },
{ 35, 27, 10, 146, 174, 171, 12, 26, 128 }
},
{
{ 190, 80, 35, 99, 180, 80, 126, 54, 45 },
{ 85, 126, 47, 87, 176, 51, 41, 20, 32 },
{ 101, 75, 128, 139, 118, 146, 116, 128, 85 },
{ 56, 41, 15, 176, 236, 85, 37, 9, 62 },
{ 146, 36, 19, 30, 171, 255, 97, 27, 20 },
{ 71, 30, 17, 119, 118, 255, 17, 18, 138 },
{ 101, 38, 60, 138, 55, 70, 43, 26, 142 },
{ 138, 45, 61, 62, 219, 1, 81, 188, 64 },
{ 32, 41, 20, 117, 151, 142, 20, 21, 163 },
{ 112, 19, 12, 61, 195, 128, 48, 4, 24 }
}
};
const vp8_prob
vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES - 1] = {
{ { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
{ 152, 179, 64, 126, 170, 118, 46, 70, 95 },
{ 175, 69, 143, 80, 85, 82, 72, 155, 103 },
{ 56, 58, 10, 171, 218, 189, 17, 13, 152 },
{ 144, 71, 10, 38, 171, 213, 144, 34, 26 },
{ 114, 26, 17, 163, 44, 195, 21, 10, 173 },
{ 121, 24, 80, 195, 26, 62, 44, 64, 85 },
{ 170, 46, 55, 19, 136, 160, 33, 206, 71 },
{ 63, 20, 8, 114, 114, 208, 12, 9, 226 },
{ 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
{ { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
{ 72, 187, 100, 130, 157, 111, 32, 75, 80 },
{ 66, 102, 167, 99, 74, 62, 40, 234, 128 },
{ 41, 53, 9, 178, 241, 141, 26, 8, 107 },
{ 104, 79, 12, 27, 217, 255, 87, 17, 7 },
{ 74, 43, 26, 146, 73, 166, 49, 23, 157 },
{ 65, 38, 105, 160, 51, 52, 31, 115, 128 },
{ 87, 68, 71, 44, 114, 51, 15, 186, 23 },
{ 47, 41, 14, 110, 182, 183, 21, 17, 194 },
{ 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
{ { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
{ 43, 97, 183, 117, 85, 38, 35, 179, 61 },
{ 39, 53, 200, 87, 26, 21, 43, 232, 171 },
{ 56, 34, 51, 104, 114, 102, 29, 93, 77 },
{ 107, 54, 32, 26, 51, 1, 81, 43, 31 },
{ 39, 28, 85, 171, 58, 165, 90, 98, 64 },
{ 34, 22, 116, 206, 23, 34, 43, 166, 73 },
{ 68, 25, 106, 22, 64, 171, 36, 225, 114 },
{ 34, 19, 21, 102, 132, 188, 16, 76, 124 },
{ 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
{ { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
{ 60, 148, 31, 172, 219, 228, 21, 18, 111 },
{ 112, 113, 77, 85, 179, 255, 38, 120, 114 },
{ 40, 42, 1, 196, 245, 209, 10, 25, 109 },
{ 100, 80, 8, 43, 154, 1, 51, 26, 71 },
{ 88, 43, 29, 140, 166, 213, 37, 43, 154 },
{ 61, 63, 30, 155, 67, 45, 68, 1, 209 },
{ 142, 78, 78, 16, 255, 128, 34, 197, 171 },
{ 41, 40, 5, 102, 211, 183, 4, 1, 221 },
{ 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
{ { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
{ 95, 84, 53, 89, 128, 100, 113, 101, 45 },
{ 75, 79, 123, 47, 51, 128, 81, 171, 1 },
{ 57, 17, 5, 71, 102, 57, 53, 41, 49 },
{ 115, 21, 2, 10, 102, 255, 166, 23, 6 },
{ 38, 33, 13, 121, 57, 73, 26, 1, 85 },
{ 41, 10, 67, 138, 77, 110, 90, 47, 114 },
{ 101, 29, 16, 10, 85, 128, 101, 196, 26 },
{ 57, 18, 10, 102, 102, 213, 34, 20, 43 },
{ 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
{ { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
{ 67, 87, 58, 169, 82, 115, 26, 59, 179 },
{ 63, 59, 90, 180, 59, 166, 93, 73, 154 },
{ 40, 40, 21, 116, 143, 209, 34, 39, 175 },
{ 57, 46, 22, 24, 128, 1, 54, 17, 37 },
{ 47, 15, 16, 183, 34, 223, 49, 45, 183 },
{ 46, 17, 33, 183, 6, 98, 15, 32, 183 },
{ 65, 32, 73, 115, 28, 128, 23, 128, 205 },
{ 40, 3, 9, 115, 51, 192, 18, 6, 223 },
{ 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
{ { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
{ 64, 90, 70, 205, 40, 41, 23, 26, 57 },
{ 54, 57, 112, 184, 5, 41, 38, 166, 213 },
{ 30, 34, 26, 133, 152, 116, 10, 32, 134 },
{ 75, 32, 12, 51, 192, 255, 160, 43, 51 },
{ 39, 19, 53, 221, 26, 114, 32, 73, 255 },
{ 31, 9, 65, 234, 2, 15, 1, 118, 73 },
{ 88, 31, 35, 67, 102, 85, 55, 186, 85 },
{ 56, 21, 23, 111, 59, 205, 45, 37, 192 },
{ 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
{ { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
{ 69, 60, 71, 38, 73, 119, 28, 222, 37 },
{ 68, 45, 128, 34, 1, 47, 11, 245, 171 },
{ 62, 17, 19, 70, 146, 85, 55, 62, 70 },
{ 75, 15, 9, 9, 64, 255, 184, 119, 16 },
{ 37, 43, 37, 154, 100, 163, 85, 160, 1 },
{ 63, 9, 92, 136, 28, 64, 32, 201, 85 },
{ 86, 6, 28, 5, 64, 255, 25, 248, 1 },
{ 56, 8, 17, 132, 137, 255, 55, 116, 128 },
{ 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
{ { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
{ 51, 103, 44, 131, 131, 123, 31, 6, 158 },
{ 86, 40, 64, 135, 148, 224, 45, 183, 128 },
{ 22, 26, 17, 131, 240, 154, 14, 1, 209 },
{ 83, 12, 13, 54, 192, 255, 68, 47, 28 },
{ 45, 16, 21, 91, 64, 222, 7, 1, 197 },
{ 56, 21, 39, 155, 60, 138, 23, 102, 213 },
{ 85, 26, 85, 85, 128, 128, 32, 146, 171 },
{ 18, 11, 7, 63, 144, 171, 4, 4, 246 },
{ 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
{ { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
{ 85, 126, 47, 87, 176, 51, 41, 20, 32 },
{ 101, 75, 128, 139, 118, 146, 116, 128, 85 },
{ 56, 41, 15, 176, 236, 85, 37, 9, 62 },
{ 146, 36, 19, 30, 171, 255, 97, 27, 20 },
{ 71, 30, 17, 119, 118, 255, 17, 18, 138 },
{ 101, 38, 60, 138, 55, 70, 43, 26, 142 },
{ 138, 45, 61, 62, 219, 1, 81, 188, 64 },
{ 32, 41, 20, 117, 151, 142, 20, 21, 163 },
{ 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
};
#ifdef __cplusplus
} // extern "C"

File diff suppressed because it is too large Load Diff

View File

@ -10,26 +10,20 @@
#include "vp8/common/x86/filter_x86.h"
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
{
{ 128, 128, 128, 128, 0, 0, 0, 0 },
{ 112, 112, 112, 112, 16, 16, 16, 16 },
{ 96, 96, 96, 96, 32, 32, 32, 32 },
{ 80, 80, 80, 80, 48, 48, 48, 48 },
{ 64, 64, 64, 64, 64, 64, 64, 64 },
{ 48, 48, 48, 48, 80, 80, 80, 80 },
{ 32, 32, 32, 32, 96, 96, 96, 96 },
{ 16, 16, 16, 16, 112, 112, 112, 112 }
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) = {
{ 128, 128, 128, 128, 0, 0, 0, 0 }, { 112, 112, 112, 112, 16, 16, 16, 16 },
{ 96, 96, 96, 96, 32, 32, 32, 32 }, { 80, 80, 80, 80, 48, 48, 48, 48 },
{ 64, 64, 64, 64, 64, 64, 64, 64 }, { 48, 48, 48, 48, 80, 80, 80, 80 },
{ 32, 32, 32, 32, 96, 96, 96, 96 }, { 16, 16, 16, 16, 112, 112, 112, 112 }
};
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) =
{
{ 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
{ 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) = {
{ 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
{ 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
};

View File

@ -15,114 +15,97 @@
extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC)
{
short *sq = (short *) d->qcoeff;
short *dq = (short *) d->dqcoeff;
void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC) {
short *sq = (short *)d->qcoeff;
short *dq = (short *)d->dqcoeff;
vp8_dequantize_b_impl_mmx(sq, dq, DQC);
vp8_dequantize_b_impl_mmx(sq, dq, DQC);
}
void vp8_dequant_idct_add_y_block_mmx
(short *q, short *dq,
unsigned char *dst, int stride, char *eobs)
{
int i;
void vp8_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *dst,
int stride, char *eobs) {
int i;
for (i = 0; i < 4; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx (q, dq, dst, stride);
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx (q+16, dq, dst+4, stride);
else if (eobs[1] == 1)
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride,
dst+4, stride);
memset(q + 16, 0, 2 * sizeof(q[0]));
}
if (eobs[2] > 1)
vp8_dequant_idct_add_mmx (q+32, dq, dst+8, stride);
else if (eobs[2] == 1)
{
vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride,
dst+8, stride);
memset(q + 32, 0, 2 * sizeof(q[0]));
}
if (eobs[3] > 1)
vp8_dequant_idct_add_mmx (q+48, dq, dst+12, stride);
else if (eobs[3] == 1)
{
vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride,
dst+12, stride);
memset(q + 48, 0, 2 * sizeof(q[0]));
}
q += 64;
dst += 4*stride;
eobs += 4;
for (i = 0; i < 4; i++) {
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx(q, dq, dst, stride);
else if (eobs[0] == 1) {
vp8_dc_only_idct_add_mmx(q[0] * dq[0], dst, stride, dst, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx(q + 16, dq, dst + 4, stride);
else if (eobs[1] == 1) {
vp8_dc_only_idct_add_mmx(q[16] * dq[0], dst + 4, stride, dst + 4, stride);
memset(q + 16, 0, 2 * sizeof(q[0]));
}
if (eobs[2] > 1)
vp8_dequant_idct_add_mmx(q + 32, dq, dst + 8, stride);
else if (eobs[2] == 1) {
vp8_dc_only_idct_add_mmx(q[32] * dq[0], dst + 8, stride, dst + 8, stride);
memset(q + 32, 0, 2 * sizeof(q[0]));
}
if (eobs[3] > 1)
vp8_dequant_idct_add_mmx(q + 48, dq, dst + 12, stride);
else if (eobs[3] == 1) {
vp8_dc_only_idct_add_mmx(q[48] * dq[0], dst + 12, stride, dst + 12,
stride);
memset(q + 48, 0, 2 * sizeof(q[0]));
}
q += 64;
dst += 4 * stride;
eobs += 4;
}
}
void vp8_dequant_idct_add_uv_block_mmx
(short *q, short *dq,
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
{
int i;
void vp8_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *dstu,
unsigned char *dstv, int stride,
char *eobs) {
int i;
for (i = 0; i < 2; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx (q, dq, dstu, stride);
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx (q+16, dq, dstu+4, stride);
else if (eobs[1] == 1)
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride,
dstu+4, stride);
memset(q + 16, 0, 2 * sizeof(q[0]));
}
q += 32;
dstu += 4*stride;
eobs += 2;
for (i = 0; i < 2; i++) {
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx(q, dq, dstu, stride);
else if (eobs[0] == 1) {
vp8_dc_only_idct_add_mmx(q[0] * dq[0], dstu, stride, dstu, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
for (i = 0; i < 2; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx (q, dq, dstv, stride);
else if (eobs[0] == 1)
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx (q+16, dq, dstv+4, stride);
else if (eobs[1] == 1)
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride,
dstv+4, stride);
memset(q + 16, 0, 2 * sizeof(q[0]));
}
q += 32;
dstv += 4*stride;
eobs += 2;
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx(q + 16, dq, dstu + 4, stride);
else if (eobs[1] == 1) {
vp8_dc_only_idct_add_mmx(q[16] * dq[0], dstu + 4, stride, dstu + 4,
stride);
memset(q + 16, 0, 2 * sizeof(q[0]));
}
q += 32;
dstu += 4 * stride;
eobs += 2;
}
for (i = 0; i < 2; i++) {
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx(q, dq, dstv, stride);
else if (eobs[0] == 1) {
vp8_dc_only_idct_add_mmx(q[0] * dq[0], dstv, stride, dstv, stride);
memset(q, 0, 2 * sizeof(q[0]));
}
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx(q + 16, dq, dstv + 4, stride);
else if (eobs[1] == 1) {
vp8_dc_only_idct_add_mmx(q[16] * dq[0], dstv + 4, stride, dstv + 4,
stride);
memset(q + 16, 0, 2 * sizeof(q[0]));
}
q += 32;
dstv += 4 * stride;
eobs += 2;
}
}

View File

@ -11,79 +11,68 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
void vp8_idct_dequant_0_2x_sse2
(short *q, short *dq ,
unsigned char *dst, int dst_stride);
void vp8_idct_dequant_full_2x_sse2
(short *q, short *dq ,
unsigned char *dst, int dst_stride);
void vp8_idct_dequant_0_2x_sse2(short *q, short *dq, unsigned char *dst,
int dst_stride);
void vp8_idct_dequant_full_2x_sse2(short *q, short *dq, unsigned char *dst,
int dst_stride);
void vp8_dequant_idct_add_y_block_sse2
(short *q, short *dq,
unsigned char *dst, int stride, char *eobs)
{
int i;
void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst,
int stride, char *eobs) {
int i;
for (i = 0; i < 4; i++)
{
if (((short *)(eobs))[0])
{
if (((short *)(eobs))[0] & 0xfefe)
vp8_idct_dequant_full_2x_sse2 (q, dq, dst, stride);
else
vp8_idct_dequant_0_2x_sse2 (q, dq, dst, stride);
}
if (((short *)(eobs))[1])
{
if (((short *)(eobs))[1] & 0xfefe)
vp8_idct_dequant_full_2x_sse2 (q+32, dq, dst+8, stride);
else
vp8_idct_dequant_0_2x_sse2 (q+32, dq, dst+8, stride);
}
q += 64;
dst += stride*4;
eobs += 4;
}
}
void vp8_dequant_idct_add_uv_block_sse2
(short *q, short *dq,
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
{
if (((short *)(eobs))[0])
{
if (((short *)(eobs))[0] & 0xfefe)
vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride);
else
vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride);
}
q += 32;
dstu += stride*4;
if (((short *)(eobs))[1])
{
if (((short *)(eobs))[1] & 0xfefe)
vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride);
else
vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride);
}
q += 32;
if (((short *)(eobs))[2])
{
if (((short *)(eobs))[2] & 0xfefe)
vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride);
else
vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride);
}
q += 32;
dstv += stride*4;
if (((short *)(eobs))[3])
{
if (((short *)(eobs))[3] & 0xfefe)
vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride);
for (i = 0; i < 4; i++) {
if (((short *)(eobs))[0]) {
if (((short *)(eobs))[0] & 0xfefe)
vp8_idct_dequant_full_2x_sse2(q, dq, dst, stride);
else
vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride);
vp8_idct_dequant_0_2x_sse2(q, dq, dst, stride);
}
if (((short *)(eobs))[1]) {
if (((short *)(eobs))[1] & 0xfefe)
vp8_idct_dequant_full_2x_sse2(q + 32, dq, dst + 8, stride);
else
vp8_idct_dequant_0_2x_sse2(q + 32, dq, dst + 8, stride);
}
q += 64;
dst += stride * 4;
eobs += 4;
}
}
void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq,
unsigned char *dstu,
unsigned char *dstv, int stride,
char *eobs) {
if (((short *)(eobs))[0]) {
if (((short *)(eobs))[0] & 0xfefe)
vp8_idct_dequant_full_2x_sse2(q, dq, dstu, stride);
else
vp8_idct_dequant_0_2x_sse2(q, dq, dstu, stride);
}
q += 32;
dstu += stride * 4;
if (((short *)(eobs))[1]) {
if (((short *)(eobs))[1] & 0xfefe)
vp8_idct_dequant_full_2x_sse2(q, dq, dstu, stride);
else
vp8_idct_dequant_0_2x_sse2(q, dq, dstu, stride);
}
q += 32;
if (((short *)(eobs))[2]) {
if (((short *)(eobs))[2] & 0xfefe)
vp8_idct_dequant_full_2x_sse2(q, dq, dstv, stride);
else
vp8_idct_dequant_0_2x_sse2(q, dq, dstv, stride);
}
q += 32;
dstv += stride * 4;
if (((short *)(eobs))[3]) {
if (((short *)(eobs))[3] & 0xfefe)
vp8_idct_dequant_full_2x_sse2(q, dq, dstv, stride);
else
vp8_idct_dequant_0_2x_sse2(q, dq, dstv, stride);
}
}

View File

@ -8,20 +8,19 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8/common/loopfilter.h"
#define prototype_loopfilter(sym) \
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh, int count)
#define prototype_loopfilter(sym) \
void sym(unsigned char *src, int pitch, const unsigned char *blimit, \
const unsigned char *limit, const unsigned char *thresh, int count)
#define prototype_loopfilter_nc(sym) \
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh)
#define prototype_loopfilter_nc(sym) \
void sym(unsigned char *src, int pitch, const unsigned char *blimit, \
const unsigned char *limit, const unsigned char *thresh)
#define prototype_simple_loopfilter(sym) \
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
@ -47,152 +46,178 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
#if HAVE_MMX
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim,
lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim,
lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim,
lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim,
lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride,
lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride,
blimit);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride,
blimit);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride,
blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (u_ptr)
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim,
lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
}
#endif
/* Horizontal MB filtering */
#if HAVE_SSE2
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim,
lfi->hev_thr);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim,
lfi->lim, lfi->hev_thr, v_ptr);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim,
lfi->hev_thr);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim,
lfi->lim, lfi->hev_thr, v_ptr);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
#if ARCH_X86_64
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr,
2);
#else
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride,
lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride,
lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride,
lfi->blim, lfi->lim, lfi->hev_thr);
#endif
if (u_ptr)
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
if (u_ptr)
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride,
lfi->blim, lfi->lim, lfi->hev_thr,
v_ptr + 4 * uv_stride);
}
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride,
blimit);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride,
blimit);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride,
blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
#if ARCH_X86_64
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr,
2);
#else
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
lfi->hev_thr);
#endif
if (u_ptr)
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
if (u_ptr)
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim,
lfi->lim, lfi->hev_thr, v_ptr + 4);
}
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit) {
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -8,70 +8,61 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "dboolhuff.h"
#include "vp8/common/common.h"
#include "vpx_dsp/vpx_dsp_common.h"
/* Initialize the boolean decoder to read from [source, source + source_sz).
 *
 * br            - decoder state to initialize
 * source        - start of the compressed bitstream partition
 * source_sz     - size of the partition in bytes
 * decrypt_cb    - optional callback used to decrypt data as it is read
 *                 (may be NULL for unencrypted streams)
 * decrypt_state - opaque state passed back to decrypt_cb
 *
 * Returns 0 on success, 1 if source is NULL while source_sz is nonzero.
 */
int vp8dx_start_decode(BOOL_DECODER *br, const unsigned char *source,
                       unsigned int source_sz, vpx_decrypt_cb decrypt_cb,
                       void *decrypt_state) {
  br->user_buffer_end = source + source_sz;
  br->user_buffer = source;
  br->value = 0;
  /* count is negative until the value register has been primed below. */
  br->count = -8;
  br->range = 255;
  br->decrypt_cb = decrypt_cb;
  br->decrypt_state = decrypt_state;

  if (source_sz && !source) return 1;

  /* Populate the buffer */
  vp8dx_bool_decoder_fill(br);

  return 0;
}
/* Refill the boolean decoder's value register with as many whole bytes as
 * fit, reading from br->user_buffer (optionally decrypting them first).
 * Updates br->value, br->count and br->user_buffer in place.
 */
void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
  const unsigned char *bufptr = br->user_buffer;
  VP8_BD_VALUE value = br->value;
  int count = br->count;
  /* Bit position at which the next byte would be inserted into value. */
  int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT);
  size_t bytes_left = br->user_buffer_end - bufptr;
  size_t bits_left = bytes_left * CHAR_BIT;
  /* x > 0 means the buffer runs out before the register is full. */
  int x = shift + CHAR_BIT - (int)bits_left;
  int loop_end = 0;
  unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1];

  if (br->decrypt_cb) {
    /* Decrypt up to one register's worth of input into a local buffer
     * and read from there instead of the caller's buffer. */
    size_t n = VPXMIN(sizeof(decrypted), bytes_left);
    br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n);
    bufptr = decrypted;
  }

  if (x >= 0) {
    /* Near the end of the buffer: pad the count so decoding can continue
     * past the last real byte, and stop the fill loop early. */
    count += VP8_LOTS_OF_BITS;
    loop_end = x;
  }

  if (x < 0 || bits_left) {
    while (shift >= loop_end) {
      count += CHAR_BIT;
      value |= (VP8_BD_VALUE)*bufptr << shift;
      ++bufptr;
      ++br->user_buffer;
      shift -= CHAR_BIT;
    }
  }

  br->value = value;
  br->count = count;
}

Some files were not shown because too many files have changed in this diff Show More