vp8: apply clang-format
Change-Id: I7605b6678014a5426ceb45c27b54885e0c4e06ed
This commit is contained in:
parent
65daa41378
commit
81a6739533
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "alloccommon.h"
|
||||
#include "blockd.h"
|
||||
@ -18,176 +17,166 @@
|
||||
#include "entropymode.h"
|
||||
#include "systemdependent.h"
|
||||
|
||||
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
|
||||
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) {
|
||||
int i;
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
|
||||
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
|
||||
#if CONFIG_POSTPROC
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
|
||||
if (oci->post_proc_buffer_int_used)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
|
||||
if (oci->post_proc_buffer_int_used)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
|
||||
|
||||
vpx_free(oci->pp_limits_buffer);
|
||||
oci->pp_limits_buffer = NULL;
|
||||
vpx_free(oci->pp_limits_buffer);
|
||||
oci->pp_limits_buffer = NULL;
|
||||
|
||||
vpx_free(oci->postproc_state.generated_noise);
|
||||
oci->postproc_state.generated_noise = NULL;
|
||||
vpx_free(oci->postproc_state.generated_noise);
|
||||
oci->postproc_state.generated_noise = NULL;
|
||||
#endif
|
||||
|
||||
vpx_free(oci->above_context);
|
||||
vpx_free(oci->mip);
|
||||
vpx_free(oci->above_context);
|
||||
vpx_free(oci->mip);
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
vpx_free(oci->prev_mip);
|
||||
oci->prev_mip = NULL;
|
||||
vpx_free(oci->prev_mip);
|
||||
oci->prev_mip = NULL;
|
||||
#endif
|
||||
|
||||
oci->above_context = NULL;
|
||||
oci->mip = NULL;
|
||||
oci->above_context = NULL;
|
||||
oci->mip = NULL;
|
||||
}
|
||||
|
||||
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
{
|
||||
int i;
|
||||
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) {
|
||||
int i;
|
||||
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
|
||||
/* our internal buffers are always multiples of 16 */
|
||||
if ((width & 0xf) != 0)
|
||||
width += 16 - (width & 0xf);
|
||||
/* our internal buffers are always multiples of 16 */
|
||||
if ((width & 0xf) != 0) width += 16 - (width & 0xf);
|
||||
|
||||
if ((height & 0xf) != 0)
|
||||
height += 16 - (height & 0xf);
|
||||
if ((height & 0xf) != 0) height += 16 - (height & 0xf);
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++) {
|
||||
oci->fb_idx_ref_cnt[i] = 0;
|
||||
oci->yv12_fb[i].flags = 0;
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height,
|
||||
VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
{
|
||||
oci->fb_idx_ref_cnt[i] = 0;
|
||||
oci->yv12_fb[i].flags = 0;
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
}
|
||||
oci->new_fb_idx = 0;
|
||||
oci->lst_fb_idx = 1;
|
||||
oci->gld_fb_idx = 2;
|
||||
oci->alt_fb_idx = 3;
|
||||
|
||||
oci->new_fb_idx = 0;
|
||||
oci->lst_fb_idx = 1;
|
||||
oci->gld_fb_idx = 2;
|
||||
oci->alt_fb_idx = 3;
|
||||
oci->fb_idx_ref_cnt[0] = 1;
|
||||
oci->fb_idx_ref_cnt[1] = 1;
|
||||
oci->fb_idx_ref_cnt[2] = 1;
|
||||
oci->fb_idx_ref_cnt[3] = 1;
|
||||
|
||||
oci->fb_idx_ref_cnt[0] = 1;
|
||||
oci->fb_idx_ref_cnt[1] = 1;
|
||||
oci->fb_idx_ref_cnt[2] = 1;
|
||||
oci->fb_idx_ref_cnt[3] = 1;
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16,
|
||||
VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
oci->mb_rows = height >> 4;
|
||||
oci->mb_cols = width >> 4;
|
||||
oci->MBs = oci->mb_rows * oci->mb_cols;
|
||||
oci->mode_info_stride = oci->mb_cols + 1;
|
||||
oci->mip =
|
||||
vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
|
||||
|
||||
oci->mb_rows = height >> 4;
|
||||
oci->mb_cols = width >> 4;
|
||||
oci->MBs = oci->mb_rows * oci->mb_cols;
|
||||
oci->mode_info_stride = oci->mb_cols + 1;
|
||||
oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
|
||||
if (!oci->mip) goto allocation_fail;
|
||||
|
||||
if (!oci->mip)
|
||||
goto allocation_fail;
|
||||
oci->mi = oci->mip + oci->mode_info_stride + 1;
|
||||
|
||||
oci->mi = oci->mip + oci->mode_info_stride + 1;
|
||||
/* Allocation of previous mode info will be done in vp8_decode_frame()
|
||||
* as it is a decoder only data */
|
||||
|
||||
/* Allocation of previous mode info will be done in vp8_decode_frame()
|
||||
* as it is a decoder only data */
|
||||
oci->above_context =
|
||||
vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
|
||||
|
||||
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
|
||||
|
||||
if (!oci->above_context)
|
||||
goto allocation_fail;
|
||||
if (!oci->above_context) goto allocation_fail;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height,
|
||||
VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
|
||||
oci->post_proc_buffer_int_used = 0;
|
||||
memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
|
||||
memset(oci->post_proc_buffer.buffer_alloc, 128,
|
||||
oci->post_proc_buffer.frame_size);
|
||||
oci->post_proc_buffer_int_used = 0;
|
||||
memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
|
||||
memset(oci->post_proc_buffer.buffer_alloc, 128,
|
||||
oci->post_proc_buffer.frame_size);
|
||||
|
||||
/* Allocate buffer to store post-processing filter coefficients.
|
||||
*
|
||||
* Note: Round up mb_cols to support SIMD reads
|
||||
*/
|
||||
oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
|
||||
if (!oci->pp_limits_buffer)
|
||||
goto allocation_fail;
|
||||
/* Allocate buffer to store post-processing filter coefficients.
|
||||
*
|
||||
* Note: Round up mb_cols to support SIMD reads
|
||||
*/
|
||||
oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
|
||||
if (!oci->pp_limits_buffer) goto allocation_fail;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
|
||||
allocation_fail:
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return 1;
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void vp8_setup_version(VP8_COMMON *cm)
|
||||
{
|
||||
switch (cm->version)
|
||||
{
|
||||
void vp8_setup_version(VP8_COMMON *cm) {
|
||||
switch (cm->version) {
|
||||
case 0:
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 1:
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 2:
|
||||
cm->no_lpf = 1;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
cm->no_lpf = 1;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 3:
|
||||
cm->no_lpf = 1;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 1;
|
||||
break;
|
||||
cm->no_lpf = 1;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 1;
|
||||
break;
|
||||
default:
|
||||
/*4,5,6,7 are reserved for future use*/
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
}
|
||||
/*4,5,6,7 are reserved for future use*/
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
void vp8_create_common(VP8_COMMON *oci)
|
||||
{
|
||||
vp8_machine_specific_config(oci);
|
||||
void vp8_create_common(VP8_COMMON *oci) {
|
||||
vp8_machine_specific_config(oci);
|
||||
|
||||
vp8_init_mbmode_probs(oci);
|
||||
vp8_default_bmode_probs(oci->fc.bmode_prob);
|
||||
vp8_init_mbmode_probs(oci);
|
||||
vp8_default_bmode_probs(oci->fc.bmode_prob);
|
||||
|
||||
oci->mb_no_coeff_skip = 1;
|
||||
oci->no_lpf = 0;
|
||||
oci->filter_type = NORMAL_LOOPFILTER;
|
||||
oci->use_bilinear_mc_filter = 0;
|
||||
oci->full_pixel = 0;
|
||||
oci->multi_token_partition = ONE_PARTITION;
|
||||
oci->clamp_type = RECON_CLAMP_REQUIRED;
|
||||
oci->mb_no_coeff_skip = 1;
|
||||
oci->no_lpf = 0;
|
||||
oci->filter_type = NORMAL_LOOPFILTER;
|
||||
oci->use_bilinear_mc_filter = 0;
|
||||
oci->full_pixel = 0;
|
||||
oci->multi_token_partition = ONE_PARTITION;
|
||||
oci->clamp_type = RECON_CLAMP_REQUIRED;
|
||||
|
||||
/* Initialize reference frame sign bias structure to defaults */
|
||||
memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
/* Initialize reference frame sign bias structure to defaults */
|
||||
memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
|
||||
/* Default disable buffer to buffer copying */
|
||||
oci->copy_buffer_to_gf = 0;
|
||||
oci->copy_buffer_to_arf = 0;
|
||||
/* Default disable buffer to buffer copying */
|
||||
oci->copy_buffer_to_gf = 0;
|
||||
oci->copy_buffer_to_arf = 0;
|
||||
}
|
||||
|
||||
void vp8_remove_common(VP8_COMMON *oci)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
}
|
||||
void vp8_remove_common(VP8_COMMON *oci) { vp8_de_alloc_frame_buffers(oci); }
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ALLOCCOMMON_H_
|
||||
#define VP8_COMMON_ALLOCCOMMON_H_
|
||||
|
||||
|
@ -11,105 +11,90 @@
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq, unsigned char *dst,
|
||||
int stride, char *eobs) {
|
||||
int i;
|
||||
|
||||
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
|
||||
unsigned char *dst,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dst, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dst, stride, dst, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dst+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dst+4, stride, dst+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[2] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+32, dq, dst+8, stride);
|
||||
else if (eobs[2] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[32]*dq[0], dst+8, stride, dst+8, stride);
|
||||
((int *)(q+32))[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[3] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+48, dq, dst+12, stride);
|
||||
else if (eobs[3] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[48]*dq[0], dst+12, stride,dst+12,stride);
|
||||
((int *)(q+48))[0] = 0;
|
||||
}
|
||||
|
||||
q += 64;
|
||||
dst += 4*stride;
|
||||
eobs += 4;
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6(q, dq, dst, stride);
|
||||
else if (eobs[0] == 1) {
|
||||
vp8_dc_only_idct_add_v6(q[0] * dq[0], dst, stride, dst, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6(q + 16, dq, dst + 4, stride);
|
||||
else if (eobs[1] == 1) {
|
||||
vp8_dc_only_idct_add_v6(q[16] * dq[0], dst + 4, stride, dst + 4, stride);
|
||||
((int *)(q + 16))[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[2] > 1)
|
||||
vp8_dequant_idct_add_v6(q + 32, dq, dst + 8, stride);
|
||||
else if (eobs[2] == 1) {
|
||||
vp8_dc_only_idct_add_v6(q[32] * dq[0], dst + 8, stride, dst + 8, stride);
|
||||
((int *)(q + 32))[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[3] > 1)
|
||||
vp8_dequant_idct_add_v6(q + 48, dq, dst + 12, stride);
|
||||
else if (eobs[3] == 1) {
|
||||
vp8_dc_only_idct_add_v6(q[48] * dq[0], dst + 12, stride, dst + 12,
|
||||
stride);
|
||||
((int *)(q + 48))[0] = 0;
|
||||
}
|
||||
|
||||
q += 64;
|
||||
dst += 4 * stride;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq,
|
||||
unsigned char *dstu,
|
||||
unsigned char *dstv,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq, unsigned char *dstu,
|
||||
unsigned char *dstv, int stride,
|
||||
char *eobs) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dstu, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dstu+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstu+4, stride,
|
||||
dstu+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4*stride;
|
||||
eobs += 2;
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6(q, dq, dstu, stride);
|
||||
else if (eobs[0] == 1) {
|
||||
vp8_dc_only_idct_add_v6(q[0] * dq[0], dstu, stride, dstu, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dstv, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dstv+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstv+4, stride,
|
||||
dstv+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4*stride;
|
||||
eobs += 2;
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6(q + 16, dq, dstu + 4, stride);
|
||||
else if (eobs[1] == 1) {
|
||||
vp8_dc_only_idct_add_v6(q[16] * dq[0], dstu + 4, stride, dstu + 4,
|
||||
stride);
|
||||
((int *)(q + 16))[0] = 0;
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4 * stride;
|
||||
eobs += 2;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6(q, dq, dstv, stride);
|
||||
else if (eobs[0] == 1) {
|
||||
vp8_dc_only_idct_add_v6(q[0] * dq[0], dstv, stride, dstv, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6(q + 16, dq, dstv + 4, stride);
|
||||
else if (eobs[1] == 1) {
|
||||
vp8_dc_only_idct_add_v6(q[16] * dq[0], dstv + 4, stride, dstv + 4,
|
||||
stride);
|
||||
((int *)(q + 16))[0] = 0;
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4 * stride;
|
||||
eobs += 2;
|
||||
}
|
||||
}
|
||||
|
@ -14,100 +14,74 @@
|
||||
#include "vp8/common/filter.h"
|
||||
#include "bilinearfilter_arm.h"
|
||||
|
||||
void vp8_filter_block2d_bil_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
|
||||
void vp8_filter_block2d_bil_armv6(unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch, const short *HFilter,
|
||||
const short *VFilter, int Width, int Height) {
|
||||
unsigned short FData[36 * 16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1,
|
||||
Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height,
|
||||
Width, VFilter);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict4x4_armv6(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
void vp8_bilinear_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
|
||||
HFilter, VFilter, 4, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
void vp8_bilinear_predict8x8_armv6(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
|
||||
HFilter, VFilter, 8, 8);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
void vp8_bilinear_predict8x4_armv6(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
|
||||
HFilter, VFilter, 8, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
void vp8_bilinear_predict16x16_armv6(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
|
||||
HFilter, VFilter, 16, 16);
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
|
||||
#define VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
|
||||
|
||||
@ -16,25 +15,14 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6(
|
||||
const unsigned char *src_ptr, unsigned short *dst_ptr,
|
||||
unsigned int src_pitch, unsigned int height, unsigned int width,
|
||||
const short *vp8_filter);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
const unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6(
|
||||
const unsigned short *src_ptr, unsigned char *dst_ptr, int dst_pitch,
|
||||
unsigned int height, unsigned int width, const short *vp8_filter);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -8,18 +8,16 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#if HAVE_MEDIA
|
||||
extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
|
||||
|
||||
void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
|
||||
{
|
||||
short *DQ = d->dqcoeff;
|
||||
short *Q = d->qcoeff;
|
||||
void vp8_dequantize_b_v6(BLOCKD *d, short *DQC) {
|
||||
short *DQ = d->dqcoeff;
|
||||
short *Q = d->qcoeff;
|
||||
|
||||
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
|
||||
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
|
||||
}
|
||||
#endif
|
||||
|
@ -8,214 +8,169 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include <math.h>
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block2d_first_pass_armv6(
|
||||
unsigned char *src_ptr, short *output_ptr, unsigned int src_pixels_per_line,
|
||||
unsigned int output_width, unsigned int output_height,
|
||||
const short *vp8_filter);
|
||||
|
||||
// 8x8
|
||||
extern void vp8_filter_block2d_first_pass_8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block2d_first_pass_8x8_armv6(
|
||||
unsigned char *src_ptr, short *output_ptr, unsigned int src_pixels_per_line,
|
||||
unsigned int output_width, unsigned int output_height,
|
||||
const short *vp8_filter);
|
||||
|
||||
// 16x16
|
||||
extern void vp8_filter_block2d_first_pass_16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block2d_first_pass_16x16_armv6(
|
||||
unsigned char *src_ptr, short *output_ptr, unsigned int src_pixels_per_line,
|
||||
unsigned int output_width, unsigned int output_height,
|
||||
const short *vp8_filter);
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block2d_second_pass_armv6(short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter);
|
||||
|
||||
extern void vp8_filter4_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter4_block2d_second_pass_armv6(short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter);
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block2d_first_pass_only_armv6(
|
||||
unsigned char *src_ptr, unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line, unsigned int cnt,
|
||||
unsigned int output_pitch, const short *vp8_filter);
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block2d_second_pass_only_armv6(
|
||||
unsigned char *src_ptr, unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line, unsigned int cnt,
|
||||
unsigned int output_pitch, const short *vp8_filter);
|
||||
|
||||
#if HAVE_MEDIA
|
||||
void vp8_sixtap_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */
|
||||
void vp8_sixtap_predict4x4_armv6(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short,
|
||||
FData[12 * 4]); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset) {
|
||||
/*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2,
|
||||
src_pixels_per_line, 4, 4, HFilter );
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4,
|
||||
VFilter );*/
|
||||
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
/*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
|
||||
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
|
||||
vp8_filter_block2d_first_pass_only_armv6(
|
||||
src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset) {
|
||||
vp8_filter_block2d_second_pass_only_armv6(
|
||||
src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
|
||||
} else {
|
||||
/* Vfilter is a 4 tap filter */
|
||||
if (yoffset & 0x1) {
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line,
|
||||
FData + 1, src_pixels_per_line, 4, 7,
|
||||
HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4,
|
||||
VFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Vfilter is a 4 tap filter */
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
/* Vfilter is 6 tap filter */
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
/* Vfilter is 6 tap filter */
|
||||
else {
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line),
|
||||
FData, src_pixels_per_line, 4, 9,
|
||||
HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4,
|
||||
VFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */
|
||||
void vp8_sixtap_predict8x8_armv6(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short,
|
||||
FData[16 * 8]); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
if (xoffset && !yoffset) {
|
||||
vp8_filter_block2d_first_pass_only_armv6(
|
||||
src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset) {
|
||||
vp8_filter_block2d_second_pass_only_armv6(
|
||||
src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
|
||||
} else {
|
||||
if (yoffset & 0x1) {
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line,
|
||||
FData + 1, src_pixels_per_line, 8,
|
||||
11, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8,
|
||||
VFilter);
|
||||
} else {
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(
|
||||
src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8,
|
||||
13, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8,
|
||||
VFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict16x16_armv6(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short,
|
||||
FData[24 * 16]); /* Temp data buffer used in filtering */
|
||||
|
||||
void vp8_sixtap_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
|
||||
if (xoffset && !yoffset) {
|
||||
vp8_filter_block2d_first_pass_only_armv6(
|
||||
src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset) {
|
||||
vp8_filter_block2d_second_pass_only_armv6(
|
||||
src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
|
||||
} else {
|
||||
if (yoffset & 0x1) {
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line,
|
||||
FData + 1, src_pixels_per_line,
|
||||
16, 19, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16,
|
||||
VFilter);
|
||||
} else {
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(
|
||||
src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16,
|
||||
21, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16,
|
||||
VFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -8,15 +8,14 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit, \
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
|
||||
#if HAVE_MEDIA
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||
@ -27,10 +26,11 @@ extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
||||
|
||||
#if HAVE_NEON
|
||||
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh);
|
||||
unsigned char blimit, unsigned char limit,
|
||||
unsigned char thresh);
|
||||
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh,
|
||||
unsigned char *v);
|
||||
unsigned char blimit, unsigned char limit,
|
||||
unsigned char thresh, unsigned char *v);
|
||||
|
||||
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
|
||||
@ -46,136 +46,163 @@ extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
#if HAVE_MEDIA
|
||||
/* ARMV6/MEDIA loopfilter functions*/
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride,
|
||||
int uv_stride, loop_filter_info *lfi) {
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride,
|
||||
int uv_stride, loop_filter_info *lfi) {
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride,
|
||||
blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride,
|
||||
blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride,
|
||||
blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim,
|
||||
lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim,
|
||||
lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
/* NEON loopfilter functions */
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim,
|
||||
hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim,
|
||||
hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim,
|
||||
hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim,
|
||||
lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim,
|
||||
lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim,
|
||||
lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride,
|
||||
blim, lim, hev_thr,
|
||||
v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim,
|
||||
hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim,
|
||||
hev_thr, v_ptr + 4);
|
||||
}
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -10,50 +10,41 @@
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_copy_mem8x4_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
uint8x8_t vtmp;
|
||||
int r;
|
||||
void vp8_copy_mem8x4_neon(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride) {
|
||||
uint8x8_t vtmp;
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; r++) {
|
||||
vtmp = vld1_u8(src);
|
||||
vst1_u8(dst, vtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
for (r = 0; r < 4; r++) {
|
||||
vtmp = vld1_u8(src);
|
||||
vst1_u8(dst, vtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x8_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
uint8x8_t vtmp;
|
||||
int r;
|
||||
void vp8_copy_mem8x8_neon(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride) {
|
||||
uint8x8_t vtmp;
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 8; r++) {
|
||||
vtmp = vld1_u8(src);
|
||||
vst1_u8(dst, vtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
for (r = 0; r < 8; r++) {
|
||||
vtmp = vld1_u8(src);
|
||||
vst1_u8(dst, vtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_copy_mem16x16_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
uint8x16_t qtmp;
|
||||
void vp8_copy_mem16x16_neon(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride) {
|
||||
int r;
|
||||
uint8x16_t qtmp;
|
||||
|
||||
for (r = 0; r < 16; r++) {
|
||||
qtmp = vld1q_u8(src);
|
||||
vst1q_u8(dst, qtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
for (r = 0; r < 16; r++) {
|
||||
qtmp = vld1q_u8(src);
|
||||
vst1q_u8(dst, qtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
@ -10,33 +10,30 @@
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_dc_only_idct_add_neon(
|
||||
int16_t input_dc,
|
||||
unsigned char *pred_ptr,
|
||||
int pred_stride,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int i;
|
||||
uint16_t a1 = ((input_dc + 4) >> 3);
|
||||
uint32x2_t d2u32 = vdup_n_u32(0);
|
||||
uint8x8_t d2u8;
|
||||
uint16x8_t q1u16;
|
||||
uint16x8_t qAdd;
|
||||
void vp8_dc_only_idct_add_neon(int16_t input_dc, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int i;
|
||||
uint16_t a1 = ((input_dc + 4) >> 3);
|
||||
uint32x2_t d2u32 = vdup_n_u32(0);
|
||||
uint8x8_t d2u8;
|
||||
uint16x8_t q1u16;
|
||||
uint16x8_t qAdd;
|
||||
|
||||
qAdd = vdupq_n_u16(a1);
|
||||
qAdd = vdupq_n_u16(a1);
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
|
||||
pred_ptr += pred_stride;
|
||||
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
|
||||
pred_ptr += pred_stride;
|
||||
for (i = 0; i < 2; i++) {
|
||||
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
|
||||
pred_ptr += pred_stride;
|
||||
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
|
||||
pred_ptr += pred_stride;
|
||||
|
||||
q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
|
||||
dst_ptr += dst_stride;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
|
||||
dst_ptr += dst_stride;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
|
@ -11,132 +11,129 @@
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
|
||||
void vp8_dequant_idct_add_neon(
|
||||
int16_t *input,
|
||||
int16_t *dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0;
|
||||
int32x2_t d14, d15;
|
||||
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
|
||||
int16x8_t q1, q2, q3, q4, q5, q6;
|
||||
int16x8_t qEmpty = vdupq_n_s16(0);
|
||||
int32x2x2_t d2tmp0, d2tmp1;
|
||||
int16x4x2_t d2tmp2, d2tmp3;
|
||||
void vp8_dequant_idct_add_neon(int16_t *input, int16_t *dq, unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0;
|
||||
int32x2_t d14, d15;
|
||||
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
|
||||
int16x8_t q1, q2, q3, q4, q5, q6;
|
||||
int16x8_t qEmpty = vdupq_n_s16(0);
|
||||
int32x2x2_t d2tmp0, d2tmp1;
|
||||
int16x4x2_t d2tmp2, d2tmp3;
|
||||
|
||||
d14 = d15 = vdup_n_s32(0);
|
||||
d14 = d15 = vdup_n_s32(0);
|
||||
|
||||
// load input
|
||||
q3 = vld1q_s16(input);
|
||||
vst1q_s16(input, qEmpty);
|
||||
input += 8;
|
||||
q4 = vld1q_s16(input);
|
||||
vst1q_s16(input, qEmpty);
|
||||
// load input
|
||||
q3 = vld1q_s16(input);
|
||||
vst1q_s16(input, qEmpty);
|
||||
input += 8;
|
||||
q4 = vld1q_s16(input);
|
||||
vst1q_s16(input, qEmpty);
|
||||
|
||||
// load dq
|
||||
q5 = vld1q_s16(dq);
|
||||
dq += 8;
|
||||
q6 = vld1q_s16(dq);
|
||||
// load dq
|
||||
q5 = vld1q_s16(dq);
|
||||
dq += 8;
|
||||
q6 = vld1q_s16(dq);
|
||||
|
||||
// load src from dst
|
||||
dst0 = dst;
|
||||
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
|
||||
dst0 += stride;
|
||||
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
|
||||
dst0 += stride;
|
||||
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
|
||||
dst0 += stride;
|
||||
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
|
||||
// load src from dst
|
||||
dst0 = dst;
|
||||
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
|
||||
dst0 += stride;
|
||||
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
|
||||
dst0 += stride;
|
||||
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
|
||||
dst0 += stride;
|
||||
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
|
||||
|
||||
q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
|
||||
vreinterpretq_u16_s16(q5)));
|
||||
q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
|
||||
vreinterpretq_u16_s16(q6)));
|
||||
q1 = vreinterpretq_s16_u16(
|
||||
vmulq_u16(vreinterpretq_u16_s16(q3), vreinterpretq_u16_s16(q5)));
|
||||
q2 = vreinterpretq_s16_u16(
|
||||
vmulq_u16(vreinterpretq_u16_s16(q4), vreinterpretq_u16_s16(q6)));
|
||||
|
||||
d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
|
||||
d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
|
||||
d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
|
||||
d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
|
||||
|
||||
q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
|
||||
q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
|
||||
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[0]));
|
||||
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[1]));
|
||||
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[0]));
|
||||
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[1]));
|
||||
|
||||
// loop 2
|
||||
q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
|
||||
// loop 2
|
||||
q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
|
||||
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
|
||||
d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
d2 = vrshr_n_s16(d2, 3);
|
||||
d3 = vrshr_n_s16(d3, 3);
|
||||
d4 = vrshr_n_s16(d4, 3);
|
||||
d5 = vrshr_n_s16(d5, 3);
|
||||
d2 = vrshr_n_s16(d2, 3);
|
||||
d3 = vrshr_n_s16(d3, 3);
|
||||
d4 = vrshr_n_s16(d4, 3);
|
||||
d5 = vrshr_n_s16(d5, 3);
|
||||
|
||||
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[0]));
|
||||
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[1]));
|
||||
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[0]));
|
||||
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[1]));
|
||||
|
||||
q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
|
||||
q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
|
||||
q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
|
||||
q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
|
||||
|
||||
q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
|
||||
vreinterpret_u8_s32(d14)));
|
||||
q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
|
||||
vreinterpret_u8_s32(d15)));
|
||||
q1 = vreinterpretq_s16_u16(
|
||||
vaddw_u8(vreinterpretq_u16_s16(q1), vreinterpret_u8_s32(d14)));
|
||||
q2 = vreinterpretq_s16_u16(
|
||||
vaddw_u8(vreinterpretq_u16_s16(q2), vreinterpret_u8_s32(d15)));
|
||||
|
||||
d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
|
||||
d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
|
||||
d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
|
||||
d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
|
||||
|
||||
dst0 = dst;
|
||||
vst1_lane_s32((int32_t *)dst0, d14, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d14, 1);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d15, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d15, 1);
|
||||
return;
|
||||
dst0 = dst;
|
||||
vst1_lane_s32((int32_t *)dst0, d14, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d14, 1);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d15, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d15, 1);
|
||||
return;
|
||||
}
|
||||
|
@ -13,13 +13,13 @@
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
|
||||
int16x8x2_t qQ, qDQC, qDQ;
|
||||
int16x8x2_t qQ, qDQC, qDQ;
|
||||
|
||||
qQ = vld2q_s16(d->qcoeff);
|
||||
qDQC = vld2q_s16(DQC);
|
||||
qQ = vld2q_s16(d->qcoeff);
|
||||
qDQC = vld2q_s16(DQC);
|
||||
|
||||
qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
|
||||
qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
|
||||
qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
|
||||
qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
|
||||
|
||||
vst2q_s16(d->dqcoeff, qDQ);
|
||||
vst2q_s16(d->dqcoeff, qDQ);
|
||||
}
|
||||
|
@ -14,83 +14,71 @@
|
||||
/* place these declarations here because we don't want to maintain them
|
||||
* outside of this scope
|
||||
*/
|
||||
void idct_dequant_full_2x_neon(short *q, short *dq,
|
||||
unsigned char *dst, int stride);
|
||||
void idct_dequant_0_2x_neon(short *q, short dq,
|
||||
unsigned char *dst, int stride);
|
||||
void idct_dequant_full_2x_neon(short *q, short *dq, unsigned char *dst,
|
||||
int stride);
|
||||
void idct_dequant_0_2x_neon(short *q, short dq, unsigned char *dst, int stride);
|
||||
|
||||
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst,
|
||||
int stride, char *eobs) {
|
||||
int i;
|
||||
|
||||
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
|
||||
unsigned char *dst,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)eobs)[0] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dst, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dst, stride);
|
||||
}
|
||||
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)eobs)[1] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q+32, dq, dst+8, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride);
|
||||
}
|
||||
q += 64;
|
||||
dst += 4*stride;
|
||||
eobs += 4;
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (((short *)(eobs))[0]) {
|
||||
if (((short *)eobs)[0] & 0xfefe)
|
||||
idct_dequant_full_2x_neon(q, dq, dst, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon(q, dq[0], dst, stride);
|
||||
}
|
||||
|
||||
if (((short *)(eobs))[1]) {
|
||||
if (((short *)eobs)[1] & 0xfefe)
|
||||
idct_dequant_full_2x_neon(q + 32, dq, dst + 8, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon(q + 32, dq[0], dst + 8, stride);
|
||||
}
|
||||
q += 64;
|
||||
dst += 4 * stride;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
|
||||
unsigned char *dstu,
|
||||
unsigned char *dstv,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)eobs)[0] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstu, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
|
||||
}
|
||||
unsigned char *dstv, int stride,
|
||||
char *eobs) {
|
||||
if (((short *)(eobs))[0]) {
|
||||
if (((short *)eobs)[0] & 0xfefe)
|
||||
idct_dequant_full_2x_neon(q, dq, dstu, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon(q, dq[0], dstu, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4*stride;
|
||||
q += 32;
|
||||
dstu += 4 * stride;
|
||||
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)eobs)[1] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstu, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
|
||||
}
|
||||
if (((short *)(eobs))[1]) {
|
||||
if (((short *)eobs)[1] & 0xfefe)
|
||||
idct_dequant_full_2x_neon(q, dq, dstu, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon(q, dq[0], dstu, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
q += 32;
|
||||
|
||||
if (((short *)(eobs))[2])
|
||||
{
|
||||
if (((short *)eobs)[2] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstv, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
|
||||
}
|
||||
if (((short *)(eobs))[2]) {
|
||||
if (((short *)eobs)[2] & 0xfefe)
|
||||
idct_dequant_full_2x_neon(q, dq, dstv, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon(q, dq[0], dstv, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4*stride;
|
||||
q += 32;
|
||||
dstv += 4 * stride;
|
||||
|
||||
if (((short *)(eobs))[3])
|
||||
{
|
||||
if (((short *)eobs)[3] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstv, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
|
||||
}
|
||||
if (((short *)(eobs))[3]) {
|
||||
if (((short *)eobs)[3] & 0xfefe)
|
||||
idct_dequant_full_2x_neon(q, dq, dstv, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon(q, dq[0], dstv, stride);
|
||||
}
|
||||
}
|
||||
|
@ -10,54 +10,50 @@
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void idct_dequant_0_2x_neon(
|
||||
int16_t *q,
|
||||
int16_t dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0;
|
||||
int i, a0, a1;
|
||||
int16x8x2_t q2Add;
|
||||
int32x2_t d2s32 = vdup_n_s32(0),
|
||||
d4s32 = vdup_n_s32(0);
|
||||
uint8x8_t d2u8, d4u8;
|
||||
uint16x8_t q1u16, q2u16;
|
||||
void idct_dequant_0_2x_neon(int16_t *q, int16_t dq, unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0;
|
||||
int i, a0, a1;
|
||||
int16x8x2_t q2Add;
|
||||
int32x2_t d2s32 = vdup_n_s32(0), d4s32 = vdup_n_s32(0);
|
||||
uint8x8_t d2u8, d4u8;
|
||||
uint16x8_t q1u16, q2u16;
|
||||
|
||||
a0 = ((q[0] * dq) + 4) >> 3;
|
||||
a1 = ((q[16] * dq) + 4) >> 3;
|
||||
q[0] = q[16] = 0;
|
||||
q2Add.val[0] = vdupq_n_s16((int16_t)a0);
|
||||
q2Add.val[1] = vdupq_n_s16((int16_t)a1);
|
||||
a0 = ((q[0] * dq) + 4) >> 3;
|
||||
a1 = ((q[16] * dq) + 4) >> 3;
|
||||
q[0] = q[16] = 0;
|
||||
q2Add.val[0] = vdupq_n_s16((int16_t)a0);
|
||||
q2Add.val[1] = vdupq_n_s16((int16_t)a1);
|
||||
|
||||
for (i = 0; i < 2; i++, dst += 4) {
|
||||
dst0 = dst;
|
||||
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
|
||||
dst0 += stride;
|
||||
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
|
||||
dst0 += stride;
|
||||
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
|
||||
dst0 += stride;
|
||||
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
|
||||
for (i = 0; i < 2; i++, dst += 4) {
|
||||
dst0 = dst;
|
||||
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
|
||||
dst0 += stride;
|
||||
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
|
||||
dst0 += stride;
|
||||
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
|
||||
dst0 += stride;
|
||||
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
|
||||
|
||||
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
|
||||
vreinterpret_u8_s32(d2s32));
|
||||
q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
|
||||
vreinterpret_u8_s32(d4s32));
|
||||
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
|
||||
vreinterpret_u8_s32(d2s32));
|
||||
q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
|
||||
vreinterpret_u8_s32(d4s32));
|
||||
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
|
||||
|
||||
d2s32 = vreinterpret_s32_u8(d2u8);
|
||||
d4s32 = vreinterpret_s32_u8(d4u8);
|
||||
d2s32 = vreinterpret_s32_u8(d2u8);
|
||||
d4s32 = vreinterpret_s32_u8(d4u8);
|
||||
|
||||
dst0 = dst;
|
||||
vst1_lane_s32((int32_t *)dst0, d2s32, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d2s32, 1);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d4s32, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d4s32, 1);
|
||||
}
|
||||
return;
|
||||
dst0 = dst;
|
||||
vst1_lane_s32((int32_t *)dst0, d2s32, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d2s32, 1);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d4s32, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d4s32, 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -11,175 +11,172 @@
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 17734;
|
||||
static const int16_t sinpi8sqrt2 = 17734;
|
||||
// because the lowest bit in 0x8a8c is 0, we can pre-shift this
|
||||
|
||||
void idct_dequant_full_2x_neon(
|
||||
int16_t *q,
|
||||
int16_t *dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0, *dst1;
|
||||
int32x2_t d28, d29, d30, d31;
|
||||
int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
|
||||
int16x8_t qEmpty = vdupq_n_s16(0);
|
||||
int32x4x2_t q2tmp0, q2tmp1;
|
||||
int16x8x2_t q2tmp2, q2tmp3;
|
||||
int16x4_t dLow0, dLow1, dHigh0, dHigh1;
|
||||
void idct_dequant_full_2x_neon(int16_t *q, int16_t *dq, unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0, *dst1;
|
||||
int32x2_t d28, d29, d30, d31;
|
||||
int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
|
||||
int16x8_t qEmpty = vdupq_n_s16(0);
|
||||
int32x4x2_t q2tmp0, q2tmp1;
|
||||
int16x8x2_t q2tmp2, q2tmp3;
|
||||
int16x4_t dLow0, dLow1, dHigh0, dHigh1;
|
||||
|
||||
d28 = d29 = d30 = d31 = vdup_n_s32(0);
|
||||
d28 = d29 = d30 = d31 = vdup_n_s32(0);
|
||||
|
||||
// load dq
|
||||
q0 = vld1q_s16(dq);
|
||||
dq += 8;
|
||||
q1 = vld1q_s16(dq);
|
||||
// load dq
|
||||
q0 = vld1q_s16(dq);
|
||||
dq += 8;
|
||||
q1 = vld1q_s16(dq);
|
||||
|
||||
// load q
|
||||
q2 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q3 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q4 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q5 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
// load q
|
||||
q2 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q3 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q4 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q5 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
|
||||
// load src from dst
|
||||
dst0 = dst;
|
||||
dst1 = dst + 4;
|
||||
d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
|
||||
dst0 += stride;
|
||||
d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
|
||||
dst1 += stride;
|
||||
d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
|
||||
dst0 += stride;
|
||||
d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
|
||||
dst1 += stride;
|
||||
// load src from dst
|
||||
dst0 = dst;
|
||||
dst1 = dst + 4;
|
||||
d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
|
||||
dst0 += stride;
|
||||
d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
|
||||
dst1 += stride;
|
||||
d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
|
||||
dst0 += stride;
|
||||
d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
|
||||
dst1 += stride;
|
||||
|
||||
d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
|
||||
dst0 += stride;
|
||||
d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
|
||||
dst1 += stride;
|
||||
d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
|
||||
d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
|
||||
d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
|
||||
dst0 += stride;
|
||||
d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
|
||||
dst1 += stride;
|
||||
d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
|
||||
d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
|
||||
|
||||
q2 = vmulq_s16(q2, q0);
|
||||
q3 = vmulq_s16(q3, q1);
|
||||
q4 = vmulq_s16(q4, q0);
|
||||
q5 = vmulq_s16(q5, q1);
|
||||
q2 = vmulq_s16(q2, q0);
|
||||
q3 = vmulq_s16(q3, q1);
|
||||
q4 = vmulq_s16(q4, q0);
|
||||
q5 = vmulq_s16(q5, q1);
|
||||
|
||||
// vswp
|
||||
dLow0 = vget_low_s16(q2);
|
||||
dHigh0 = vget_high_s16(q2);
|
||||
dLow1 = vget_low_s16(q4);
|
||||
dHigh1 = vget_high_s16(q4);
|
||||
q2 = vcombine_s16(dLow0, dLow1);
|
||||
q4 = vcombine_s16(dHigh0, dHigh1);
|
||||
// vswp
|
||||
dLow0 = vget_low_s16(q2);
|
||||
dHigh0 = vget_high_s16(q2);
|
||||
dLow1 = vget_low_s16(q4);
|
||||
dHigh1 = vget_high_s16(q4);
|
||||
q2 = vcombine_s16(dLow0, dLow1);
|
||||
q4 = vcombine_s16(dHigh0, dHigh1);
|
||||
|
||||
dLow0 = vget_low_s16(q3);
|
||||
dHigh0 = vget_high_s16(q3);
|
||||
dLow1 = vget_low_s16(q5);
|
||||
dHigh1 = vget_high_s16(q5);
|
||||
q3 = vcombine_s16(dLow0, dLow1);
|
||||
q5 = vcombine_s16(dHigh0, dHigh1);
|
||||
dLow0 = vget_low_s16(q3);
|
||||
dHigh0 = vget_high_s16(q3);
|
||||
dLow1 = vget_low_s16(q5);
|
||||
dHigh1 = vget_high_s16(q5);
|
||||
q3 = vcombine_s16(dLow0, dLow1);
|
||||
q5 = vcombine_s16(dHigh0, dHigh1);
|
||||
|
||||
q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
|
||||
q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
|
||||
q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
|
||||
q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
|
||||
q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
|
||||
q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
|
||||
q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
|
||||
q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
|
||||
|
||||
q10 = vqaddq_s16(q2, q3);
|
||||
q11 = vqsubq_s16(q2, q3);
|
||||
q10 = vqaddq_s16(q2, q3);
|
||||
q11 = vqsubq_s16(q2, q3);
|
||||
|
||||
q8 = vshrq_n_s16(q8, 1);
|
||||
q9 = vshrq_n_s16(q9, 1);
|
||||
q8 = vshrq_n_s16(q8, 1);
|
||||
q9 = vshrq_n_s16(q9, 1);
|
||||
|
||||
q4 = vqaddq_s16(q4, q8);
|
||||
q5 = vqaddq_s16(q5, q9);
|
||||
q4 = vqaddq_s16(q4, q8);
|
||||
q5 = vqaddq_s16(q5, q9);
|
||||
|
||||
q2 = vqsubq_s16(q6, q5);
|
||||
q3 = vqaddq_s16(q7, q4);
|
||||
q2 = vqsubq_s16(q6, q5);
|
||||
q3 = vqaddq_s16(q7, q4);
|
||||
|
||||
q4 = vqaddq_s16(q10, q3);
|
||||
q5 = vqaddq_s16(q11, q2);
|
||||
q6 = vqsubq_s16(q11, q2);
|
||||
q7 = vqsubq_s16(q10, q3);
|
||||
q4 = vqaddq_s16(q10, q3);
|
||||
q5 = vqaddq_s16(q11, q2);
|
||||
q6 = vqsubq_s16(q11, q2);
|
||||
q7 = vqsubq_s16(q10, q3);
|
||||
|
||||
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
|
||||
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
|
||||
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[0]));
|
||||
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[1]));
|
||||
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
|
||||
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
|
||||
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[0]));
|
||||
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[1]));
|
||||
|
||||
// loop 2
|
||||
q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
|
||||
q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
|
||||
q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
|
||||
q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
|
||||
// loop 2
|
||||
q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
|
||||
q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
|
||||
q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
|
||||
q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
|
||||
|
||||
q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
|
||||
q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
|
||||
q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
|
||||
q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
|
||||
|
||||
q10 = vshrq_n_s16(q10, 1);
|
||||
q11 = vshrq_n_s16(q11, 1);
|
||||
q10 = vshrq_n_s16(q10, 1);
|
||||
q11 = vshrq_n_s16(q11, 1);
|
||||
|
||||
q10 = vqaddq_s16(q2tmp2.val[1], q10);
|
||||
q11 = vqaddq_s16(q2tmp3.val[1], q11);
|
||||
q10 = vqaddq_s16(q2tmp2.val[1], q10);
|
||||
q11 = vqaddq_s16(q2tmp3.val[1], q11);
|
||||
|
||||
q8 = vqsubq_s16(q8, q11);
|
||||
q9 = vqaddq_s16(q9, q10);
|
||||
q8 = vqsubq_s16(q8, q11);
|
||||
q9 = vqaddq_s16(q9, q10);
|
||||
|
||||
q4 = vqaddq_s16(q2, q9);
|
||||
q5 = vqaddq_s16(q3, q8);
|
||||
q6 = vqsubq_s16(q3, q8);
|
||||
q7 = vqsubq_s16(q2, q9);
|
||||
q4 = vqaddq_s16(q2, q9);
|
||||
q5 = vqaddq_s16(q3, q8);
|
||||
q6 = vqsubq_s16(q3, q8);
|
||||
q7 = vqsubq_s16(q2, q9);
|
||||
|
||||
q4 = vrshrq_n_s16(q4, 3);
|
||||
q5 = vrshrq_n_s16(q5, 3);
|
||||
q6 = vrshrq_n_s16(q6, 3);
|
||||
q7 = vrshrq_n_s16(q7, 3);
|
||||
q4 = vrshrq_n_s16(q4, 3);
|
||||
q5 = vrshrq_n_s16(q5, 3);
|
||||
q6 = vrshrq_n_s16(q6, 3);
|
||||
q7 = vrshrq_n_s16(q7, 3);
|
||||
|
||||
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
|
||||
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
|
||||
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[0]));
|
||||
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[1]));
|
||||
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
|
||||
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
|
||||
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[0]));
|
||||
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[1]));
|
||||
|
||||
q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
|
||||
vreinterpret_u8_s32(d28)));
|
||||
q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
|
||||
vreinterpret_u8_s32(d29)));
|
||||
q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
|
||||
vreinterpret_u8_s32(d30)));
|
||||
q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
|
||||
vreinterpret_u8_s32(d31)));
|
||||
q4 = vreinterpretq_s16_u16(
|
||||
vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), vreinterpret_u8_s32(d28)));
|
||||
q5 = vreinterpretq_s16_u16(
|
||||
vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), vreinterpret_u8_s32(d29)));
|
||||
q6 = vreinterpretq_s16_u16(
|
||||
vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), vreinterpret_u8_s32(d30)));
|
||||
q7 = vreinterpretq_s16_u16(
|
||||
vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), vreinterpret_u8_s32(d31)));
|
||||
|
||||
d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
|
||||
d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
|
||||
d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
|
||||
d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
|
||||
d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
|
||||
d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
|
||||
d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
|
||||
d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
|
||||
|
||||
dst0 = dst;
|
||||
dst1 = dst + 4;
|
||||
vst1_lane_s32((int32_t *)dst0, d28, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d28, 1);
|
||||
dst1 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d29, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d29, 1);
|
||||
dst1 += stride;
|
||||
dst0 = dst;
|
||||
dst1 = dst + 4;
|
||||
vst1_lane_s32((int32_t *)dst0, d28, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d28, 1);
|
||||
dst1 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d29, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d29, 1);
|
||||
dst1 += stride;
|
||||
|
||||
vst1_lane_s32((int32_t *)dst0, d30, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d30, 1);
|
||||
dst1 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d31, 0);
|
||||
vst1_lane_s32((int32_t *)dst1, d31, 1);
|
||||
return;
|
||||
vst1_lane_s32((int32_t *)dst0, d30, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d30, 1);
|
||||
dst1 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d31, 0);
|
||||
vst1_lane_s32((int32_t *)dst1, d31, 1);
|
||||
return;
|
||||
}
|
||||
|
@ -10,93 +10,91 @@
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_short_inv_walsh4x4_neon(
|
||||
int16_t *input,
|
||||
int16_t *mb_dqcoeff) {
|
||||
int16x8_t q0s16, q1s16, q2s16, q3s16;
|
||||
int16x4_t d4s16, d5s16, d6s16, d7s16;
|
||||
int16x4x2_t v2tmp0, v2tmp1;
|
||||
int32x2x2_t v2tmp2, v2tmp3;
|
||||
int16x8_t qAdd3;
|
||||
void vp8_short_inv_walsh4x4_neon(int16_t *input, int16_t *mb_dqcoeff) {
|
||||
int16x8_t q0s16, q1s16, q2s16, q3s16;
|
||||
int16x4_t d4s16, d5s16, d6s16, d7s16;
|
||||
int16x4x2_t v2tmp0, v2tmp1;
|
||||
int32x2x2_t v2tmp2, v2tmp3;
|
||||
int16x8_t qAdd3;
|
||||
|
||||
q0s16 = vld1q_s16(input);
|
||||
q1s16 = vld1q_s16(input + 8);
|
||||
q0s16 = vld1q_s16(input);
|
||||
q1s16 = vld1q_s16(input + 8);
|
||||
|
||||
// 1st for loop
|
||||
d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
|
||||
d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
|
||||
d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
|
||||
d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
|
||||
// 1st for loop
|
||||
d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
|
||||
d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
|
||||
d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
|
||||
d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
|
||||
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
q3s16 = vcombine_s16(d6s16, d7s16);
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
q3s16 = vcombine_s16(d6s16, d7s16);
|
||||
|
||||
q0s16 = vaddq_s16(q2s16, q3s16);
|
||||
q1s16 = vsubq_s16(q2s16, q3s16);
|
||||
q0s16 = vaddq_s16(q2s16, q3s16);
|
||||
q1s16 = vsubq_s16(q2s16, q3s16);
|
||||
|
||||
v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
|
||||
vreinterpret_s32_s16(vget_low_s16(q1s16)));
|
||||
v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
|
||||
vreinterpret_s32_s16(vget_high_s16(q1s16)));
|
||||
v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp3.val[0]));
|
||||
v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp3.val[1]));
|
||||
v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
|
||||
vreinterpret_s32_s16(vget_low_s16(q1s16)));
|
||||
v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
|
||||
vreinterpret_s32_s16(vget_high_s16(q1s16)));
|
||||
v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp3.val[0]));
|
||||
v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp3.val[1]));
|
||||
|
||||
// 2nd for loop
|
||||
d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
|
||||
d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
|
||||
d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
|
||||
d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
q3s16 = vcombine_s16(d6s16, d7s16);
|
||||
// 2nd for loop
|
||||
d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
|
||||
d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
|
||||
d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
|
||||
d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
q3s16 = vcombine_s16(d6s16, d7s16);
|
||||
|
||||
qAdd3 = vdupq_n_s16(3);
|
||||
qAdd3 = vdupq_n_s16(3);
|
||||
|
||||
q0s16 = vaddq_s16(q2s16, q3s16);
|
||||
q1s16 = vsubq_s16(q2s16, q3s16);
|
||||
q0s16 = vaddq_s16(q2s16, q3s16);
|
||||
q1s16 = vsubq_s16(q2s16, q3s16);
|
||||
|
||||
q0s16 = vaddq_s16(q0s16, qAdd3);
|
||||
q1s16 = vaddq_s16(q1s16, qAdd3);
|
||||
q0s16 = vaddq_s16(q0s16, qAdd3);
|
||||
q1s16 = vaddq_s16(q1s16, qAdd3);
|
||||
|
||||
q0s16 = vshrq_n_s16(q0s16, 3);
|
||||
q1s16 = vshrq_n_s16(q1s16, 3);
|
||||
q0s16 = vshrq_n_s16(q0s16, 3);
|
||||
q1s16 = vshrq_n_s16(q1s16, 3);
|
||||
|
||||
// store
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
// store
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
return;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
return;
|
||||
}
|
||||
|
@ -12,100 +12,93 @@
|
||||
#include "./vpx_config.h"
|
||||
|
||||
static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit) {
|
||||
uint8_t *sp;
|
||||
uint8x16_t qblimit, q0u8;
|
||||
uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q3s16, q13s16;
|
||||
int8x8_t d8s8, d9s8;
|
||||
int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
|
||||
unsigned char *s, int p, const unsigned char *blimit) {
|
||||
uint8_t *sp;
|
||||
uint8x16_t qblimit, q0u8;
|
||||
uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q3s16, q13s16;
|
||||
int8x8_t d8s8, d9s8;
|
||||
int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
|
||||
|
||||
qblimit = vdupq_n_u8(*blimit);
|
||||
qblimit = vdupq_n_u8(*blimit);
|
||||
|
||||
sp = s - (p << 1);
|
||||
q5u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q6u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q7u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q8u8 = vld1q_u8(sp);
|
||||
sp = s - (p << 1);
|
||||
q5u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q6u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q7u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q8u8 = vld1q_u8(sp);
|
||||
|
||||
q15u8 = vabdq_u8(q6u8, q7u8);
|
||||
q14u8 = vabdq_u8(q5u8, q8u8);
|
||||
q15u8 = vabdq_u8(q6u8, q7u8);
|
||||
q14u8 = vabdq_u8(q5u8, q8u8);
|
||||
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q13s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q13s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
q7u8 = veorq_u8(q7u8, q0u8);
|
||||
q8u8 = veorq_u8(q8u8, q0u8);
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
q7u8 = veorq_u8(q7u8, q0u8);
|
||||
q8u8 = veorq_u8(q8u8, q0u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
|
||||
q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
|
||||
vreinterpretq_s8_u8(q8u8));
|
||||
q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8), vreinterpretq_s8_u8(q8u8));
|
||||
|
||||
q2s16 = vmulq_s16(q2s16, q13s16);
|
||||
q3s16 = vmulq_s16(q3s16, q13s16);
|
||||
q2s16 = vmulq_s16(q2s16, q13s16);
|
||||
q3s16 = vmulq_s16(q3s16, q13s16);
|
||||
|
||||
q10u8 = vdupq_n_u8(3);
|
||||
q9u8 = vdupq_n_u8(4);
|
||||
q10u8 = vdupq_n_u8(3);
|
||||
q9u8 = vdupq_n_u8(4);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
|
||||
q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
|
||||
q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
|
||||
|
||||
d8s8 = vqmovn_s16(q2s16);
|
||||
d9s8 = vqmovn_s16(q3s16);
|
||||
q4s8 = vcombine_s8(d8s8, d9s8);
|
||||
d8s8 = vqmovn_s16(q2s16);
|
||||
d9s8 = vqmovn_s16(q3s16);
|
||||
q4s8 = vcombine_s8(d8s8, d9s8);
|
||||
|
||||
q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
|
||||
q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q3s8 = vshrq_n_s8(q3s8, 3);
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q3s8 = vshrq_n_s8(q3s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
|
||||
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
|
||||
vst1q_u8(s, q7u8);
|
||||
s -= p;
|
||||
vst1q_u8(s, q6u8);
|
||||
return;
|
||||
vst1q_u8(s, q7u8);
|
||||
s -= p;
|
||||
vst1q_u8(s, q6u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
@ -15,34 +15,33 @@
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
static INLINE void write_2x4(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result) {
|
||||
/*
|
||||
* uint8x8x2_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 02 12 | 04 14 06 16
|
||||
01 11 03 13 | 05 15 07 17
|
||||
*/
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0],
|
||||
result.val[1]);
|
||||
const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
|
||||
const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
|
||||
/*
|
||||
* uint8x8x2_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 02 12 | 04 14 06 16
|
||||
01 11 03 13 | 05 15 07 17
|
||||
*/
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0], result.val[1]);
|
||||
const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
|
||||
const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
|
||||
}
|
||||
|
||||
static INLINE void write_2x8(unsigned char *dst, int pitch,
|
||||
@ -91,193 +90,183 @@ static INLINE void write_2x8(unsigned char *dst, int pitch,
|
||||
}
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
|
||||
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
static INLINE
|
||||
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
const uint8x8_t a = vld1_u8(src);
|
||||
const uint8x8_t b = vld1_u8(src + pitch * 1);
|
||||
const uint8x8_t c = vld1_u8(src + pitch * 2);
|
||||
const uint8x8_t d = vld1_u8(src + pitch * 3);
|
||||
const uint8x8_t e = vld1_u8(src + pitch * 4);
|
||||
const uint8x8_t f = vld1_u8(src + pitch * 5);
|
||||
const uint8x8_t g = vld1_u8(src + pitch * 6);
|
||||
const uint8x8_t h = vld1_u8(src + pitch * 7);
|
||||
const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a),
|
||||
vreinterpret_u32_u8(e));
|
||||
const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b),
|
||||
vreinterpret_u32_u8(f));
|
||||
const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c),
|
||||
vreinterpret_u32_u8(g));
|
||||
const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d),
|
||||
vreinterpret_u32_u8(h));
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
|
||||
vreinterpret_u16_u32(r26_u32.val[0]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
|
||||
vreinterpret_u16_u32(r37_u32.val[0]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
/*
|
||||
* after vtrn_u32
|
||||
00 01 02 03 | 40 41 42 43
|
||||
10 11 12 13 | 50 51 52 53
|
||||
20 21 22 23 | 60 61 62 63
|
||||
30 31 32 33 | 70 71 72 73
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 40 41 60 61
|
||||
02 03 22 23 | 42 43 62 63
|
||||
10 11 30 31 | 50 51 70 71
|
||||
12 13 32 33 | 52 53 72 73
|
||||
static INLINE uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
const uint8x8_t a = vld1_u8(src);
|
||||
const uint8x8_t b = vld1_u8(src + pitch * 1);
|
||||
const uint8x8_t c = vld1_u8(src + pitch * 2);
|
||||
const uint8x8_t d = vld1_u8(src + pitch * 3);
|
||||
const uint8x8_t e = vld1_u8(src + pitch * 4);
|
||||
const uint8x8_t f = vld1_u8(src + pitch * 5);
|
||||
const uint8x8_t g = vld1_u8(src + pitch * 6);
|
||||
const uint8x8_t h = vld1_u8(src + pitch * 7);
|
||||
const uint32x2x2_t r04_u32 =
|
||||
vtrn_u32(vreinterpret_u32_u8(a), vreinterpret_u32_u8(e));
|
||||
const uint32x2x2_t r15_u32 =
|
||||
vtrn_u32(vreinterpret_u32_u8(b), vreinterpret_u32_u8(f));
|
||||
const uint32x2x2_t r26_u32 =
|
||||
vtrn_u32(vreinterpret_u32_u8(c), vreinterpret_u32_u8(g));
|
||||
const uint32x2x2_t r37_u32 =
|
||||
vtrn_u32(vreinterpret_u32_u8(d), vreinterpret_u32_u8(h));
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
|
||||
vreinterpret_u16_u32(r26_u32.val[0]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
|
||||
vreinterpret_u16_u32(r37_u32.val[0]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
/*
|
||||
* after vtrn_u32
|
||||
00 01 02 03 | 40 41 42 43
|
||||
10 11 12 13 | 50 51 52 53
|
||||
20 21 22 23 | 60 61 62 63
|
||||
30 31 32 33 | 70 71 72 73
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 40 41 60 61
|
||||
02 03 22 23 | 42 43 62 63
|
||||
10 11 30 31 | 50 51 70 71
|
||||
12 13 32 33 | 52 53 72 73
|
||||
|
||||
00 01 20 21 | 40 41 60 61
|
||||
10 11 30 31 | 50 51 70 71
|
||||
02 03 22 23 | 42 43 62 63
|
||||
12 13 32 33 | 52 53 72 73
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 40 50 60 70
|
||||
01 11 21 31 | 41 51 61 71
|
||||
02 12 22 32 | 42 52 62 72
|
||||
03 13 23 33 | 43 53 63 73
|
||||
*/
|
||||
x.val[0] = r01_u8.val[0];
|
||||
x.val[1] = r01_u8.val[1];
|
||||
x.val[2] = r23_u8.val[0];
|
||||
x.val[3] = r23_u8.val[1];
|
||||
00 01 20 21 | 40 41 60 61
|
||||
10 11 30 31 | 50 51 70 71
|
||||
02 03 22 23 | 42 43 62 63
|
||||
12 13 32 33 | 52 53 72 73
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 40 50 60 70
|
||||
01 11 21 31 | 41 51 61 71
|
||||
02 12 22 32 | 42 52 62 72
|
||||
03 13 23 33 | 43 53 63 73
|
||||
*/
|
||||
x.val[0] = r01_u8.val[0];
|
||||
x.val[1] = r01_u8.val[1];
|
||||
x.val[2] = r23_u8.val[0];
|
||||
x.val[3] = r23_u8.val[1];
|
||||
|
||||
return x;
|
||||
return x;
|
||||
}
|
||||
#else
|
||||
static INLINE
|
||||
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
|
||||
x = vld4_lane_u8(src, x, 0);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 1);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 2);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 3);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 4);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 5);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 6);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 7);
|
||||
return x;
|
||||
static INLINE uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
|
||||
x = vld4_lane_u8(src, x, 0);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 1);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 2);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 3);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 4);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 5);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 6);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 7);
|
||||
return x;
|
||||
}
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
|
||||
static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit) {
|
||||
unsigned char *src1;
|
||||
uint8x16_t qblimit, q0u8;
|
||||
uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q13s16, q11s16;
|
||||
int8x8_t d28s8, d29s8;
|
||||
int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
|
||||
uint8x8x4_t d0u8x4; // d6, d7, d8, d9
|
||||
uint8x8x4_t d1u8x4; // d10, d11, d12, d13
|
||||
uint8x8x2_t d2u8x2; // d12, d13
|
||||
uint8x8x2_t d3u8x2; // d14, d15
|
||||
unsigned char *s, int p, const unsigned char *blimit) {
|
||||
unsigned char *src1;
|
||||
uint8x16_t qblimit, q0u8;
|
||||
uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q13s16, q11s16;
|
||||
int8x8_t d28s8, d29s8;
|
||||
int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
|
||||
uint8x8x4_t d0u8x4; // d6, d7, d8, d9
|
||||
uint8x8x4_t d1u8x4; // d10, d11, d12, d13
|
||||
uint8x8x2_t d2u8x2; // d12, d13
|
||||
uint8x8x2_t d3u8x2; // d14, d15
|
||||
|
||||
qblimit = vdupq_n_u8(*blimit);
|
||||
qblimit = vdupq_n_u8(*blimit);
|
||||
|
||||
src1 = s - 2;
|
||||
d0u8x4 = read_4x8(src1, p);
|
||||
src1 += p * 8;
|
||||
d1u8x4 = read_4x8(src1, p);
|
||||
src1 = s - 2;
|
||||
d0u8x4 = read_4x8(src1, p);
|
||||
src1 += p * 8;
|
||||
d1u8x4 = read_4x8(src1, p);
|
||||
|
||||
q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10
|
||||
q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12
|
||||
q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11
|
||||
q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13
|
||||
q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10
|
||||
q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12
|
||||
q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11
|
||||
q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13
|
||||
|
||||
q15u8 = vabdq_u8(q5u8, q4u8);
|
||||
q14u8 = vabdq_u8(q3u8, q6u8);
|
||||
q15u8 = vabdq_u8(q5u8, q4u8);
|
||||
q14u8 = vabdq_u8(q3u8, q6u8);
|
||||
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q11s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q11s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
|
||||
q3u8 = veorq_u8(q3u8, q0u8);
|
||||
q4u8 = veorq_u8(q4u8, q0u8);
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
q3u8 = veorq_u8(q3u8, q0u8);
|
||||
q4u8 = veorq_u8(q4u8, q0u8);
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q5u8)));
|
||||
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q5u8)));
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q5u8)));
|
||||
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q5u8)));
|
||||
|
||||
q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8),
|
||||
vreinterpretq_s8_u8(q6u8));
|
||||
q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8), vreinterpretq_s8_u8(q6u8));
|
||||
|
||||
q2s16 = vmulq_s16(q2s16, q11s16);
|
||||
q13s16 = vmulq_s16(q13s16, q11s16);
|
||||
q2s16 = vmulq_s16(q2s16, q11s16);
|
||||
q13s16 = vmulq_s16(q13s16, q11s16);
|
||||
|
||||
q11u8 = vdupq_n_u8(3);
|
||||
q12u8 = vdupq_n_u8(4);
|
||||
q11u8 = vdupq_n_u8(3);
|
||||
q12u8 = vdupq_n_u8(4);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
|
||||
q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
|
||||
q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));
|
||||
|
||||
d28s8 = vqmovn_s16(q2s16);
|
||||
d29s8 = vqmovn_s16(q13s16);
|
||||
q14s8 = vcombine_s8(d28s8, d29s8);
|
||||
d28s8 = vqmovn_s16(q2s16);
|
||||
d29s8 = vqmovn_s16(q13s16);
|
||||
q14s8 = vcombine_s8(d28s8, d29s8);
|
||||
|
||||
q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));
|
||||
q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q14s8 = vshrq_n_s8(q3s8, 3);
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q14s8 = vshrq_n_s8(q3s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);
|
||||
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
|
||||
d2u8x2.val[0] = vget_low_u8(q6u8); // d12
|
||||
d2u8x2.val[1] = vget_low_u8(q7u8); // d14
|
||||
d3u8x2.val[0] = vget_high_u8(q6u8); // d13
|
||||
d3u8x2.val[1] = vget_high_u8(q7u8); // d15
|
||||
d2u8x2.val[0] = vget_low_u8(q6u8); // d12
|
||||
d2u8x2.val[1] = vget_low_u8(q7u8); // d14
|
||||
d3u8x2.val[0] = vget_high_u8(q6u8); // d13
|
||||
d3u8x2.val[1] = vget_high_u8(q7u8); // d15
|
||||
|
||||
src1 = s - 1;
|
||||
write_2x8(src1, p, d2u8x2, d3u8x2);
|
||||
src1 = s - 1;
|
||||
write_2x8(src1, p, d2u8x2, d3u8x2);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -11,113 +11,109 @@
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
|
||||
void vp8_short_idct4x4llm_neon(
|
||||
int16_t *input,
|
||||
unsigned char *pred_ptr,
|
||||
int pred_stride,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int i;
|
||||
uint32x2_t d6u32 = vdup_n_u32(0);
|
||||
uint8x8_t d1u8;
|
||||
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
|
||||
uint16x8_t q1u16;
|
||||
int16x8_t q1s16, q2s16, q3s16, q4s16;
|
||||
int32x2x2_t v2tmp0, v2tmp1;
|
||||
int16x4x2_t v2tmp2, v2tmp3;
|
||||
void vp8_short_idct4x4llm_neon(int16_t *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int i;
|
||||
uint32x2_t d6u32 = vdup_n_u32(0);
|
||||
uint8x8_t d1u8;
|
||||
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
|
||||
uint16x8_t q1u16;
|
||||
int16x8_t q1s16, q2s16, q3s16, q4s16;
|
||||
int32x2x2_t v2tmp0, v2tmp1;
|
||||
int16x4x2_t v2tmp2, v2tmp3;
|
||||
|
||||
d2 = vld1_s16(input);
|
||||
d3 = vld1_s16(input + 4);
|
||||
d4 = vld1_s16(input + 8);
|
||||
d5 = vld1_s16(input + 12);
|
||||
d2 = vld1_s16(input);
|
||||
d3 = vld1_s16(input + 4);
|
||||
d4 = vld1_s16(input + 8);
|
||||
d5 = vld1_s16(input + 12);
|
||||
|
||||
// 1st for loop
|
||||
q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here
|
||||
q2s16 = vcombine_s16(d3, d5);
|
||||
// 1st for loop
|
||||
q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here
|
||||
q2s16 = vcombine_s16(d3, d5);
|
||||
|
||||
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
|
||||
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
|
||||
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
|
||||
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
|
||||
|
||||
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
|
||||
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
|
||||
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
|
||||
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
|
||||
|
||||
q3s16 = vshrq_n_s16(q3s16, 1);
|
||||
q4s16 = vshrq_n_s16(q4s16, 1);
|
||||
q3s16 = vshrq_n_s16(q3s16, 1);
|
||||
q4s16 = vshrq_n_s16(q4s16, 1);
|
||||
|
||||
q3s16 = vqaddq_s16(q3s16, q2s16);
|
||||
q4s16 = vqaddq_s16(q4s16, q2s16);
|
||||
q3s16 = vqaddq_s16(q3s16, q2s16);
|
||||
q4s16 = vqaddq_s16(q4s16, q2s16);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
|
||||
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
|
||||
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
|
||||
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[0]));
|
||||
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[1]));
|
||||
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[0]));
|
||||
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[1]));
|
||||
|
||||
// 2nd for loop
|
||||
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
|
||||
q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);
|
||||
// 2nd for loop
|
||||
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
|
||||
q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);
|
||||
|
||||
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
|
||||
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
|
||||
q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
|
||||
q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
|
||||
|
||||
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
|
||||
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
|
||||
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
|
||||
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
|
||||
|
||||
q3s16 = vshrq_n_s16(q3s16, 1);
|
||||
q4s16 = vshrq_n_s16(q4s16, 1);
|
||||
q3s16 = vshrq_n_s16(q3s16, 1);
|
||||
q4s16 = vshrq_n_s16(q4s16, 1);
|
||||
|
||||
q3s16 = vqaddq_s16(q3s16, q2s16);
|
||||
q4s16 = vqaddq_s16(q4s16, q2s16);
|
||||
q3s16 = vqaddq_s16(q3s16, q2s16);
|
||||
q4s16 = vqaddq_s16(q4s16, q2s16);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
|
||||
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
|
||||
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
|
||||
d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
d2 = vrshr_n_s16(d2, 3);
|
||||
d3 = vrshr_n_s16(d3, 3);
|
||||
d4 = vrshr_n_s16(d4, 3);
|
||||
d5 = vrshr_n_s16(d5, 3);
|
||||
d2 = vrshr_n_s16(d2, 3);
|
||||
d3 = vrshr_n_s16(d3, 3);
|
||||
d4 = vrshr_n_s16(d4, 3);
|
||||
d5 = vrshr_n_s16(d5, 3);
|
||||
|
||||
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[0]));
|
||||
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[1]));
|
||||
v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[0]));
|
||||
v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp1.val[1]));
|
||||
|
||||
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
|
||||
q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);
|
||||
q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
|
||||
q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);
|
||||
|
||||
// dc_only_idct_add
|
||||
for (i = 0; i < 2; i++, q1s16 = q2s16) {
|
||||
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
|
||||
pred_ptr += pred_stride;
|
||||
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
|
||||
pred_ptr += pred_stride;
|
||||
// dc_only_idct_add
|
||||
for (i = 0; i < 2; i++, q1s16 = q2s16) {
|
||||
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
|
||||
pred_ptr += pred_stride;
|
||||
d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
|
||||
pred_ptr += pred_stride;
|
||||
|
||||
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
|
||||
vreinterpret_u8_u32(d6u32));
|
||||
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16), vreinterpret_u8_u32(d6u32));
|
||||
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
|
||||
dst_ptr += dst_stride;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
return;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
|
||||
dst_ptr += dst_stride;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -12,539 +12,525 @@
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
|
||||
static INLINE void vp8_loop_filter_neon(
|
||||
uint8x16_t qblimit, // flimit
|
||||
uint8x16_t qlimit, // limit
|
||||
uint8x16_t qthresh, // thresh
|
||||
uint8x16_t q3, // p3
|
||||
uint8x16_t q4, // p2
|
||||
uint8x16_t q5, // p1
|
||||
uint8x16_t q6, // p0
|
||||
uint8x16_t q7, // q0
|
||||
uint8x16_t q8, // q1
|
||||
uint8x16_t q9, // q2
|
||||
uint8x16_t q10, // q3
|
||||
uint8x16_t *q5r, // p1
|
||||
uint8x16_t *q6r, // p0
|
||||
uint8x16_t *q7r, // q0
|
||||
uint8x16_t *q8r) { // q1
|
||||
uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q11s16;
|
||||
uint16x8_t q4u16;
|
||||
int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
|
||||
int8x8_t d2s8, d3s8;
|
||||
static INLINE void vp8_loop_filter_neon(uint8x16_t qblimit, // flimit
|
||||
uint8x16_t qlimit, // limit
|
||||
uint8x16_t qthresh, // thresh
|
||||
uint8x16_t q3, // p3
|
||||
uint8x16_t q4, // p2
|
||||
uint8x16_t q5, // p1
|
||||
uint8x16_t q6, // p0
|
||||
uint8x16_t q7, // q0
|
||||
uint8x16_t q8, // q1
|
||||
uint8x16_t q9, // q2
|
||||
uint8x16_t q10, // q3
|
||||
uint8x16_t *q5r, // p1
|
||||
uint8x16_t *q6r, // p0
|
||||
uint8x16_t *q7r, // q0
|
||||
uint8x16_t *q8r) { // q1
|
||||
uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q11s16;
|
||||
uint16x8_t q4u16;
|
||||
int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
|
||||
int8x8_t d2s8, d3s8;
|
||||
|
||||
q11u8 = vabdq_u8(q3, q4);
|
||||
q12u8 = vabdq_u8(q4, q5);
|
||||
q13u8 = vabdq_u8(q5, q6);
|
||||
q14u8 = vabdq_u8(q8, q7);
|
||||
q3 = vabdq_u8(q9, q8);
|
||||
q4 = vabdq_u8(q10, q9);
|
||||
q11u8 = vabdq_u8(q3, q4);
|
||||
q12u8 = vabdq_u8(q4, q5);
|
||||
q13u8 = vabdq_u8(q5, q6);
|
||||
q14u8 = vabdq_u8(q8, q7);
|
||||
q3 = vabdq_u8(q9, q8);
|
||||
q4 = vabdq_u8(q10, q9);
|
||||
|
||||
q11u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q12u8 = vmaxq_u8(q13u8, q14u8);
|
||||
q3 = vmaxq_u8(q3, q4);
|
||||
q15u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q11u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q12u8 = vmaxq_u8(q13u8, q14u8);
|
||||
q3 = vmaxq_u8(q3, q4);
|
||||
q15u8 = vmaxq_u8(q11u8, q12u8);
|
||||
|
||||
q9 = vabdq_u8(q6, q7);
|
||||
q9 = vabdq_u8(q6, q7);
|
||||
|
||||
// vp8_hevmask
|
||||
q13u8 = vcgtq_u8(q13u8, qthresh);
|
||||
q14u8 = vcgtq_u8(q14u8, qthresh);
|
||||
q15u8 = vmaxq_u8(q15u8, q3);
|
||||
// vp8_hevmask
|
||||
q13u8 = vcgtq_u8(q13u8, qthresh);
|
||||
q14u8 = vcgtq_u8(q14u8, qthresh);
|
||||
q15u8 = vmaxq_u8(q15u8, q3);
|
||||
|
||||
q2u8 = vabdq_u8(q5, q8);
|
||||
q9 = vqaddq_u8(q9, q9);
|
||||
q2u8 = vabdq_u8(q5, q8);
|
||||
q9 = vqaddq_u8(q9, q9);
|
||||
|
||||
q15u8 = vcgeq_u8(qlimit, q15u8);
|
||||
q15u8 = vcgeq_u8(qlimit, q15u8);
|
||||
|
||||
// vp8_filter() function
|
||||
// convert to signed
|
||||
q10 = vdupq_n_u8(0x80);
|
||||
q8 = veorq_u8(q8, q10);
|
||||
q7 = veorq_u8(q7, q10);
|
||||
q6 = veorq_u8(q6, q10);
|
||||
q5 = veorq_u8(q5, q10);
|
||||
// vp8_filter() function
|
||||
// convert to signed
|
||||
q10 = vdupq_n_u8(0x80);
|
||||
q8 = veorq_u8(q8, q10);
|
||||
q7 = veorq_u8(q7, q10);
|
||||
q6 = veorq_u8(q6, q10);
|
||||
q5 = veorq_u8(q5, q10);
|
||||
|
||||
q2u8 = vshrq_n_u8(q2u8, 1);
|
||||
q9 = vqaddq_u8(q9, q2u8);
|
||||
q2u8 = vshrq_n_u8(q2u8, 1);
|
||||
q9 = vqaddq_u8(q9, q2u8);
|
||||
|
||||
q10 = vdupq_n_u8(3);
|
||||
q10 = vdupq_n_u8(3);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6)));
|
||||
q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6)));
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6)));
|
||||
q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6)));
|
||||
|
||||
q9 = vcgeq_u8(qblimit, q9);
|
||||
q9 = vcgeq_u8(qblimit, q9);
|
||||
|
||||
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
|
||||
vreinterpretq_s8_u8(q8));
|
||||
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), vreinterpretq_s8_u8(q8));
|
||||
|
||||
q14u8 = vorrq_u8(q13u8, q14u8);
|
||||
q14u8 = vorrq_u8(q13u8, q14u8);
|
||||
|
||||
q4u16 = vmovl_u8(vget_low_u8(q10));
|
||||
q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
|
||||
q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
|
||||
q4u16 = vmovl_u8(vget_low_u8(q10));
|
||||
q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
|
||||
q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
|
||||
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
|
||||
q15u8 = vandq_u8(q15u8, q9);
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
|
||||
q15u8 = vandq_u8(q15u8, q9);
|
||||
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
|
||||
q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
|
||||
q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
|
||||
|
||||
q9 = vdupq_n_u8(4);
|
||||
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
d2s8 = vqmovn_s16(q2s16);
|
||||
d3s8 = vqmovn_s16(q11s16);
|
||||
q1s8 = vcombine_s8(d2s8, d3s8);
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
q9 = vdupq_n_u8(4);
|
||||
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
d2s8 = vqmovn_s16(q2s16);
|
||||
d3s8 = vqmovn_s16(q11s16);
|
||||
q1s8 = vcombine_s8(d2s8, d3s8);
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
|
||||
q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
|
||||
q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q1s8 = vshrq_n_s8(q1s8, 3);
|
||||
q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
|
||||
q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q1s8 = vshrq_n_s8(q1s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
|
||||
|
||||
q1s8 = vrshrq_n_s8(q1s8, 1);
|
||||
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
q1s8 = vrshrq_n_s8(q1s8, 1);
|
||||
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
|
||||
q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
|
||||
q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
|
||||
q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
|
||||
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
*q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
|
||||
*q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
*q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
*q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
|
||||
return;
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
*q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
|
||||
*q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
*q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
*q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
src -= (pitch << 2);
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
src -= (pitch << 2);
|
||||
|
||||
q3 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q4 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q5 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q6 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q7 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q8 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q9 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q10 = vld1q_u8(src);
|
||||
q3 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q4 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q5 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q6 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q7 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q8 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q9 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q10 = vld1q_u8(src);
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9,
|
||||
q10, &q5, &q6, &q7, &q8);
|
||||
|
||||
src -= (pitch * 5);
|
||||
vst1q_u8(src, q5);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q6);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q7);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q8);
|
||||
return;
|
||||
src -= (pitch * 5);
|
||||
vst1q_u8(src, q5);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q6);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q7);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
u -= (pitch << 2);
|
||||
v -= (pitch << 2);
|
||||
u -= (pitch << 2);
|
||||
v -= (pitch << 2);
|
||||
|
||||
d6 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d7 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d8 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d9 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d10 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d11 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d12 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d13 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d14 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d15 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d16 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d17 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d18 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d19 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d20 = vld1_u8(u);
|
||||
d21 = vld1_u8(v);
|
||||
d6 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d7 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d8 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d9 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d10 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d11 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d12 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d13 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d14 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d15 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d16 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d17 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d18 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d19 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d20 = vld1_u8(u);
|
||||
d21 = vld1_u8(v);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9,
|
||||
q10, &q5, &q6, &q7, &q8);
|
||||
|
||||
u -= (pitch * 5);
|
||||
vst1_u8(u, vget_low_u8(q5));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q6));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q7));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q8));
|
||||
u -= (pitch * 5);
|
||||
vst1_u8(u, vget_low_u8(q5));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q6));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q7));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q8));
|
||||
|
||||
v -= (pitch * 5);
|
||||
vst1_u8(v, vget_high_u8(q5));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q6));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q7));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q8));
|
||||
return;
|
||||
v -= (pitch * 5);
|
||||
vst1_u8(v, vget_high_u8(q5));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q6));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q7));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q8));
|
||||
return;
|
||||
}
|
||||
|
||||
static INLINE void write_4x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x4_t result) {
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
/*
|
||||
* uint8x8x4_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
20 21 22 23 | 24 25 26 27
|
||||
30 31 32 33 | 34 35 36 37
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 04 05 24 25
|
||||
02 03 22 23 | 06 07 26 27
|
||||
10 11 30 31 | 14 15 34 35
|
||||
12 13 32 33 | 16 17 36 37
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 04 14 24 34
|
||||
01 11 21 31 | 05 15 25 35
|
||||
02 12 22 32 | 06 16 26 36
|
||||
03 13 23 33 | 07 17 27 37
|
||||
*/
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
|
||||
vreinterpret_u16_u8(result.val[2]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
|
||||
vreinterpret_u16_u8(result.val[3]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
|
||||
const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
|
||||
const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
|
||||
const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
|
||||
/*
|
||||
* uint8x8x4_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
20 21 22 23 | 24 25 26 27
|
||||
30 31 32 33 | 34 35 36 37
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 04 05 24 25
|
||||
02 03 22 23 | 06 07 26 27
|
||||
10 11 30 31 | 14 15 34 35
|
||||
12 13 32 33 | 16 17 36 37
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 04 14 24 34
|
||||
01 11 21 31 | 05 15 25 35
|
||||
02 12 22 32 | 06 16 26 36
|
||||
03 13 23 33 | 07 17 27 37
|
||||
*/
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
|
||||
vreinterpret_u16_u8(result.val[2]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
|
||||
vreinterpret_u16_u8(result.val[3]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
|
||||
const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
|
||||
const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
|
||||
const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
|
||||
#else
|
||||
vst4_lane_u8(dst, result, 0);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 1);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 2);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 3);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 4);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 5);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 6);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 7);
|
||||
vst4_lane_u8(dst, result, 0);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 1);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 2);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 3);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 4);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 5);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 6);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 7);
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
unsigned char *s, *d;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
unsigned char *s, *d;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
s = src - 4;
|
||||
d6 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d8 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d10 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d12 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d14 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d16 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d18 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d20 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d7 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d9 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d11 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d13 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d15 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d17 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d19 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d21 = vld1_u8(s);
|
||||
s = src - 4;
|
||||
d6 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d8 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d10 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d12 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d14 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d16 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d18 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d20 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d7 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d9 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d11 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d13 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d15 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d17 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d19 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d21 = vld1_u8(s);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9,
|
||||
q10, &q5, &q6, &q7, &q8);
|
||||
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
|
||||
d = src - 2;
|
||||
write_4x8(d, pitch, q4ResultL);
|
||||
d += pitch * 8;
|
||||
write_4x8(d, pitch, q4ResultH);
|
||||
d = src - 2;
|
||||
write_4x8(d, pitch, q4ResultL);
|
||||
d += pitch * 8;
|
||||
write_4x8(d, pitch, q4ResultH);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
unsigned char *us, *ud;
|
||||
unsigned char *vs, *vd;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
unsigned char *us, *ud;
|
||||
unsigned char *vs, *vd;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
us = u - 4;
|
||||
d6 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d8 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d10 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d12 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d14 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d16 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d18 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d20 = vld1_u8(us);
|
||||
us = u - 4;
|
||||
d6 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d8 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d10 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d12 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d14 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d16 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d18 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d20 = vld1_u8(us);
|
||||
|
||||
vs = v - 4;
|
||||
d7 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d9 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d11 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d13 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d15 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d17 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d19 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d21 = vld1_u8(vs);
|
||||
vs = v - 4;
|
||||
d7 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d9 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d11 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d13 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d15 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d17 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d19 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d21 = vld1_u8(vs);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9,
|
||||
q10, &q5, &q6, &q7, &q8);
|
||||
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
ud = u - 2;
|
||||
write_4x8(ud, pitch, q4ResultL);
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
ud = u - 2;
|
||||
write_4x8(ud, pitch, q4ResultL);
|
||||
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
vd = v - 2;
|
||||
write_4x8(vd, pitch, q4ResultH);
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
vd = v - 2;
|
||||
write_4x8(vd, pitch, q4ResultH);
|
||||
}
|
||||
|
@ -8,15 +8,12 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
const unsigned char vp8_block2left[25] =
|
||||
{
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
|
||||
const unsigned char vp8_block2left[25] = {
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
|
||||
};
|
||||
const unsigned char vp8_block2above[25] =
|
||||
{
|
||||
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
|
||||
const unsigned char vp8_block2above[25] = {
|
||||
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
|
||||
};
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_BLOCKD_H_
|
||||
#define VP8_COMMON_BLOCKD_H_
|
||||
|
||||
@ -28,280 +27,266 @@ extern "C" {
|
||||
#define DCPREDSIMTHRESH 0
|
||||
#define DCPREDCNTTHRESH 3
|
||||
|
||||
#define MB_FEATURE_TREE_PROBS 3
|
||||
#define MAX_MB_SEGMENTS 4
|
||||
#define MB_FEATURE_TREE_PROBS 3
|
||||
#define MAX_MB_SEGMENTS 4
|
||||
|
||||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 4
|
||||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 4
|
||||
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_DELTADATA 0
|
||||
#define SEGMENT_ABSDATA 1
|
||||
#define SEGMENT_DELTADATA 0
|
||||
#define SEGMENT_ABSDATA 1
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int r, c;
|
||||
} POS;
|
||||
|
||||
#define PLANE_TYPE_Y_NO_DC 0
|
||||
#define PLANE_TYPE_Y2 1
|
||||
#define PLANE_TYPE_UV 2
|
||||
#define PLANE_TYPE_Y_WITH_DC 3
|
||||
typedef struct { int r, c; } POS;
|
||||
|
||||
#define PLANE_TYPE_Y_NO_DC 0
|
||||
#define PLANE_TYPE_Y2 1
|
||||
#define PLANE_TYPE_UV 2
|
||||
#define PLANE_TYPE_Y_WITH_DC 3
|
||||
|
||||
typedef char ENTROPY_CONTEXT;
|
||||
typedef struct
|
||||
{
|
||||
ENTROPY_CONTEXT y1[4];
|
||||
ENTROPY_CONTEXT u[2];
|
||||
ENTROPY_CONTEXT v[2];
|
||||
ENTROPY_CONTEXT y2;
|
||||
typedef struct {
|
||||
ENTROPY_CONTEXT y1[4];
|
||||
ENTROPY_CONTEXT u[2];
|
||||
ENTROPY_CONTEXT v[2];
|
||||
ENTROPY_CONTEXT y2;
|
||||
} ENTROPY_CONTEXT_PLANES;
|
||||
|
||||
extern const unsigned char vp8_block2left[25];
|
||||
extern const unsigned char vp8_block2above[25];
|
||||
|
||||
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
|
||||
Dest = (A)+(B);
|
||||
#define VP8_COMBINEENTROPYCONTEXTS(Dest, A, B) Dest = (A) + (B);
|
||||
|
||||
typedef enum { KEY_FRAME = 0, INTER_FRAME = 1 } FRAME_TYPE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
KEY_FRAME = 0,
|
||||
INTER_FRAME = 1
|
||||
} FRAME_TYPE;
|
||||
typedef enum {
|
||||
DC_PRED, /* average of above and left pixels */
|
||||
V_PRED, /* vertical prediction */
|
||||
H_PRED, /* horizontal prediction */
|
||||
TM_PRED, /* Truemotion prediction */
|
||||
B_PRED, /* block based prediction, each block has its own prediction mode */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DC_PRED, /* average of above and left pixels */
|
||||
V_PRED, /* vertical prediction */
|
||||
H_PRED, /* horizontal prediction */
|
||||
TM_PRED, /* Truemotion prediction */
|
||||
B_PRED, /* block based prediction, each block has its own prediction mode */
|
||||
NEARESTMV,
|
||||
NEARMV,
|
||||
ZEROMV,
|
||||
NEWMV,
|
||||
SPLITMV,
|
||||
|
||||
NEARESTMV,
|
||||
NEARMV,
|
||||
ZEROMV,
|
||||
NEWMV,
|
||||
SPLITMV,
|
||||
|
||||
MB_MODE_COUNT
|
||||
MB_MODE_COUNT
|
||||
} MB_PREDICTION_MODE;
|
||||
|
||||
/* Macroblock level features */
|
||||
typedef enum
|
||||
{
|
||||
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
|
||||
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
|
||||
MB_LVL_MAX = 2 /* Number of MB level features supported */
|
||||
typedef enum {
|
||||
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
|
||||
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
|
||||
MB_LVL_MAX = 2 /* Number of MB level features supported */
|
||||
|
||||
} MB_LVL_FEATURES;
|
||||
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_ALTQ 0x01
|
||||
#define SEGMENT_ALT_LF 0x02
|
||||
#define SEGMENT_ALTQ 0x01
|
||||
#define SEGMENT_ALT_LF 0x02
|
||||
|
||||
#define VP8_YMODES (B_PRED + 1)
|
||||
#define VP8_YMODES (B_PRED + 1)
|
||||
#define VP8_UV_MODES (TM_PRED + 1)
|
||||
|
||||
#define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
|
||||
|
||||
typedef enum
|
||||
{
|
||||
B_DC_PRED, /* average of above and left pixels */
|
||||
B_TM_PRED,
|
||||
typedef enum {
|
||||
B_DC_PRED, /* average of above and left pixels */
|
||||
B_TM_PRED,
|
||||
|
||||
B_VE_PRED, /* vertical prediction */
|
||||
B_HE_PRED, /* horizontal prediction */
|
||||
B_VE_PRED, /* vertical prediction */
|
||||
B_HE_PRED, /* horizontal prediction */
|
||||
|
||||
B_LD_PRED,
|
||||
B_RD_PRED,
|
||||
B_LD_PRED,
|
||||
B_RD_PRED,
|
||||
|
||||
B_VR_PRED,
|
||||
B_VL_PRED,
|
||||
B_HD_PRED,
|
||||
B_HU_PRED,
|
||||
B_VR_PRED,
|
||||
B_VL_PRED,
|
||||
B_HD_PRED,
|
||||
B_HU_PRED,
|
||||
|
||||
LEFT4X4,
|
||||
ABOVE4X4,
|
||||
ZERO4X4,
|
||||
NEW4X4,
|
||||
LEFT4X4,
|
||||
ABOVE4X4,
|
||||
ZERO4X4,
|
||||
NEW4X4,
|
||||
|
||||
B_MODE_COUNT
|
||||
B_MODE_COUNT
|
||||
} B_PREDICTION_MODE;
|
||||
|
||||
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
|
||||
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
|
||||
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
|
||||
|
||||
/* For keyframes, intra block modes are predicted by the (already decoded)
|
||||
modes for the Y blocks to the left and above us; for interframes, there
|
||||
is a single probability table. */
|
||||
|
||||
union b_mode_info
|
||||
{
|
||||
B_PREDICTION_MODE as_mode;
|
||||
int_mv mv;
|
||||
union b_mode_info {
|
||||
B_PREDICTION_MODE as_mode;
|
||||
int_mv mv;
|
||||
};
|
||||
|
||||
typedef enum
|
||||
{
|
||||
INTRA_FRAME = 0,
|
||||
LAST_FRAME = 1,
|
||||
GOLDEN_FRAME = 2,
|
||||
ALTREF_FRAME = 3,
|
||||
MAX_REF_FRAMES = 4
|
||||
typedef enum {
|
||||
INTRA_FRAME = 0,
|
||||
LAST_FRAME = 1,
|
||||
GOLDEN_FRAME = 2,
|
||||
ALTREF_FRAME = 3,
|
||||
MAX_REF_FRAMES = 4
|
||||
} MV_REFERENCE_FRAME;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t mode, uv_mode;
|
||||
uint8_t ref_frame;
|
||||
uint8_t is_4x4;
|
||||
int_mv mv;
|
||||
typedef struct {
|
||||
uint8_t mode, uv_mode;
|
||||
uint8_t ref_frame;
|
||||
uint8_t is_4x4;
|
||||
int_mv mv;
|
||||
|
||||
uint8_t partitioning;
|
||||
uint8_t mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
uint8_t need_to_clamp_mvs;
|
||||
uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */
|
||||
uint8_t partitioning;
|
||||
/* does this mb has coefficients at all, 1=no coefficients, 0=need decode
|
||||
tokens */
|
||||
uint8_t mb_skip_coeff;
|
||||
uint8_t need_to_clamp_mvs;
|
||||
/* Which set of segmentation parameters should be used for this MB */
|
||||
uint8_t segment_id;
|
||||
} MB_MODE_INFO;
|
||||
|
||||
typedef struct modeinfo
|
||||
{
|
||||
MB_MODE_INFO mbmi;
|
||||
union b_mode_info bmi[16];
|
||||
typedef struct modeinfo {
|
||||
MB_MODE_INFO mbmi;
|
||||
union b_mode_info bmi[16];
|
||||
} MODE_INFO;
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
/* The mb-level information needed to be stored for higher-resolution encoder */
|
||||
typedef struct
|
||||
{
|
||||
MB_PREDICTION_MODE mode;
|
||||
MV_REFERENCE_FRAME ref_frame;
|
||||
int_mv mv;
|
||||
int dissim; /* dissimilarity level of the macroblock */
|
||||
typedef struct {
|
||||
MB_PREDICTION_MODE mode;
|
||||
MV_REFERENCE_FRAME ref_frame;
|
||||
int_mv mv;
|
||||
int dissim; /* dissimilarity level of the macroblock */
|
||||
} LOWER_RES_MB_INFO;
|
||||
|
||||
/* The frame-level information needed to be stored for higher-resolution
|
||||
* encoder */
|
||||
typedef struct
|
||||
{
|
||||
FRAME_TYPE frame_type;
|
||||
int is_frame_dropped;
|
||||
// The frame rate for the lowest resolution.
|
||||
double low_res_framerate;
|
||||
/* The frame number of each reference frame */
|
||||
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
|
||||
// The video frame counter value for the key frame, for lowest resolution.
|
||||
unsigned int key_frame_counter_value;
|
||||
LOWER_RES_MB_INFO *mb_info;
|
||||
typedef struct {
|
||||
FRAME_TYPE frame_type;
|
||||
int is_frame_dropped;
|
||||
// The frame rate for the lowest resolution.
|
||||
double low_res_framerate;
|
||||
/* The frame number of each reference frame */
|
||||
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
|
||||
// The video frame counter value for the key frame, for lowest resolution.
|
||||
unsigned int key_frame_counter_value;
|
||||
LOWER_RES_MB_INFO *mb_info;
|
||||
} LOWER_RES_FRAME_INFO;
|
||||
#endif
|
||||
|
||||
typedef struct blockd
|
||||
{
|
||||
short *qcoeff;
|
||||
short *dqcoeff;
|
||||
unsigned char *predictor;
|
||||
short *dequant;
|
||||
typedef struct blockd {
|
||||
short *qcoeff;
|
||||
short *dqcoeff;
|
||||
unsigned char *predictor;
|
||||
short *dequant;
|
||||
|
||||
int offset;
|
||||
char *eob;
|
||||
int offset;
|
||||
char *eob;
|
||||
|
||||
union b_mode_info bmi;
|
||||
union b_mode_info bmi;
|
||||
} BLOCKD;
|
||||
|
||||
typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst,
|
||||
int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
typedef struct macroblockd
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
DECLARE_ALIGNED(16, short, qcoeff[400]);
|
||||
DECLARE_ALIGNED(16, short, dqcoeff[400]);
|
||||
DECLARE_ALIGNED(16, char, eobs[25]);
|
||||
typedef struct macroblockd {
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
DECLARE_ALIGNED(16, short, qcoeff[400]);
|
||||
DECLARE_ALIGNED(16, short, dqcoeff[400]);
|
||||
DECLARE_ALIGNED(16, char, eobs[25]);
|
||||
|
||||
DECLARE_ALIGNED(16, short, dequant_y1[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y2[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_uv[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y1[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y2[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_uv[16]);
|
||||
|
||||
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
|
||||
BLOCKD block[25];
|
||||
int fullpixel_mask;
|
||||
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
|
||||
BLOCKD block[25];
|
||||
int fullpixel_mask;
|
||||
|
||||
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
|
||||
YV12_BUFFER_CONFIG dst;
|
||||
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
|
||||
YV12_BUFFER_CONFIG dst;
|
||||
|
||||
MODE_INFO *mode_info_context;
|
||||
int mode_info_stride;
|
||||
MODE_INFO *mode_info_context;
|
||||
int mode_info_stride;
|
||||
|
||||
FRAME_TYPE frame_type;
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int up_available;
|
||||
int left_available;
|
||||
int up_available;
|
||||
int left_available;
|
||||
|
||||
unsigned char *recon_above[3];
|
||||
unsigned char *recon_left[3];
|
||||
int recon_left_stride[2];
|
||||
unsigned char *recon_above[3];
|
||||
unsigned char *recon_left[3];
|
||||
int recon_left_stride[2];
|
||||
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context;
|
||||
ENTROPY_CONTEXT_PLANES *left_context;
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context;
|
||||
ENTROPY_CONTEXT_PLANES *left_context;
|
||||
|
||||
/* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
|
||||
unsigned char segmentation_enabled;
|
||||
/* 0 indicates segmentation at MB level is not enabled. Otherwise the
|
||||
* individual bits indicate which features are active. */
|
||||
unsigned char segmentation_enabled;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
|
||||
unsigned char update_mb_segmentation_map;
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
|
||||
unsigned char update_mb_segmentation_map;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char update_mb_segmentation_data;
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char update_mb_segmentation_data;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char mb_segement_abs_delta;
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char mb_segement_abs_delta;
|
||||
|
||||
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
|
||||
/* are enabled and when enabled the probabilities used to decode the per MB flags in MB_MODE_INFO */
|
||||
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
|
||||
/* Per frame flags that define which MB level features (such as quantizer or
|
||||
* loop filter level) */
|
||||
/* are enabled and when enabled the probabilities used to decode the per MB
|
||||
* flags in MB_MODE_INFO */
|
||||
/* Probability Tree used to code Segment number */
|
||||
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];
|
||||
/* Segment parameters */
|
||||
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
|
||||
|
||||
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
|
||||
/* mode_based Loop filter adjustment */
|
||||
unsigned char mode_ref_lf_delta_enabled;
|
||||
unsigned char mode_ref_lf_delta_update;
|
||||
|
||||
/* mode_based Loop filter adjustment */
|
||||
unsigned char mode_ref_lf_delta_enabled;
|
||||
unsigned char mode_ref_lf_delta_update;
|
||||
/* Delta values have the range +/- MAX_LOOP_FILTER */
|
||||
signed char
|
||||
last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
/* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
|
||||
signed char
|
||||
mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
|
||||
/* Delta values have the range +/- MAX_LOOP_FILTER */
|
||||
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
/* Distance of MB away from frame edges */
|
||||
int mb_to_left_edge;
|
||||
int mb_to_right_edge;
|
||||
int mb_to_top_edge;
|
||||
int mb_to_bottom_edge;
|
||||
|
||||
/* Distance of MB away from frame edges */
|
||||
int mb_to_left_edge;
|
||||
int mb_to_right_edge;
|
||||
int mb_to_top_edge;
|
||||
int mb_to_bottom_edge;
|
||||
vp8_subpix_fn_t subpixel_predict;
|
||||
vp8_subpix_fn_t subpixel_predict8x4;
|
||||
vp8_subpix_fn_t subpixel_predict8x8;
|
||||
vp8_subpix_fn_t subpixel_predict16x16;
|
||||
|
||||
void *current_bc;
|
||||
|
||||
|
||||
vp8_subpix_fn_t subpixel_predict;
|
||||
vp8_subpix_fn_t subpixel_predict8x4;
|
||||
vp8_subpix_fn_t subpixel_predict8x8;
|
||||
vp8_subpix_fn_t subpixel_predict16x16;
|
||||
|
||||
void *current_bc;
|
||||
|
||||
int corrupted;
|
||||
int corrupted;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
/* This is an intermediate buffer currently used in sub-pixel motion search
|
||||
* to keep a copy of the reference area. This buffer can be used for other
|
||||
* purposes.
|
||||
*/
|
||||
DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
|
||||
/* This is an intermediate buffer currently used in sub-pixel motion search
|
||||
* to keep a copy of the reference area. This buffer can be used for other
|
||||
* purposes.
|
||||
*/
|
||||
DECLARE_ALIGNED(32, unsigned char, y_buf[22 * 32]);
|
||||
#endif
|
||||
} MACROBLOCKD;
|
||||
|
||||
|
||||
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
|
||||
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
|
||||
|
||||
|
@ -18,177 +18,177 @@ extern "C" {
|
||||
/* Update probabilities for the nodes in the token entropy tree.
|
||||
Generated file included by entropy.c */
|
||||
|
||||
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
|
||||
{
|
||||
{
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
|
||||
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
};
|
||||
const vp8_prob vp8_coef_update_probs
|
||||
[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] = {
|
||||
{
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
|
||||
{ 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{ 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
|
||||
{ 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{ 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{ 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
{
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_COMMON_H_
|
||||
#define VP8_COMMON_COMMON_H_
|
||||
|
||||
@ -24,22 +23,23 @@ extern "C" {
|
||||
|
||||
/* Only need this for fixed-size arrays, for structs just assign. */
|
||||
|
||||
#define vp8_copy( Dest, Src) { \
|
||||
assert( sizeof( Dest) == sizeof( Src)); \
|
||||
memcpy( Dest, Src, sizeof( Src)); \
|
||||
}
|
||||
#define vp8_copy(Dest, Src) \
|
||||
{ \
|
||||
assert(sizeof(Dest) == sizeof(Src)); \
|
||||
memcpy(Dest, Src, sizeof(Src)); \
|
||||
}
|
||||
|
||||
/* Use this for variably-sized arrays. */
|
||||
|
||||
#define vp8_copy_array( Dest, Src, N) { \
|
||||
assert( sizeof( *Dest) == sizeof( *Src)); \
|
||||
memcpy( Dest, Src, N * sizeof( *Src)); \
|
||||
}
|
||||
#define vp8_copy_array(Dest, Src, N) \
|
||||
{ \
|
||||
assert(sizeof(*Dest) == sizeof(*Src)); \
|
||||
memcpy(Dest, Src, N * sizeof(*Src)); \
|
||||
}
|
||||
|
||||
#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));
|
||||
|
||||
#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));
|
||||
#define vp8_zero(Dest) memset(&Dest, 0, sizeof(Dest));
|
||||
|
||||
#define vp8_zero_array(Dest, N) memset(Dest, 0, N * sizeof(*Dest));
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "entropy.h"
|
||||
|
||||
/* *** GENERATED FILE: DO NOT EDIT *** */
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
@ -16,17 +15,13 @@
|
||||
|
||||
/* Copy 2 macroblocks to a buffer */
|
||||
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
|
||||
unsigned char *dst_ptr, int dst_stride,
|
||||
int height)
|
||||
{
|
||||
int r;
|
||||
unsigned char *dst_ptr, int dst_stride, int height) {
|
||||
int r;
|
||||
|
||||
for (r = 0; r < height; r++)
|
||||
{
|
||||
memcpy(dst_ptr, src_ptr, 32);
|
||||
for (r = 0; r < height; r++) {
|
||||
memcpy(dst_ptr, src_ptr, 32);
|
||||
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
}
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
|
@ -8,148 +8,128 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include "blockd.h"
|
||||
|
||||
void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols,
|
||||
int frame) {
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_index = 0;
|
||||
FILE *mvs = fopen("mvs.stt", "a");
|
||||
|
||||
void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame)
|
||||
{
|
||||
/* print out the macroblock Y modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_index = 0;
|
||||
FILE *mvs = fopen("mvs.stt", "a");
|
||||
for (mb_row = 0; mb_row < rows; mb_row++) {
|
||||
for (mb_col = 0; mb_col < cols; mb_col++) {
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
|
||||
|
||||
/* print out the macroblock Y modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
|
||||
for (mb_row = 0; mb_row < rows; mb_row++) {
|
||||
for (mb_col = 0; mb_col < cols; mb_col++) {
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
/* print out the macroblock UV modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "UV Modes for Frame %d\n", frame);
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
/* print out the macroblock UV modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "UV Modes for Frame %d\n", frame);
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
|
||||
for (mb_row = 0; mb_row < rows; mb_row++) {
|
||||
for (mb_col = 0; mb_col < cols; mb_col++) {
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
|
||||
/* print out the block modes */
|
||||
fprintf(mvs, "Mbs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++)
|
||||
{
|
||||
int b_col;
|
||||
int bindex;
|
||||
/* print out the block modes */
|
||||
fprintf(mvs, "Mbs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++)
|
||||
{
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++) {
|
||||
int b_col;
|
||||
int bindex;
|
||||
|
||||
if (mi[mb_index].mbmi.mode == B_PRED)
|
||||
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
|
||||
else
|
||||
fprintf(mvs, "xx ");
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++) {
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
|
||||
}
|
||||
if (mi[mb_index].mbmi.mode == B_PRED)
|
||||
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
|
||||
else
|
||||
fprintf(mvs, "xx ");
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the macroblock mvs */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
/* print out the macroblock mvs */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2);
|
||||
for (mb_row = 0; mb_row < rows; mb_row++) {
|
||||
for (mb_col = 0; mb_col < cols; mb_col++) {
|
||||
fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2,
|
||||
mi[mb_index].mbmi.mv.as_mv.col / 2);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the block mvs */
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
/* print out the block mvs */
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++)
|
||||
{
|
||||
int b_col;
|
||||
int bindex;
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++) {
|
||||
int b_col;
|
||||
int bindex;
|
||||
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++)
|
||||
{
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col);
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++) {
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row,
|
||||
mi[mb_index].bmi[bindex].mv.as_mv.col);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
|
||||
fclose(mvs);
|
||||
fclose(mvs);
|
||||
}
|
||||
|
@ -17,181 +17,141 @@ extern "C" {
|
||||
|
||||
/*Generated file, included by entropy.c*/
|
||||
|
||||
|
||||
static const vp8_prob default_coef_probs [BLOCK_TYPES]
|
||||
[COEF_BANDS]
|
||||
[PREV_COEF_CONTEXTS]
|
||||
[ENTROPY_NODES] =
|
||||
{
|
||||
{ /* Block Type ( 0 ) */
|
||||
static const vp8_prob default_coef_probs
|
||||
[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] = {
|
||||
{ /* Block Type ( 0 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
|
||||
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
|
||||
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
|
||||
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
|
||||
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
|
||||
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
|
||||
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
|
||||
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
|
||||
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
|
||||
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
|
||||
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
|
||||
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
|
||||
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
|
||||
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
|
||||
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
|
||||
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
|
||||
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
|
||||
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
|
||||
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
|
||||
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
|
||||
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
|
||||
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
|
||||
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
|
||||
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
|
||||
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 1 ) */
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } },
|
||||
{ /* Block Type ( 1 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
|
||||
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
|
||||
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
|
||||
},
|
||||
{ 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
|
||||
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
|
||||
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } },
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
|
||||
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
|
||||
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
|
||||
},
|
||||
{ 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
|
||||
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
|
||||
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } },
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
|
||||
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
|
||||
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
|
||||
},
|
||||
{ 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
|
||||
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
|
||||
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } },
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
|
||||
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
|
||||
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
|
||||
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
|
||||
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
|
||||
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
|
||||
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
|
||||
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
|
||||
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
|
||||
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
|
||||
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
|
||||
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
|
||||
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
|
||||
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
|
||||
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
|
||||
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
|
||||
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 2 ) */
|
||||
{ 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } } },
|
||||
{ /* Block Type ( 2 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
|
||||
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
|
||||
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
|
||||
},
|
||||
{ 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
|
||||
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
|
||||
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } },
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
|
||||
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
|
||||
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
|
||||
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
|
||||
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
|
||||
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
|
||||
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
|
||||
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
|
||||
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
|
||||
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
|
||||
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 3 ) */
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } },
|
||||
{ /* Block Type ( 3 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
|
||||
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
|
||||
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
|
||||
},
|
||||
{ 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
|
||||
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
|
||||
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } },
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
|
||||
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
|
||||
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
|
||||
},
|
||||
{ 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
|
||||
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
|
||||
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } },
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
|
||||
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
|
||||
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
|
||||
},
|
||||
{ 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
|
||||
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
|
||||
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } },
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
|
||||
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
|
||||
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
|
||||
},
|
||||
{ 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
|
||||
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
|
||||
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } },
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
|
||||
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
|
||||
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
|
||||
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
|
||||
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
|
||||
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
|
||||
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
|
||||
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
|
||||
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
|
||||
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
|
||||
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
|
||||
},
|
||||
{ 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
|
||||
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
|
||||
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } },
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
}
|
||||
};
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } } }
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -8,36 +8,30 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
/* Dequantize one block: each of the 16 entries of d->qcoeff is multiplied by
 * the matching entry of DQC and the product is stored in d->dqcoeff. */
void vp8_dequantize_b_c(BLOCKD *d, short *DQC) {
  const short *quantized = d->qcoeff;
  short *dequantized = d->dqcoeff;
  int k = 0;

  while (k < 16) {
    dequantized[k] = quantized[k] * DQC[k];
    ++k;
  }
}
|
||||
|
||||
/* Dequantize a block of 16 coefficients in place and run the inverse
 * transform on it.
 *
 * Every entry of input is multiplied by the matching entry of dq. The result
 * is handed to vp8_short_idct4x4llm_c with dest/stride passed for both of its
 * buffer/stride argument pairs, and input is cleared afterwards. */
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *dest,
                            int stride) {
  short *coef = input;
  const short *scale = dq;
  const short *const coef_end = input + 16;

  while (coef != coef_end) {
    *coef = *scale * *coef;
    ++coef;
    ++scale;
  }

  vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);

  /* Clears 32 bytes, i.e. all 16 coefficients when short is 2 bytes wide. */
  memset(input, 0, 32);
}
|
||||
|
@ -15,47 +15,34 @@
|
||||
|
||||
#include "coefupdateprobs.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
|
||||
{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = {
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) =
|
||||
{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) = {
|
||||
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char,
|
||||
vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
|
||||
{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
|
||||
|
||||
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
||||
{
|
||||
0, 1, 4, 8,
|
||||
5, 2, 3, 6,
|
||||
9, 12, 13, 10,
|
||||
7, 11, 14, 15,
|
||||
vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = {
|
||||
0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
{
|
||||
1, 2, 6, 7,
|
||||
3, 5, 8, 13,
|
||||
4, 9, 12, 14,
|
||||
10, 11, 15, 16
|
||||
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = {
|
||||
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = {
|
||||
1, 2, 6, 7, 3, 5, 8, 13, 4, 9, 12, 14, 10, 11, 15, 16
|
||||
};
|
||||
|
||||
/* vp8_default_zig_zag_mask generated with:
|
||||
@ -71,87 +58,69 @@ DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
|
||||
}
|
||||
*/
|
||||
/* One bit per entry, laid out four entries per row; the last entry is the
 * top bit of a 16-bit value, hence -32768. */
/* clang-format off */
DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) = {
    1,    2,    32,     64,
    4,   16,   128,   4096,
    8,  256,  2048,   8192,
  512, 1024, 16384, -32768
};
/* clang-format on */
|
||||
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = { 7, 6 };
|
||||
|
||||
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
|
||||
|
||||
const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
|
||||
{
|
||||
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
|
||||
-ZERO_TOKEN, 4, /* 1 = ZERO */
|
||||
-ONE_TOKEN, 6, /* 2 = ONE */
|
||||
8, 12, /* 3 = LOW_VAL */
|
||||
-TWO_TOKEN, 10, /* 4 = TWO */
|
||||
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
|
||||
14, 16, /* 6 = HIGH_LOW */
|
||||
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
|
||||
18, 20, /* 8 = CAT_THREEFOUR */
|
||||
-DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
|
||||
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
|
||||
/* corresponding _CONTEXT_NODEs */
|
||||
/* clang-format off */
|
||||
const vp8_tree_index vp8_coef_tree[22] = {
|
||||
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
|
||||
-ZERO_TOKEN, 4, /* 1 = ZERO */
|
||||
-ONE_TOKEN, 6, /* 2 = ONE */
|
||||
8, 12, /* 3 = LOW_VAL */
|
||||
-TWO_TOKEN, 10, /* 4 = TWO */
|
||||
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
|
||||
14, 16, /* 6 = HIGH_LOW */
|
||||
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
|
||||
18, 20, /* 8 = CAT_THREEFOUR */
|
||||
-DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
|
||||
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
|
||||
};
|
||||
/* clang-format on */
|
||||
|
||||
/* vp8_coef_encodings generated with:
|
||||
vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree);
|
||||
*/
|
||||
vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
|
||||
{
|
||||
{2, 2},
|
||||
{6, 3},
|
||||
{28, 5},
|
||||
{58, 6},
|
||||
{59, 6},
|
||||
{60, 6},
|
||||
{61, 6},
|
||||
{124, 7},
|
||||
{125, 7},
|
||||
{126, 7},
|
||||
{127, 7},
|
||||
{0, 1}
|
||||
vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = {
|
||||
{ 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 },
|
||||
{ 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 }
|
||||
};
|
||||
|
||||
/* Trees for extra bits. Probabilities are constant and
|
||||
do not depend on previously encoded bits */
|
||||
|
||||
static const vp8_prob Pcat1[] = { 159};
|
||||
static const vp8_prob Pcat2[] = { 165, 145};
|
||||
static const vp8_prob Pcat3[] = { 173, 148, 140};
|
||||
static const vp8_prob Pcat4[] = { 176, 155, 140, 135};
|
||||
static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130};
|
||||
static const vp8_prob Pcat6[] =
|
||||
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
|
||||
|
||||
static const vp8_prob Pcat1[] = { 159 };
|
||||
static const vp8_prob Pcat2[] = { 165, 145 };
|
||||
static const vp8_prob Pcat3[] = { 173, 148, 140 };
|
||||
static const vp8_prob Pcat4[] = { 176, 155, 140, 135 };
|
||||
static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130 };
|
||||
static const vp8_prob Pcat6[] = { 254, 254, 243, 230, 196, 177,
|
||||
153, 140, 133, 130, 129 };
|
||||
|
||||
/* tree index tables generated with:
|
||||
|
||||
void init_bit_tree(vp8_tree_index *p, int n)
|
||||
{
|
||||
int i = 0;
|
||||
void init_bit_tree(vp8_tree_index *p, int n) {
|
||||
int i = 0;
|
||||
|
||||
while (++i < n)
|
||||
{
|
||||
p[0] = p[1] = i << 1;
|
||||
p += 2;
|
||||
}
|
||||
while (++i < n) {
|
||||
p[0] = p[1] = i << 1;
|
||||
p += 2;
|
||||
}
|
||||
|
||||
p[0] = p[1] = 0;
|
||||
p[0] = p[1] = 0;
|
||||
}
|
||||
|
||||
void init_bit_trees()
|
||||
{
|
||||
init_bit_tree(cat1, 1);
|
||||
init_bit_tree(cat2, 2);
|
||||
init_bit_tree(cat3, 3);
|
||||
init_bit_tree(cat4, 4);
|
||||
init_bit_tree(cat5, 5);
|
||||
init_bit_tree(cat6, 11);
|
||||
void init_bit_trees() {
|
||||
init_bit_tree(cat1, 1);
|
||||
init_bit_tree(cat2, 2);
|
||||
init_bit_tree(cat3, 3);
|
||||
init_bit_tree(cat4, 4);
|
||||
init_bit_tree(cat5, 5);
|
||||
init_bit_tree(cat6, 11);
|
||||
}
|
||||
*/
|
||||
|
||||
@ -160,29 +129,19 @@ static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 };
|
||||
static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
|
||||
static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
|
||||
static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
|
||||
static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
|
||||
14, 14, 16, 16, 18, 18, 20, 20, 0, 0 };
|
||||
static const vp8_tree_index cat6[22] = {
|
||||
2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 0, 0
|
||||
};
|
||||
|
||||
const vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
{
|
||||
{ 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, 1, 5},
|
||||
{ cat2, Pcat2, 2, 7},
|
||||
{ cat3, Pcat3, 3, 11},
|
||||
{ cat4, Pcat4, 4, 19},
|
||||
{ cat5, Pcat5, 5, 35},
|
||||
{ cat6, Pcat6, 11, 67},
|
||||
{ 0, 0, 0, 0}
|
||||
const vp8_extra_bit_struct vp8_extra_bits[12] = {
|
||||
{ 0, 0, 0, 0 }, { 0, 0, 0, 1 }, { 0, 0, 0, 2 },
|
||||
{ 0, 0, 0, 3 }, { 0, 0, 0, 4 }, { cat1, Pcat1, 1, 5 },
|
||||
{ cat2, Pcat2, 2, 7 }, { cat3, Pcat3, 3, 11 }, { cat4, Pcat4, 4, 19 },
|
||||
{ cat5, Pcat5, 5, 35 }, { cat6, Pcat6, 11, 67 }, { 0, 0, 0, 0 }
|
||||
};
|
||||
|
||||
#include "default_coef_probs.h"
|
||||
|
||||
/* Reset the coefficient probabilities in pc->fc.coef_probs by copying the
 * whole default_coef_probs table over them. */
void vp8_default_coef_probs(VP8_COMMON *pc) {
  const size_t table_bytes = sizeof(default_coef_probs);

  memcpy(pc->fc.coef_probs, default_coef_probs, table_bytes);
}
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPY_H_
|
||||
#define VP8_COMMON_ENTROPY_H_
|
||||
|
||||
@ -21,18 +20,18 @@ extern "C" {
|
||||
|
||||
/* Coefficient token alphabet */
|
||||
|
||||
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
|
||||
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
|
||||
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
|
||||
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
|
||||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
|
||||
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
|
||||
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
|
||||
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
|
||||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
|
||||
#define MAX_ENTROPY_TOKENS 12
|
||||
#define ENTROPY_NODES 11
|
||||
@ -41,21 +40,20 @@ extern const vp8_tree_index vp8_coef_tree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_tree_p tree;
|
||||
const vp8_prob *prob;
|
||||
int Len;
|
||||
int base_val;
|
||||
typedef struct {
|
||||
vp8_tree_p tree;
|
||||
const vp8_prob *prob;
|
||||
int Len;
|
||||
int base_val;
|
||||
} vp8_extra_bit_struct;
|
||||
|
||||
extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
|
||||
extern const vp8_extra_bit_struct
|
||||
vp8_extra_bits[12]; /* indexed by token value */
|
||||
|
||||
#define PROB_UPDATE_BASELINE_COST 7
|
||||
|
||||
#define MAX_PROB 255
|
||||
#define DCT_MAX_VALUE 2048
|
||||
#define PROB_UPDATE_BASELINE_COST 7
|
||||
|
||||
#define MAX_PROB 255
|
||||
#define DCT_MAX_VALUE 2048
|
||||
|
||||
/* Coefficients are predicted via a 3-dimensional probability table. */
|
||||
|
||||
@ -86,12 +84,13 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
|
||||
distinct bands). */
|
||||
|
||||
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
|
||||
# define PREV_COEF_CONTEXTS 3
|
||||
#define PREV_COEF_CONTEXTS 3
|
||||
|
||||
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
|
||||
|
||||
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
|
||||
extern DECLARE_ALIGNED(16, const unsigned char,
|
||||
vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
|
||||
|
||||
extern const vp8_prob vp8_coef_update_probs[BLOCK_TYPES][COEF_BANDS]
|
||||
[PREV_COEF_CONTEXTS][ENTROPY_NODES];
|
||||
|
||||
struct VP8Common;
|
||||
void vp8_default_coef_probs(struct VP8Common *);
|
||||
|
@ -16,156 +16,97 @@
|
||||
|
||||
#include "vp8_entropymodedata.h"
|
||||
|
||||
int vp8_mv_cont(const int_mv *l, const int_mv *a)
|
||||
{
|
||||
int lez = (l->as_int == 0);
|
||||
int aez = (a->as_int == 0);
|
||||
int lea = (l->as_int == a->as_int);
|
||||
int vp8_mv_cont(const int_mv *l, const int_mv *a) {
|
||||
int lez = (l->as_int == 0);
|
||||
int aez = (a->as_int == 0);
|
||||
int lea = (l->as_int == a->as_int);
|
||||
|
||||
if (lea && lez)
|
||||
return SUBMVREF_LEFT_ABOVE_ZED;
|
||||
if (lea && lez) return SUBMVREF_LEFT_ABOVE_ZED;
|
||||
|
||||
if (lea)
|
||||
return SUBMVREF_LEFT_ABOVE_SAME;
|
||||
if (lea) return SUBMVREF_LEFT_ABOVE_SAME;
|
||||
|
||||
if (aez)
|
||||
return SUBMVREF_ABOVE_ZED;
|
||||
if (aez) return SUBMVREF_ABOVE_ZED;
|
||||
|
||||
if (lez)
|
||||
return SUBMVREF_LEFT_ZED;
|
||||
if (lez) return SUBMVREF_LEFT_ZED;
|
||||
|
||||
return SUBMVREF_NORMAL;
|
||||
return SUBMVREF_NORMAL;
|
||||
}
|
||||
|
||||
static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25};
|
||||
static const vp8_prob sub_mv_ref_prob[VP8_SUBMVREFS - 1] = { 180, 162, 25 };
|
||||
|
||||
const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] =
|
||||
{
|
||||
{ 147, 136, 18 },
|
||||
{ 106, 145, 1 },
|
||||
{ 179, 121, 1 },
|
||||
{ 223, 1 , 34 },
|
||||
{ 208, 1 , 1 }
|
||||
const vp8_prob vp8_sub_mv_ref_prob2[SUBMVREF_COUNT][VP8_SUBMVREFS - 1] = {
|
||||
{ 147, 136, 18 },
|
||||
{ 106, 145, 1 },
|
||||
{ 179, 121, 1 },
|
||||
{ 223, 1, 34 },
|
||||
{ 208, 1, 1 }
|
||||
};
|
||||
|
||||
|
||||
|
||||
/* One 16-entry partition map per split type, shown four entries per row.
 * Each entry is the partition number assigned to that position. */
/* clang-format off */
const vp8_mbsplit vp8_mbsplits[VP8_NUMMBSPLITS] = {
  {
    0, 0, 0, 0,
    0, 0, 0, 0,
    1, 1, 1, 1,
    1, 1, 1, 1,
  },
  {
    0, 0, 1, 1,
    0, 0, 1, 1,
    0, 0, 1, 1,
    0, 0, 1, 1,
  },
  {
    0, 0, 1, 1,
    0, 0, 1, 1,
    2, 2, 3, 3,
    2, 2, 3, 3,
  },
  {
    0,  1,  2,  3,
    4,  5,  6,  7,
    8,  9,  10, 11,
    12, 13, 14, 15,
  }
};
/* clang-format on */
|
||||
|
||||
const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16};
|
||||
|
||||
const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150};
|
||||
const int vp8_mbsplit_count[VP8_NUMMBSPLITS] = { 2, 2, 4, 16 };
|
||||
|
||||
const vp8_prob vp8_mbsplit_probs[VP8_NUMMBSPLITS - 1] = { 110, 111, 150 };
|
||||
|
||||
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
|
||||
|
||||
const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */
|
||||
{
|
||||
-B_DC_PRED, 2, /* 0 = DC_NODE */
|
||||
-B_TM_PRED, 4, /* 1 = TM_NODE */
|
||||
-B_VE_PRED, 6, /* 2 = VE_NODE */
|
||||
8, 12, /* 3 = COM_NODE */
|
||||
-B_HE_PRED, 10, /* 4 = HE_NODE */
|
||||
-B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
|
||||
-B_LD_PRED, 14, /* 6 = LD_NODE */
|
||||
-B_VL_PRED, 16, /* 7 = VL_NODE */
|
||||
-B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
|
||||
};
|
||||
const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */
|
||||
{
|
||||
-B_DC_PRED, 2, /* 0 = DC_NODE */
|
||||
-B_TM_PRED, 4, /* 1 = TM_NODE */
|
||||
-B_VE_PRED, 6, /* 2 = VE_NODE */
|
||||
8, 12, /* 3 = COM_NODE */
|
||||
-B_HE_PRED, 10, /* 4 = HE_NODE */
|
||||
-B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
|
||||
-B_LD_PRED, 14, /* 6 = LD_NODE */
|
||||
-B_VL_PRED, 16, /* 7 = VL_NODE */
|
||||
-B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
|
||||
};
|
||||
|
||||
/* Again, these trees use the same probability indices as their
|
||||
explicitly-programmed predecessors. */
|
||||
|
||||
const vp8_tree_index vp8_ymode_tree[8] =
|
||||
{
|
||||
-DC_PRED, 2,
|
||||
4, 6,
|
||||
-V_PRED, -H_PRED,
|
||||
-TM_PRED, -B_PRED
|
||||
const vp8_tree_index vp8_ymode_tree[8] = {
|
||||
-DC_PRED, 2, 4, 6, -V_PRED, -H_PRED, -TM_PRED, -B_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_kf_ymode_tree[8] =
|
||||
{
|
||||
-B_PRED, 2,
|
||||
4, 6,
|
||||
-DC_PRED, -V_PRED,
|
||||
-H_PRED, -TM_PRED
|
||||
const vp8_tree_index vp8_kf_ymode_tree[8] = {
|
||||
-B_PRED, 2, 4, 6, -DC_PRED, -V_PRED, -H_PRED, -TM_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_uv_mode_tree[6] =
|
||||
{
|
||||
-DC_PRED, 2,
|
||||
-V_PRED, 4,
|
||||
-H_PRED, -TM_PRED
|
||||
};
|
||||
const vp8_tree_index vp8_uv_mode_tree[6] = { -DC_PRED, 2, -V_PRED,
|
||||
4, -H_PRED, -TM_PRED };
|
||||
|
||||
const vp8_tree_index vp8_mbsplit_tree[6] =
|
||||
{
|
||||
-3, 2,
|
||||
-2, 4,
|
||||
-0, -1
|
||||
};
|
||||
const vp8_tree_index vp8_mbsplit_tree[6] = { -3, 2, -2, 4, -0, -1 };
|
||||
|
||||
const vp8_tree_index vp8_mv_ref_tree[8] =
|
||||
{
|
||||
-ZEROMV, 2,
|
||||
-NEARESTMV, 4,
|
||||
-NEARMV, 6,
|
||||
-NEWMV, -SPLITMV
|
||||
};
|
||||
const vp8_tree_index vp8_mv_ref_tree[8] = { -ZEROMV, 2, -NEARESTMV, 4,
|
||||
-NEARMV, 6, -NEWMV, -SPLITMV };
|
||||
|
||||
const vp8_tree_index vp8_sub_mv_ref_tree[6] =
|
||||
{
|
||||
-LEFT4X4, 2,
|
||||
-ABOVE4X4, 4,
|
||||
-ZERO4X4, -NEW4X4
|
||||
};
|
||||
const vp8_tree_index vp8_sub_mv_ref_tree[6] = { -LEFT4X4, 2, -ABOVE4X4,
|
||||
4, -ZERO4X4, -NEW4X4 };
|
||||
|
||||
const vp8_tree_index vp8_small_mvtree [14] =
|
||||
{
|
||||
2, 8,
|
||||
4, 6,
|
||||
-0, -1,
|
||||
-2, -3,
|
||||
10, 12,
|
||||
-4, -5,
|
||||
-6, -7
|
||||
};
|
||||
const vp8_tree_index vp8_small_mvtree[14] = { 2, 8, 4, 6, -0, -1, -2,
|
||||
-3, 10, 12, -4, -5, -6, -7 };
|
||||
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x)
|
||||
{
|
||||
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
|
||||
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
|
||||
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x) {
|
||||
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
|
||||
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
|
||||
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
|
||||
}
|
||||
|
||||
void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
|
||||
{
|
||||
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
|
||||
void vp8_default_bmode_probs(vp8_prob p[VP8_BINTRAMODES - 1]) {
|
||||
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPYMODE_H_
|
||||
#define VP8_COMMON_ENTROPYMODE_H_
|
||||
|
||||
@ -19,42 +18,40 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SUBMVREF_NORMAL,
|
||||
SUBMVREF_LEFT_ZED,
|
||||
SUBMVREF_ABOVE_ZED,
|
||||
SUBMVREF_LEFT_ABOVE_SAME,
|
||||
SUBMVREF_LEFT_ABOVE_ZED
|
||||
typedef enum {
|
||||
SUBMVREF_NORMAL,
|
||||
SUBMVREF_LEFT_ZED,
|
||||
SUBMVREF_ABOVE_ZED,
|
||||
SUBMVREF_LEFT_ABOVE_SAME,
|
||||
SUBMVREF_LEFT_ABOVE_ZED
|
||||
} sumvfref_t;
|
||||
|
||||
typedef int vp8_mbsplit[16];
|
||||
|
||||
#define VP8_NUMMBSPLITS 4
|
||||
|
||||
extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];
|
||||
extern const vp8_mbsplit vp8_mbsplits[VP8_NUMMBSPLITS];
|
||||
|
||||
extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */
|
||||
extern const int vp8_mbsplit_count[VP8_NUMMBSPLITS]; /* # of subsets */
|
||||
|
||||
extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];
|
||||
extern const vp8_prob vp8_mbsplit_probs[VP8_NUMMBSPLITS - 1];
|
||||
|
||||
extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
|
||||
#define SUBMVREF_COUNT 5
|
||||
extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];
|
||||
|
||||
|
||||
extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES];
|
||||
extern const vp8_prob vp8_sub_mv_ref_prob2[SUBMVREF_COUNT][VP8_SUBMVREFS - 1];
|
||||
|
||||
extern const unsigned int vp8_kf_default_bmode_counts
|
||||
[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES];
|
||||
|
||||
extern const vp8_tree_index vp8_bmode_tree[];
|
||||
|
||||
extern const vp8_tree_index vp8_ymode_tree[];
|
||||
extern const vp8_tree_index vp8_kf_ymode_tree[];
|
||||
extern const vp8_tree_index vp8_uv_mode_tree[];
|
||||
extern const vp8_tree_index vp8_ymode_tree[];
|
||||
extern const vp8_tree_index vp8_kf_ymode_tree[];
|
||||
extern const vp8_tree_index vp8_uv_mode_tree[];
|
||||
|
||||
extern const vp8_tree_index vp8_mbsplit_tree[];
|
||||
extern const vp8_tree_index vp8_mv_ref_tree[];
|
||||
extern const vp8_tree_index vp8_sub_mv_ref_tree[];
|
||||
extern const vp8_tree_index vp8_mbsplit_tree[];
|
||||
extern const vp8_tree_index vp8_mv_ref_tree[];
|
||||
extern const vp8_tree_index vp8_sub_mv_ref_tree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES];
|
||||
extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES];
|
||||
@ -65,21 +62,23 @@ extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS];
|
||||
/* Inter mode values do not start at zero */
|
||||
|
||||
extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS];
|
||||
extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];
|
||||
extern const struct vp8_token_struct
|
||||
vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];
|
||||
|
||||
extern const vp8_tree_index vp8_small_mvtree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_small_mvencodings[8];
|
||||
|
||||
/* Key frame default mode probs */
|
||||
extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES]
|
||||
[VP8_BINTRAMODES-1];
|
||||
extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1];
|
||||
extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1];
|
||||
extern const vp8_prob
|
||||
vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES - 1];
|
||||
extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES - 1];
|
||||
extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES - 1];
|
||||
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x);
|
||||
void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
|
||||
void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
|
||||
void vp8_default_bmode_probs(vp8_prob dest[VP8_BINTRAMODES - 1]);
|
||||
void vp8_kf_default_bmode_probs(
|
||||
vp8_prob dest[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES - 1]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -8,42 +8,40 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "entropymv.h"
|
||||
|
||||
const MV_CONTEXT vp8_mv_update_probs[2] =
|
||||
{
|
||||
{{
|
||||
237,
|
||||
246,
|
||||
253, 253, 254, 254, 254, 254, 254,
|
||||
254, 254, 254, 254, 254, 250, 250, 252, 254, 254
|
||||
}},
|
||||
{{
|
||||
231,
|
||||
243,
|
||||
245, 253, 254, 254, 254, 254, 254,
|
||||
254, 254, 254, 254, 254, 251, 251, 254, 254, 254
|
||||
}}
|
||||
/* clang-format off */
|
||||
const MV_CONTEXT vp8_mv_update_probs[2] = {
|
||||
{ {
|
||||
237,
|
||||
246,
|
||||
253, 253, 254, 254, 254, 254, 254,
|
||||
254, 254, 254, 254, 254, 250, 250, 252, 254, 254
|
||||
} },
|
||||
{ {
|
||||
231,
|
||||
243,
|
||||
245, 253, 254, 254, 254, 254, 254,
|
||||
254, 254, 254, 254, 254, 251, 251, 254, 254, 254
|
||||
} }
|
||||
};
|
||||
const MV_CONTEXT vp8_default_mv_context[2] =
|
||||
{
|
||||
{{
|
||||
/* row */
|
||||
162, /* is short */
|
||||
128, /* sign */
|
||||
225, 146, 172, 147, 214, 39, 156, /* short tree */
|
||||
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
|
||||
}},
|
||||
/* clang-format on */
|
||||
|
||||
const MV_CONTEXT vp8_default_mv_context[2] = {
|
||||
{ {
|
||||
/* row */
|
||||
162, /* is short */
|
||||
128, /* sign */
|
||||
225, 146, 172, 147, 214, 39, 156, /* short tree */
|
||||
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
|
||||
} },
|
||||
|
||||
{ {
|
||||
/* same for column */
|
||||
164, /* is short */
|
||||
128, /**/
|
||||
204, 170, 119, 235, 140, 230, 228, /**/
|
||||
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
|
||||
|
||||
{{
|
||||
/* same for column */
|
||||
164, /* is short */
|
||||
128,
|
||||
204, 170, 119, 235, 140, 230, 228,
|
||||
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
|
||||
|
||||
}}
|
||||
} }
|
||||
};
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPYMV_H_
|
||||
#define VP8_COMMON_ENTROPYMV_H_
|
||||
|
||||
@ -18,29 +17,27 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum
|
||||
{
|
||||
mv_max = 1023, /* max absolute value of a MV component */
|
||||
MVvals = (2 * mv_max) + 1, /* # possible values "" */
|
||||
mvfp_max = 255, /* max absolute value of a full pixel MV component */
|
||||
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
|
||||
enum {
|
||||
mv_max = 1023, /* max absolute value of a MV component */
|
||||
MVvals = (2 * mv_max) + 1, /* # possible values "" */
|
||||
mvfp_max = 255, /* max absolute value of a full pixel MV component */
|
||||
MVfpvals = (2 * mvfp_max) + 1, /* # possible full pixel MV values */
|
||||
|
||||
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
|
||||
mvnum_short = 8, /* magnitudes 0 through 7 */
|
||||
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
|
||||
mvnum_short = 8, /* magnitudes 0 through 7 */
|
||||
|
||||
/* probability offsets for coding each MV component */
|
||||
/* probability offsets for coding each MV component */
|
||||
|
||||
mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
|
||||
MVPsign, /* sign for non-zero */
|
||||
MVPshort, /* 8 short values = 7-position tree */
|
||||
mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
|
||||
MVPsign, /* sign for non-zero */
|
||||
MVPshort, /* 8 short values = 7-position tree */
|
||||
|
||||
MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
|
||||
MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
|
||||
MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
|
||||
MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
|
||||
};
|
||||
|
||||
typedef struct mv_context
|
||||
{
|
||||
vp8_prob prob[MVPcount]; /* often come in row, col pairs */
|
||||
typedef struct mv_context {
|
||||
vp8_prob prob[MVPcount]; /* often come in row, col pairs */
|
||||
} MV_CONTEXT;
|
||||
|
||||
extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];
|
||||
|
@ -8,181 +8,146 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "extend.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
static void copy_and_extend_plane(unsigned char *s, /* source */
|
||||
int sp, /* source pitch */
|
||||
unsigned char *d, /* destination */
|
||||
int dp, /* destination pitch */
|
||||
int h, /* height */
|
||||
int w, /* width */
|
||||
int et, /* extend top border */
|
||||
int el, /* extend left border */
|
||||
int eb, /* extend bottom border */
|
||||
int er /* extend right border */
|
||||
) {
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
int linesize;
|
||||
|
||||
static void copy_and_extend_plane
|
||||
(
|
||||
unsigned char *s, /* source */
|
||||
int sp, /* source pitch */
|
||||
unsigned char *d, /* destination */
|
||||
int dp, /* destination pitch */
|
||||
int h, /* height */
|
||||
int w, /* width */
|
||||
int et, /* extend top border */
|
||||
int el, /* extend left border */
|
||||
int eb, /* extend bottom border */
|
||||
int er /* extend right border */
|
||||
)
|
||||
{
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
int linesize;
|
||||
/* copy the left and right most columns out */
|
||||
src_ptr1 = s;
|
||||
src_ptr2 = s + w - 1;
|
||||
dest_ptr1 = d - el;
|
||||
dest_ptr2 = d + w;
|
||||
|
||||
/* copy the left and right most columns out */
|
||||
src_ptr1 = s;
|
||||
src_ptr2 = s + w - 1;
|
||||
dest_ptr1 = d - el;
|
||||
dest_ptr2 = d + w;
|
||||
for (i = 0; i < h; i++) {
|
||||
memset(dest_ptr1, src_ptr1[0], el);
|
||||
memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
dest_ptr1 += dp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < h; i++)
|
||||
{
|
||||
memset(dest_ptr1, src_ptr1[0], el);
|
||||
memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
dest_ptr1 += dp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
/* Now copy the top and bottom lines into each line of the respective
|
||||
* borders
|
||||
*/
|
||||
src_ptr1 = d - el;
|
||||
src_ptr2 = d + dp * (h - 1) - el;
|
||||
dest_ptr1 = d + dp * (-et) - el;
|
||||
dest_ptr2 = d + dp * (h)-el;
|
||||
linesize = el + er + w;
|
||||
|
||||
/* Now copy the top and bottom lines into each line of the respective
|
||||
* borders
|
||||
*/
|
||||
src_ptr1 = d - el;
|
||||
src_ptr2 = d + dp * (h - 1) - el;
|
||||
dest_ptr1 = d + dp * (-et) - el;
|
||||
dest_ptr2 = d + dp * (h) - el;
|
||||
linesize = el + er + w;
|
||||
for (i = 0; i < et; i++) {
|
||||
memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
dest_ptr1 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < et; i++)
|
||||
{
|
||||
memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
dest_ptr1 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < eb; i++)
|
||||
{
|
||||
memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
for (i = 0; i < eb; i++) {
|
||||
memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst)
|
||||
{
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
YV12_BUFFER_CONFIG *dst) {
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
|
||||
copy_and_extend_plane(src->y_buffer, src->y_stride,
|
||||
dst->y_buffer, dst->y_stride,
|
||||
src->y_height, src->y_width,
|
||||
et, el, eb, er);
|
||||
copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
|
||||
dst->y_stride, src->y_height, src->y_width, et, el, eb,
|
||||
er);
|
||||
|
||||
et = dst->border >> 1;
|
||||
el = dst->border >> 1;
|
||||
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
|
||||
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
|
||||
et = dst->border >> 1;
|
||||
el = dst->border >> 1;
|
||||
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
|
||||
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
|
||||
|
||||
copy_and_extend_plane(src->u_buffer, src->uv_stride,
|
||||
dst->u_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
|
||||
dst->uv_stride, src->uv_height, src->uv_width, et, el,
|
||||
eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer, src->uv_stride,
|
||||
dst->v_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
|
||||
dst->uv_stride, src->uv_height, src->uv_width, et, el,
|
||||
eb, er);
|
||||
}
|
||||
|
||||
|
||||
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst,
|
||||
int srcy, int srcx,
|
||||
int srch, int srcw)
|
||||
{
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
int src_y_offset = srcy * src->y_stride + srcx;
|
||||
int dst_y_offset = srcy * dst->y_stride + srcx;
|
||||
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
|
||||
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
|
||||
YV12_BUFFER_CONFIG *dst, int srcy,
|
||||
int srcx, int srch, int srcw) {
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
int src_y_offset = srcy * src->y_stride + srcx;
|
||||
int dst_y_offset = srcy * dst->y_stride + srcx;
|
||||
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
|
||||
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
|
||||
|
||||
/* If the side is not touching the bounder then don't extend. */
|
||||
if (srcy)
|
||||
et = 0;
|
||||
if (srcx)
|
||||
el = 0;
|
||||
if (srcy + srch != src->y_height)
|
||||
eb = 0;
|
||||
if (srcx + srcw != src->y_width)
|
||||
er = 0;
|
||||
/* If the side is not touching the bounder then don't extend. */
|
||||
if (srcy) et = 0;
|
||||
if (srcx) el = 0;
|
||||
if (srcy + srch != src->y_height) eb = 0;
|
||||
if (srcx + srcw != src->y_width) er = 0;
|
||||
|
||||
copy_and_extend_plane(src->y_buffer + src_y_offset,
|
||||
src->y_stride,
|
||||
dst->y_buffer + dst_y_offset,
|
||||
dst->y_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
|
||||
dst->y_buffer + dst_y_offset, dst->y_stride, srch, srcw,
|
||||
et, el, eb, er);
|
||||
|
||||
et = (et + 1) >> 1;
|
||||
el = (el + 1) >> 1;
|
||||
eb = (eb + 1) >> 1;
|
||||
er = (er + 1) >> 1;
|
||||
srch = (srch + 1) >> 1;
|
||||
srcw = (srcw + 1) >> 1;
|
||||
et = (et + 1) >> 1;
|
||||
el = (el + 1) >> 1;
|
||||
eb = (eb + 1) >> 1;
|
||||
er = (er + 1) >> 1;
|
||||
srch = (srch + 1) >> 1;
|
||||
srcw = (srcw + 1) >> 1;
|
||||
|
||||
copy_and_extend_plane(src->u_buffer + src_uv_offset,
|
||||
src->uv_stride,
|
||||
dst->u_buffer + dst_uv_offset,
|
||||
dst->uv_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
|
||||
dst->u_buffer + dst_uv_offset, dst->uv_stride, srch,
|
||||
srcw, et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer + src_uv_offset,
|
||||
src->uv_stride,
|
||||
dst->v_buffer + dst_uv_offset,
|
||||
dst->uv_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
|
||||
dst->v_buffer + dst_uv_offset, dst->uv_stride, srch,
|
||||
srcw, et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
/* note the extension is only for the last row, for intra prediction purpose */
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
|
||||
unsigned char *YPtr,
|
||||
unsigned char *UPtr,
|
||||
unsigned char *VPtr)
|
||||
{
|
||||
int i;
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr,
|
||||
unsigned char *UPtr, unsigned char *VPtr) {
|
||||
int i;
|
||||
|
||||
YPtr += ybf->y_stride * 14;
|
||||
UPtr += ybf->uv_stride * 6;
|
||||
VPtr += ybf->uv_stride * 6;
|
||||
YPtr += ybf->y_stride * 14;
|
||||
UPtr += ybf->uv_stride * 6;
|
||||
VPtr += ybf->uv_stride * 6;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
for (i = 0; i < 4; i++) {
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
|
||||
YPtr += ybf->y_stride;
|
||||
UPtr += ybf->uv_stride;
|
||||
VPtr += ybf->uv_stride;
|
||||
YPtr += ybf->y_stride;
|
||||
UPtr += ybf->uv_stride;
|
||||
VPtr += ybf->uv_stride;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
for (i = 0; i < 4; i++) {
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_EXTEND_H_
|
||||
#define VP8_COMMON_EXTEND_H_
|
||||
|
||||
@ -18,13 +17,13 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr,
|
||||
unsigned char *UPtr, unsigned char *VPtr);
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst);
|
||||
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst,
|
||||
int srcy, int srcx,
|
||||
int srch, int srcw);
|
||||
YV12_BUFFER_CONFIG *dst, int srcy,
|
||||
int srcx, int srch, int srcw);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -8,243 +8,186 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "filter.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = {
|
||||
{ 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
|
||||
{ 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
|
||||
{
|
||||
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = {
|
||||
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
{ 0, 0, 128, 0, 0,
|
||||
0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
|
||||
|
||||
static void filter_block2d_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
static void filter_block2d_first_pass(unsigned char *src_ptr, int *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter) {
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
for (i = 0; i < output_height; i++) {
|
||||
for (j = 0; j < output_width; j++) {
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2 * pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3 * pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
output_ptr[j] = Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_block2d_second_pass
|
||||
(
|
||||
int *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
static void filter_block2d_second_pass(int *src_ptr, unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter) {
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
for (i = 0; i < output_height; i++) {
|
||||
for (j = 0; j < output_width; j++) {
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2 * pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3 * pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = (unsigned char)Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Start next row */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
output_ptr[j] = (unsigned char)Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Start next row */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_block2d(unsigned char *src_ptr, unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line, int output_pitch,
|
||||
const short *HFilter, const short *VFilter) {
|
||||
int FData[9 * 4]; /* Temp data buffer used in filtering */
|
||||
|
||||
static void filter_block2d
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int output_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter
|
||||
)
|
||||
{
|
||||
int FData[9*4]; /* Temp data buffer used in filtering */
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
|
||||
src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4,
|
||||
VFilter);
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
int xoffset, int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
void vp8_sixtap_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
|
||||
VFilter);
|
||||
}
|
||||
void vp8_sixtap_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
void vp8_sixtap_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
int xoffset, int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13 * 16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
|
||||
src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8,
|
||||
VFilter);
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
void vp8_sixtap_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
int xoffset, int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13 * 16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
|
||||
src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8,
|
||||
VFilter);
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; /* Temp data buffer used in filtering */
|
||||
void vp8_sixtap_predict16x16_c(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
int xoffset, int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21 * 24]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
|
||||
src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16,
|
||||
VFilter);
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
@ -267,33 +210,25 @@ void vp8_sixtap_predict16x16_c
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_stride,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
static void filter_block2d_bil_first_pass(
|
||||
unsigned char *src_ptr, unsigned short *dst_ptr, unsigned int src_stride,
|
||||
unsigned int height, unsigned int width, const short *vp8_filter) {
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_stride - width;
|
||||
dst_ptr += width;
|
||||
for (i = 0; i < height; i++) {
|
||||
for (j = 0; j < width; j++) {
|
||||
/* Apply bilinear filter */
|
||||
dst_ptr[j] =
|
||||
(((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2)) >>
|
||||
VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_stride - width;
|
||||
dst_ptr += width;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
@ -312,42 +247,35 @@ static void filter_block2d_bil_first_pass
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the vertical direction to produce the filtered output
|
||||
* block. Used to implement second-pass of 2-D separable filter.
|
||||
* block. Used to implement second-pass of 2-D separable
|
||||
* filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires 16-bit input as produced by filter_block2d_bil_first_pass.
|
||||
* SPECIAL NOTES : Requires 16-bit input as produced by
|
||||
* filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
static void filter_block2d_bil_second_pass(unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch, unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter) {
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
dst_ptr += dst_pitch;
|
||||
for (i = 0; i < height; i++) {
|
||||
for (j = 0; j < width; j++) {
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[width] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2);
|
||||
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
dst_ptr += dst_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
@ -356,7 +284,8 @@ static void filter_block2d_bil_second_pass
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pitch : Stride of source block.
|
||||
* UINT32 dst_pitch : Stride of destination block.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter
|
||||
* taps.
|
||||
* INT32 *VFilter : Array of 2 vertical filter taps.
|
||||
* INT32 Width : Block width
|
||||
* INT32 Height : Block height
|
||||
@ -372,44 +301,29 @@ static void filter_block2d_bil_second_pass
|
||||
* SPECIAL NOTES : The largest block size that can be handled here is 16x16
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
static void filter_block2d_bil(unsigned char *src_ptr, unsigned char *dst_ptr,
|
||||
unsigned int src_pitch, unsigned int dst_pitch,
|
||||
const short *HFilter, const short *VFilter,
|
||||
int Width, int Height) {
|
||||
unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */
|
||||
|
||||
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width,
|
||||
HFilter);
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
/* then 1-D vertically... */
|
||||
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width,
|
||||
VFilter);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
int xoffset, int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
void vp8_bilinear_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
#if 0
|
||||
{
|
||||
int i;
|
||||
@ -429,65 +343,46 @@ void vp8_bilinear_predict4x4_c
|
||||
}
|
||||
}
|
||||
#endif
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
|
||||
VFilter, 4, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
void vp8_bilinear_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
int xoffset, int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
|
||||
VFilter, 8, 8);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
void vp8_bilinear_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
int xoffset, int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
|
||||
VFilter, 8, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
void vp8_bilinear_predict16x16_c(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
|
||||
VFilter, 16, 16);
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_FILTER_H_
|
||||
#define VP8_COMMON_FILTER_H_
|
||||
|
||||
@ -20,7 +19,7 @@ extern "C" {
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
|
||||
|
@ -8,186 +8,147 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "findnearmv.h"
|
||||
|
||||
const unsigned char vp8_mbsplit_offset[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }
|
||||
};
|
||||
|
||||
/* Predict motion vectors using those from already-decoded nearby blocks.
|
||||
Note that we only consider one 4x4 subblock from each candidate 16x16
|
||||
macroblock. */
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv *nearest,
|
||||
int_mv *nearby,
|
||||
int_mv *best_mv,
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
)
|
||||
{
|
||||
const MODE_INFO *above = here - xd->mode_info_stride;
|
||||
const MODE_INFO *left = here - 1;
|
||||
const MODE_INFO *aboveleft = above - 1;
|
||||
int_mv near_mvs[4];
|
||||
int_mv *mv = near_mvs;
|
||||
int *cntx = cnt;
|
||||
enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV};
|
||||
void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest,
|
||||
int_mv *nearby, int_mv *best_mv, int cnt[4],
|
||||
int refframe, int *ref_frame_sign_bias) {
|
||||
const MODE_INFO *above = here - xd->mode_info_stride;
|
||||
const MODE_INFO *left = here - 1;
|
||||
const MODE_INFO *aboveleft = above - 1;
|
||||
int_mv near_mvs[4];
|
||||
int_mv *mv = near_mvs;
|
||||
int *cntx = cnt;
|
||||
enum { CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
|
||||
|
||||
/* Zero accumulators */
|
||||
mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
|
||||
cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
|
||||
/* Zero accumulators */
|
||||
mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
|
||||
cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
|
||||
|
||||
/* Process above */
|
||||
if (above->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (above->mbmi.mv.as_int)
|
||||
{
|
||||
(++mv)->as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 2;
|
||||
/* Process above */
|
||||
if (above->mbmi.ref_frame != INTRA_FRAME) {
|
||||
if (above->mbmi.mv.as_int) {
|
||||
(++mv)->as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv,
|
||||
ref_frame_sign_bias);
|
||||
++cntx;
|
||||
}
|
||||
|
||||
/* Process left */
|
||||
if (left->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (left->mbmi.mv.as_int)
|
||||
{
|
||||
int_mv this_mv;
|
||||
*cntx += 2;
|
||||
}
|
||||
|
||||
this_mv.as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
/* Process left */
|
||||
if (left->mbmi.ref_frame != INTRA_FRAME) {
|
||||
if (left->mbmi.mv.as_int) {
|
||||
int_mv this_mv;
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
this_mv.as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv,
|
||||
ref_frame_sign_bias);
|
||||
|
||||
*cntx += 2;
|
||||
}
|
||||
else
|
||||
cnt[CNT_INTRA] += 2;
|
||||
}
|
||||
if (this_mv.as_int != mv->as_int) {
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
|
||||
/* Process above left */
|
||||
if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (aboveleft->mbmi.mv.as_int)
|
||||
{
|
||||
int_mv this_mv;
|
||||
*cntx += 2;
|
||||
} else
|
||||
cnt[CNT_INTRA] += 2;
|
||||
}
|
||||
|
||||
this_mv.as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
/* Process above left */
|
||||
if (aboveleft->mbmi.ref_frame != INTRA_FRAME) {
|
||||
if (aboveleft->mbmi.mv.as_int) {
|
||||
int_mv this_mv;
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
this_mv.as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe,
|
||||
&this_mv, ref_frame_sign_bias);
|
||||
|
||||
*cntx += 1;
|
||||
}
|
||||
else
|
||||
cnt[CNT_INTRA] += 1;
|
||||
}
|
||||
if (this_mv.as_int != mv->as_int) {
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
|
||||
/* If we have three distinct MV's ... */
|
||||
if (cnt[CNT_SPLITMV])
|
||||
{
|
||||
/* See if above-left MV can be merged with NEAREST */
|
||||
if (mv->as_int == near_mvs[CNT_NEAREST].as_int)
|
||||
cnt[CNT_NEAREST] += 1;
|
||||
}
|
||||
*cntx += 1;
|
||||
} else
|
||||
cnt[CNT_INTRA] += 1;
|
||||
}
|
||||
|
||||
cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
|
||||
+ (left->mbmi.mode == SPLITMV)) * 2
|
||||
+ (aboveleft->mbmi.mode == SPLITMV);
|
||||
/* If we have three distinct MV's ... */
|
||||
if (cnt[CNT_SPLITMV]) {
|
||||
/* See if above-left MV can be merged with NEAREST */
|
||||
if (mv->as_int == near_mvs[CNT_NEAREST].as_int) cnt[CNT_NEAREST] += 1;
|
||||
}
|
||||
|
||||
/* Swap near and nearest if necessary */
|
||||
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
|
||||
{
|
||||
int tmp;
|
||||
tmp = cnt[CNT_NEAREST];
|
||||
cnt[CNT_NEAREST] = cnt[CNT_NEAR];
|
||||
cnt[CNT_NEAR] = tmp;
|
||||
tmp = near_mvs[CNT_NEAREST].as_int;
|
||||
near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
|
||||
near_mvs[CNT_NEAR].as_int = tmp;
|
||||
}
|
||||
cnt[CNT_SPLITMV] =
|
||||
((above->mbmi.mode == SPLITMV) + (left->mbmi.mode == SPLITMV)) * 2 +
|
||||
(aboveleft->mbmi.mode == SPLITMV);
|
||||
|
||||
/* Use near_mvs[0] to store the "best" MV */
|
||||
if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
|
||||
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
|
||||
/* Swap near and nearest if necessary */
|
||||
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
|
||||
int tmp;
|
||||
tmp = cnt[CNT_NEAREST];
|
||||
cnt[CNT_NEAREST] = cnt[CNT_NEAR];
|
||||
cnt[CNT_NEAR] = tmp;
|
||||
tmp = near_mvs[CNT_NEAREST].as_int;
|
||||
near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
|
||||
near_mvs[CNT_NEAR].as_int = tmp;
|
||||
}
|
||||
|
||||
/* Set up return values */
|
||||
best_mv->as_int = near_mvs[0].as_int;
|
||||
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
|
||||
nearby->as_int = near_mvs[CNT_NEAR].as_int;
|
||||
/* Use near_mvs[0] to store the "best" MV */
|
||||
if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
|
||||
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
|
||||
|
||||
/* Set up return values */
|
||||
best_mv->as_int = near_mvs[0].as_int;
|
||||
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
|
||||
nearby->as_int = near_mvs[CNT_NEAR].as_int;
|
||||
}
|
||||
|
||||
|
||||
static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd)
|
||||
{
|
||||
inv->as_mv.row = src->as_mv.row * -1;
|
||||
inv->as_mv.col = src->as_mv.col * -1;
|
||||
vp8_clamp_mv2(inv, xd);
|
||||
vp8_clamp_mv2(src, xd);
|
||||
static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd) {
|
||||
inv->as_mv.row = src->as_mv.row * -1;
|
||||
inv->as_mv.col = src->as_mv.col * -1;
|
||||
vp8_clamp_mv2(inv, xd);
|
||||
vp8_clamp_mv2(src, xd);
|
||||
}
|
||||
|
||||
int vp8_find_near_mvs_bias(MACROBLOCKD *xd, const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2], int cnt[4], int refframe,
|
||||
int *ref_frame_sign_bias) {
|
||||
int sign_bias = ref_frame_sign_bias[refframe];
|
||||
|
||||
int vp8_find_near_mvs_bias
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2],
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
)
|
||||
{
|
||||
int sign_bias = ref_frame_sign_bias[refframe];
|
||||
vp8_find_near_mvs(xd, here, &mode_mv_sb[sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV], &best_mv_sb[sign_bias], cnt,
|
||||
refframe, ref_frame_sign_bias);
|
||||
|
||||
vp8_find_near_mvs(xd,
|
||||
here,
|
||||
&mode_mv_sb[sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV],
|
||||
&best_mv_sb[sign_bias],
|
||||
cnt,
|
||||
refframe,
|
||||
ref_frame_sign_bias);
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARESTMV], xd);
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV], xd);
|
||||
invert_and_clamp_mvs(&best_mv_sb[!sign_bias], &best_mv_sb[sign_bias], xd);
|
||||
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARESTMV], xd);
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV], xd);
|
||||
invert_and_clamp_mvs(&best_mv_sb[!sign_bias],
|
||||
&best_mv_sb[sign_bias], xd);
|
||||
|
||||
return sign_bias;
|
||||
return sign_bias;
|
||||
}
|
||||
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
)
|
||||
{
|
||||
p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0];
|
||||
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
|
||||
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
|
||||
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
|
||||
/*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
|
||||
return p;
|
||||
vp8_prob *vp8_mv_ref_probs(vp8_prob p[VP8_MVREFS - 1],
|
||||
const int near_mv_ref_ct[4]) {
|
||||
p[0] = vp8_mode_contexts[near_mv_ref_ct[0]][0];
|
||||
p[1] = vp8_mode_contexts[near_mv_ref_ct[1]][1];
|
||||
p[2] = vp8_mode_contexts[near_mv_ref_ct[2]][2];
|
||||
p[3] = vp8_mode_contexts[near_mv_ref_ct[3]][3];
|
||||
/* p[3] = vp8_mode_contexts[near_mv_ref_ct[1] + near_mv_ref_ct[2] +
|
||||
near_mv_ref_ct[3]][3]; */
|
||||
return p;
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_FINDNEARMV_H_
|
||||
#define VP8_COMMON_FINDNEARMV_H_
|
||||
|
||||
@ -22,170 +21,125 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
|
||||
int_mv *mvp, const int *ref_frame_sign_bias)
|
||||
{
|
||||
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
|
||||
{
|
||||
mvp->as_mv.row *= -1;
|
||||
mvp->as_mv.col *= -1;
|
||||
}
|
||||
int_mv *mvp, const int *ref_frame_sign_bias) {
|
||||
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) {
|
||||
mvp->as_mv.row *= -1;
|
||||
mvp->as_mv.col *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) {
|
||||
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
|
||||
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
}
|
||||
|
||||
static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge,
|
||||
int mb_to_right_edge, int mb_to_top_edge,
|
||||
int mb_to_bottom_edge)
|
||||
{
|
||||
mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
|
||||
mb_to_left_edge : mv->as_mv.col;
|
||||
mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
|
||||
mb_to_right_edge : mv->as_mv.col;
|
||||
mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
|
||||
mb_to_top_edge : mv->as_mv.row;
|
||||
mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
|
||||
mb_to_bottom_edge : mv->as_mv.row;
|
||||
int mb_to_bottom_edge) {
|
||||
mv->as_mv.col =
|
||||
(mv->as_mv.col < mb_to_left_edge) ? mb_to_left_edge : mv->as_mv.col;
|
||||
mv->as_mv.col =
|
||||
(mv->as_mv.col > mb_to_right_edge) ? mb_to_right_edge : mv->as_mv.col;
|
||||
mv->as_mv.row =
|
||||
(mv->as_mv.row < mb_to_top_edge) ? mb_to_top_edge : mv->as_mv.row;
|
||||
mv->as_mv.row =
|
||||
(mv->as_mv.row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->as_mv.row;
|
||||
}
|
||||
static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
|
||||
int mb_to_right_edge,
|
||||
int mb_to_top_edge,
|
||||
int mb_to_bottom_edge)
|
||||
{
|
||||
unsigned int need_to_clamp;
|
||||
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
|
||||
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
|
||||
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
|
||||
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
|
||||
return need_to_clamp;
|
||||
int mb_to_bottom_edge) {
|
||||
unsigned int need_to_clamp;
|
||||
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
|
||||
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
|
||||
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
|
||||
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
|
||||
return need_to_clamp;
|
||||
}
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv *nearest, int_mv *nearby, int_mv *best,
|
||||
int near_mv_ref_cts[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
);
|
||||
void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest,
|
||||
int_mv *nearby, int_mv *best, int near_mv_ref_cts[4],
|
||||
int refframe, int *ref_frame_sign_bias);
|
||||
|
||||
int vp8_find_near_mvs_bias(MACROBLOCKD *xd, const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2], int cnt[4], int refframe,
|
||||
int *ref_frame_sign_bias);
|
||||
|
||||
int vp8_find_near_mvs_bias
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2],
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
);
|
||||
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
);
|
||||
vp8_prob *vp8_mv_ref_probs(vp8_prob p[VP8_MVREFS - 1],
|
||||
const int near_mv_ref_ct[4]);
|
||||
|
||||
extern const unsigned char vp8_mbsplit_offset[4][16];
|
||||
|
||||
static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b) {
|
||||
if (!(b & 3)) {
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
|
||||
static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.mv.as_int;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->mv.as_int;
|
||||
return (cur_mb->bmi + b - 1)->mv.as_int;
|
||||
}
|
||||
|
||||
static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
|
||||
int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
int mi_stride) {
|
||||
if (!(b >> 2)) {
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 16;
|
||||
}
|
||||
if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.mv.as_int;
|
||||
b += 16;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + (b - 4))->mv.as_int;
|
||||
return (cur_mb->bmi + (b - 4))->mv.as_int;
|
||||
}
|
||||
static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 3)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb,
|
||||
int b) {
|
||||
if (!(b & 3)) {
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
switch (cur_mb->mbmi.mode) {
|
||||
case B_PRED: return (cur_mb->bmi + b + 3)->as_mode;
|
||||
case DC_PRED: return B_DC_PRED;
|
||||
case V_PRED: return B_VE_PRED;
|
||||
case H_PRED: return B_HE_PRED;
|
||||
case TM_PRED: return B_TM_PRED;
|
||||
default: return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->as_mode;
|
||||
return (cur_mb->bmi + b - 1)->as_mode;
|
||||
}
|
||||
|
||||
static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b,
|
||||
int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
int mi_stride) {
|
||||
if (!(b >> 2)) {
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 12)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
switch (cur_mb->mbmi.mode) {
|
||||
case B_PRED: return (cur_mb->bmi + b + 12)->as_mode;
|
||||
case DC_PRED: return B_DC_PRED;
|
||||
case V_PRED: return B_VE_PRED;
|
||||
case H_PRED: return B_HE_PRED;
|
||||
case TM_PRED: return B_TM_PRED;
|
||||
default: return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 4)->as_mode;
|
||||
return (cur_mb->bmi + b - 4)->as_mode;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#if ARCH_ARM
|
||||
@ -24,7 +23,7 @@
|
||||
#include <unistd.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
|
||||
typedef void(WINAPI *PGNSI)(LPSYSTEM_INFO);
|
||||
#elif defined(__OS2__)
|
||||
#define INCL_DOS
|
||||
#define INCL_DOSSPINLOCK
|
||||
@ -33,74 +32,69 @@ typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* Returns the number of processors to report for threading decisions.
 *
 * The count starts at 16 and is overwritten by whichever platform branch is
 * compiled in (sysconf on unistd platforms, the Win32 system-info calls, or
 * the OS/2 processor-status loop).  When no branch applies the default of 16
 * is returned.  The result is never less than 1, so a failed or zero query
 * collapses to a single core.
 */
static int get_cpu_count(void) {
  int core_count = 16;

#if HAVE_UNISTD_H && !defined(__OS2__)
#if defined(_SC_NPROCESSORS_ONLN)
  /* sysconf returns long; -1 on error is handled by the final clamp. */
  core_count = (int)sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
  core_count = (int)sysconf(_SC_NPROC_ONLN);
#endif
#elif defined(_WIN32)
  {
#if _WIN32_WINNT >= 0x0501
    SYSTEM_INFO sysinfo;
    GetNativeSystemInfo(&sysinfo);
#else
    PGNSI pGNSI;
    SYSTEM_INFO sysinfo;

    /* Call GetNativeSystemInfo if supported or
     * GetSystemInfo otherwise. */

    pGNSI = (PGNSI)GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
                                  "GetNativeSystemInfo");
    if (pGNSI != NULL)
      pGNSI(&sysinfo);
    else
      GetSystemInfo(&sysinfo);
#endif

    core_count = sysinfo.dwNumberOfProcessors;
  }
#elif defined(__OS2__)
  {
    ULONG proc_id;
    ULONG status;

    /* Count online processors until the status query reports an error. */
    core_count = 0;
    for (proc_id = 1;; proc_id++) {
      if (DosGetProcessorStatus(proc_id, &status)) break;

      if (status == PROC_ONLINE) core_count++;
    }
  }
#else
/* other platforms */
#endif

  return core_count > 0 ? core_count : 1;
}
|
||||
#endif
|
||||
|
||||
/* C fallback for clearing system state: intentionally does nothing. */
void vp8_clear_system_state_c(void) {}
|
||||
|
||||
/* Fills in the machine-dependent fields of `ctx`.
 *
 * With CONFIG_MULTITHREAD, ctx->processor_core_count is set from
 * get_cpu_count(); otherwise `ctx` is only referenced to silence the
 * unused-parameter warning.  On ARM and x86/x86-64 builds ctx->cpu_caps is
 * set from the architecture's capability query; other architectures leave it
 * untouched.
 */
void vp8_machine_specific_config(VP8_COMMON *ctx) {
#if CONFIG_MULTITHREAD
  ctx->processor_core_count = get_cpu_count();
#else
  (void)ctx;
#endif /* CONFIG_MULTITHREAD */

#if ARCH_ARM
  ctx->cpu_caps = arm_cpu_caps();
#elif ARCH_X86 || ARCH_X86_64
  ctx->cpu_caps = x86_simd_caps();
#endif
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_HEADER_H_
|
||||
#define VP8_COMMON_HEADER_H_
|
||||
|
||||
@ -17,22 +16,21 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/* Frame header fields: 1 + 3 + 1 + 19 = 24 bits total. */
typedef struct {
  unsigned int type : 1;
  unsigned int version : 3;
  unsigned int show_frame : 1;

  /* 19-bit length: the first partition can be at most 2^19 - 1 bytes
   * (just under 4 megabits). */
  unsigned int first_partition_length_in_bytes : 19;

#ifdef PACKET_TESTING
  /* Extra bookkeeping compiled in only for packet testing. */
  unsigned int frame_number;
  unsigned int update_gold : 1;
  unsigned int uses_gold : 1;
  unsigned int update_last : 1;
  unsigned int uses_last : 1;
#endif

} VP8_HEADER;
|
||||
@ -43,7 +41,6 @@ typedef struct
|
||||
#define VP8_HEADER_SIZE 3
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@ -12,79 +12,67 @@
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_dequant_idct_add_c(short *input, short *dq,
|
||||
unsigned char *dest, int stride);
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred,
|
||||
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *dest,
|
||||
int stride);
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride);
|
||||
|
||||
/* Dequantises, inverse-transforms and adds the 16 Y sub-blocks of one
 * macroblock into `dst`.
 *
 *   q      coefficients, 16 per sub-block, consumed in raster order
 *   dq     dequantisation factors passed through to the per-block routine
 *   dst    top-left pixel of the destination; sub-blocks are 4 pixels apart
 *   stride destination line pitch
 *   eobs   one end-of-block count per sub-block (16 entries are read)
 *
 * A sub-block with eob > 1 takes the full dequant + IDCT path.  Otherwise only
 * the DC term q[0] * dq[0] is applied and the first two coefficients are
 * cleared here.
 */
void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst,
                                    int stride, char *eobs) {
  int i, j;

  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      if (*eobs++ > 1)
        vp8_dequant_idct_add_c(q, dq, dst, stride);
      else {
        vp8_dc_only_idct_add_c(q[0] * dq[0], dst, stride, dst, stride);
        memset(q, 0, 2 * sizeof(q[0]));
      }

      q += 16;
      dst += 4;
    }

    /* Rewind the 16 pixels walked across and drop down four lines. */
    dst += 4 * stride - 16;
  }
}
|
||||
|
||||
/* Dequantises, inverse-transforms and adds the chroma sub-blocks of one
 * macroblock: four sub-blocks (2x2) into `dstu`, then four more into `dstv`.
 *
 *   q      coefficients, 16 per sub-block; the U blocks come first
 *   dq     dequantisation factors passed through to the per-block routine
 *   stride line pitch shared by both destination planes
 *   eobs   one end-of-block count per sub-block (8 entries are read)
 *
 * A sub-block with eob > 1 takes the full dequant + IDCT path.  Otherwise only
 * the DC term q[0] * dq[0] is applied and the first two coefficients are
 * cleared here.
 */
void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dstu,
                                     unsigned char *dstv, int stride,
                                     char *eobs) {
  int i, j;

  for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++) {
      if (*eobs++ > 1)
        vp8_dequant_idct_add_c(q, dq, dstu, stride);
      else {
        vp8_dc_only_idct_add_c(q[0] * dq[0], dstu, stride, dstu, stride);
        memset(q, 0, 2 * sizeof(q[0]));
      }

      q += 16;
      dstu += 4;
    }

    /* Rewind the 8 pixels walked across and drop down four lines. */
    dstu += 4 * stride - 8;
  }

  for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++) {
      if (*eobs++ > 1)
        vp8_dequant_idct_add_c(q, dq, dstv, stride);
      else {
        vp8_dc_only_idct_add_c(q[0] * dq[0], dstv, stride, dstv, stride);
        memset(q, 0, 2 * sizeof(q[0]));
      }

      q += 16;
      dstv += 4;
    }

    dstv += 4 * stride - 8;
  }
}
|
||||
|
@ -24,182 +24,162 @@
|
||||
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
|
||||
**************************************************************************/
|
||||
/* Fixed-point multipliers applied as (x * k) >> 16 by the inverse transform. */
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;

/* 4x4 inverse transform with reconstruction.
 *
 * `input` holds 16 coefficients in raster order and is not modified.  The
 * first pass works down each column (elements 0, 4, 8, 12 of the column), the
 * second pass works along each row and rounds with (x + 4) >> 3.  The
 * resulting residual is added to the 4x4 predictor at `pred_ptr` (line pitch
 * `pred_stride`), clamped to [0, 255] and written to `dst_ptr` (line pitch
 * `dst_stride`).
 */
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride) {
  int i;
  int r, c;
  int a1, b1, c1, d1;
  short output[16];
  short *ip = input;
  short *op = output;
  int temp1, temp2;
  int shortpitch = 4;

  /* Pass 1: columns of `input` into `output`. */
  for (i = 0; i < 4; i++) {
    a1 = ip[0] + ip[8];
    b1 = ip[0] - ip[8];

    temp1 = (ip[4] * sinpi8sqrt2) >> 16;
    temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
    c1 = temp1 - temp2;

    temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
    temp2 = (ip[12] * sinpi8sqrt2) >> 16;
    d1 = temp1 + temp2;

    op[shortpitch * 0] = a1 + d1;
    op[shortpitch * 3] = a1 - d1;

    op[shortpitch * 1] = b1 + c1;
    op[shortpitch * 2] = b1 - c1;

    ip++;
    op++;
  }

  /* Pass 2: rows of `output`, in place, with final rounding. */
  ip = output;
  op = output;

  for (i = 0; i < 4; i++) {
    a1 = ip[0] + ip[2];
    b1 = ip[0] - ip[2];

    temp1 = (ip[1] * sinpi8sqrt2) >> 16;
    temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
    c1 = temp1 - temp2;

    temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
    temp2 = (ip[3] * sinpi8sqrt2) >> 16;
    d1 = temp1 + temp2;

    op[0] = (a1 + d1 + 4) >> 3;
    op[3] = (a1 - d1 + 4) >> 3;

    op[1] = (b1 + c1 + 4) >> 3;
    op[2] = (b1 - c1 + 4) >> 3;

    ip += shortpitch;
    op += shortpitch;
  }

  /* Reconstruction: predictor + residual, saturated to 8 bits. */
  ip = output;
  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++) {
      int a = ip[c] + pred_ptr[c];

      if (a < 0) a = 0;

      if (a > 255) a = 255;

      dst_ptr[c] = (unsigned char)a;
    }
    ip += 4;
    dst_ptr += dst_stride;
    pred_ptr += pred_stride;
  }
}
|
||||
|
||||
/* Reconstruction for a block whose only coefficient is the DC term.
 *
 * The rounded value (input_dc + 4) >> 3 is added to every pixel of the 4x4
 * predictor at `pred_ptr` (line pitch `pred_stride`); each sum is clamped to
 * [0, 255] and written to `dst_ptr` (line pitch `dst_stride`).
 */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride) {
  int a1 = ((input_dc + 4) >> 3);
  int r, c;

  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++) {
      int a = a1 + pred_ptr[c];

      if (a < 0) a = 0;

      if (a > 255) a = 255;

      dst_ptr[c] = (unsigned char)a;
    }

    dst_ptr += dst_stride;
    pred_ptr += pred_stride;
  }
}
|
||||
|
||||
/* 4x4 inverse Walsh-Hadamard transform.
 *
 * `input` holds 16 coefficients in raster order and is not modified.  The
 * first pass combines each column, the second combines each row and rounds
 * with (x + 3) >> 3.  The 16 results are scattered to mb_dqcoeff[0],
 * mb_dqcoeff[16], ..., mb_dqcoeff[240], i.e. one value at the start of each
 * of 16 consecutive 16-coefficient blocks.
 */
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) {
  short output[16];
  int i;
  int a1, b1, c1, d1;
  int a2, b2, c2, d2;
  short *ip = input;
  short *op = output;

  /* Pass 1: columns. */
  for (i = 0; i < 4; i++) {
    a1 = ip[0] + ip[12];
    b1 = ip[4] + ip[8];
    c1 = ip[4] - ip[8];
    d1 = ip[0] - ip[12];

    op[0] = a1 + b1;
    op[4] = c1 + d1;
    op[8] = a1 - b1;
    op[12] = d1 - c1;
    ip++;
    op++;
  }

  /* Pass 2: rows, in place, with final rounding. */
  ip = output;
  op = output;

  for (i = 0; i < 4; i++) {
    a1 = ip[0] + ip[3];
    b1 = ip[1] + ip[2];
    c1 = ip[1] - ip[2];
    d1 = ip[0] - ip[3];

    a2 = a1 + b1;
    b2 = c1 + d1;
    c2 = a1 - b1;
    d2 = d1 - c1;

    op[0] = (a2 + 3) >> 3;
    op[1] = (b2 + 3) >> 3;
    op[2] = (c2 + 3) >> 3;
    op[3] = (d2 + 3) >> 3;

    ip += 4;
    op += 4;
  }

  for (i = 0; i < 16; i++) {
    mb_dqcoeff[i * 16] = output[i];
  }
}
|
||||
|
||||
/* DC-only shortcut for the inverse Walsh-Hadamard transform: only input[0] is
 * read, and the rounded value (input[0] + 3) >> 3 is written to
 * mb_dqcoeff[0], mb_dqcoeff[16], ..., mb_dqcoeff[240].
 */
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) {
  int i;
  int a1;

  a1 = ((input[0] + 3) >> 3);
  for (i = 0; i < 16; i++) {
    mb_dqcoeff[i * 16] = a1;
  }
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_INVTRANS_H_
|
||||
#define VP8_COMMON_INVTRANS_H_
|
||||
|
||||
@ -25,43 +24,31 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Bumps the end-of-block count of any of the first 16 blocks that reports
 * eob == 0 but has a non-zero first coefficient, so that the IDCT is not
 * skipped for it.  `diff` is walked in steps of 16 coefficients, one step per
 * entry of `eobs`.
 */
static void eob_adjust(char *eobs, short *diff) {
  /* eob adjust.... the idct can only skip if both the dc and eob are zero */
  int js;
  for (js = 0; js < 16; js++) {
    if ((eobs[js] == 0) && (diff[0] != 0)) eobs[js]++;
    diff += 16;
  }
}
|
||||
|
||||
/* Inverse-transforms the Y blocks of the macroblock in `xd` and adds the
 * result into xd->dst.y_buffer.
 *
 * For every mode except SPLITMV the second-order (block 24) coefficients are
 * inverse Walsh transformed into xd->qcoeff first: the full transform when
 * eobs[24] > 1, the DC-only shortcut otherwise.  The eob counts are then
 * adjusted, and the xd->dequant_y1_dc table is used in place of
 * xd->dequant_y1 for the per-block dequantisation.
 */
static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd) {
  short *DQC = xd->dequant_y1;

  if (xd->mode_info_context->mbmi.mode != SPLITMV) {
    /* do 2nd order transform on the dc block */
    if (xd->eobs[24] > 1) {
      vp8_short_inv_walsh4x4(&xd->block[24].dqcoeff[0], xd->qcoeff);
    } else {
      vp8_short_inv_walsh4x4_1(&xd->block[24].dqcoeff[0], xd->qcoeff);
    }
    eob_adjust(xd->eobs, xd->qcoeff);

    DQC = xd->dequant_y1_dc;
  }
  vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer,
                               xd->dst.y_stride, xd->eobs);
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_LOOPFILTER_H_
|
||||
#define VP8_COMMON_LOOPFILTER_H_
|
||||
|
||||
@ -20,16 +19,12 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_LOOP_FILTER 63
|
||||
#define MAX_LOOP_FILTER 63
|
||||
/* fraction of total macroblock rows to be used in fast filter level picking */
|
||||
/* has to be > 2 */
|
||||
#define PARTIAL_FRAME_FRACTION 8
|
||||
#define PARTIAL_FRAME_FRACTION 8
|
||||
|
||||
/* Selects which of the two loop-filter variants is used. */
typedef enum { NORMAL_LOOPFILTER = 0, SIMPLE_LOOPFILTER = 1 } LOOPFILTERTYPE;
|
||||
|
||||
#if ARCH_ARM
|
||||
#define SIMD_WIDTH 1
|
||||
@ -40,35 +35,32 @@ typedef enum
|
||||
/* Need to align this structure so when it is declared and
|
||||
* passed it can be loaded into vector registers.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
|
||||
unsigned char lvl[4][4][4];
|
||||
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
|
||||
unsigned char mode_lf_lut[10];
|
||||
typedef struct {
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
|
||||
mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
|
||||
blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
|
||||
lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
|
||||
unsigned char lvl[4][4][4];
|
||||
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
|
||||
unsigned char mode_lf_lut[10];
|
||||
} loop_filter_info_n;
|
||||
|
||||
/* Bundle of read-only pointers to the four limit/threshold vectors handed to
 * a loop-filter call. */
typedef struct loop_filter_info {
  const unsigned char *mblim;
  const unsigned char *blim;
  const unsigned char *lim;
  const unsigned char *hev_thr;
} loop_filter_info;
|
||||
|
||||
|
||||
/* Function type for loop filters that take a U-plane and a V-plane pointer
 * in one call, sharing one pitch and one set of limit/threshold vectors. */
typedef void loop_filter_uvfunction(unsigned char *u, /* source pointer */
                                    int p,            /* pitch */
                                    const unsigned char *blimit,
                                    const unsigned char *limit,
                                    const unsigned char *thresh,
                                    unsigned char *v);
|
||||
|
||||
/* assorted loopfilter functions which get used elsewhere */
|
||||
struct VP8Common;
|
||||
@ -77,8 +69,7 @@ struct modeinfo;
|
||||
|
||||
void vp8_loop_filter_init(struct VP8Common *cm);
|
||||
|
||||
void vp8_loop_filter_frame_init(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
void vp8_loop_filter_frame_init(struct VP8Common *cm, struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
|
||||
@ -88,22 +79,21 @@ void vp8_loop_filter_partial_frame(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
void vp8_loop_filter_frame_yonly(struct VP8Common *cm, struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
int sharpness_lvl);
|
||||
|
||||
void vp8_loop_filter_row_normal(struct VP8Common *cm,
|
||||
struct modeinfo *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
struct modeinfo *mode_info_context, int mb_row,
|
||||
int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr);
|
||||
|
||||
void vp8_loop_filter_row_simple(struct VP8Common *cm,
|
||||
struct modeinfo *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
struct modeinfo *mode_info_context, int mb_row,
|
||||
int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr);
|
||||
#ifdef __cplusplus
|
||||
|
@ -8,423 +8,374 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
/* Saturates `t` to the signed 8-bit range [-128, 127]. */
static signed char vp8_signed_char_clamp(int t) {
  t = (t < -128 ? -128 : t);
  t = (t > 127 ? 127 : t);
  return (signed char)t;
}
|
||||
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_filter_mask(uc limit, uc blimit,
|
||||
uc p3, uc p2, uc p1, uc p0,
|
||||
uc q0, uc q1, uc q2, uc q3)
|
||||
{
|
||||
signed char mask = 0;
|
||||
mask |= (abs(p3 - p2) > limit);
|
||||
mask |= (abs(p2 - p1) > limit);
|
||||
mask |= (abs(p1 - p0) > limit);
|
||||
mask |= (abs(q1 - q0) > limit);
|
||||
mask |= (abs(q2 - q1) > limit);
|
||||
mask |= (abs(q3 - q2) > limit);
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
|
||||
return mask - 1;
|
||||
static signed char vp8_filter_mask(uc limit, uc blimit, uc p3, uc p2, uc p1,
|
||||
uc p0, uc q0, uc q1, uc q2, uc q3) {
|
||||
signed char mask = 0;
|
||||
mask |= (abs(p3 - p2) > limit);
|
||||
mask |= (abs(p2 - p1) > limit);
|
||||
mask |= (abs(p1 - p0) > limit);
|
||||
mask |= (abs(q1 - q0) > limit);
|
||||
mask |= (abs(q2 - q1) > limit);
|
||||
mask |= (abs(q3 - q2) > limit);
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
|
||||
return mask - 1;
|
||||
}
|
||||
|
||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
signed char hev = 0;
|
||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||
hev |= (abs(q1 - q0) > thresh) * -1;
|
||||
return hev;
|
||||
static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) {
|
||||
signed char hev = 0;
|
||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||
hev |= (abs(q1 - q0) > thresh) * -1;
|
||||
return hev;
|
||||
}
|
||||
|
||||
/* Applies the edge filter to the four samples straddling an edge.
 *
 *   mask  all-ones to filter, zero to leave the samples unchanged
 *   hev   all-ones when the edge has high variance, zero otherwise
 *   op1, op0  the two samples on one side of the edge (op0 nearest)
 *   oq0, oq1  the two samples on the other side (oq0 nearest)
 *
 * Samples are converted to signed form by XOR with 0x80, adjusted with
 * saturating arithmetic, and converted back on store.
 */
static void vp8_filter(signed char mask, uc hev, uc *op1, uc *op0, uc *oq0,
                       uc *oq1) {
  signed char ps0, qs0;
  signed char ps1, qs1;
  signed char filter_value, Filter1, Filter2;
  signed char u;

  ps1 = (signed char)*op1 ^ 0x80;
  ps0 = (signed char)*op0 ^ 0x80;
  qs0 = (signed char)*oq0 ^ 0x80;
  qs1 = (signed char)*oq1 ^ 0x80;

  /* add outer taps if we have high edge variance */
  filter_value = vp8_signed_char_clamp(ps1 - qs1);
  filter_value &= hev;

  /* inner taps */
  filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
  filter_value &= mask;

  /* save bottom 3 bits so that we round one side +4 and the other +3
   * if it equals 4 we'll set to adjust by -1 to account for the fact
   * we'd round 3 the other way
   */
  Filter1 = vp8_signed_char_clamp(filter_value + 4);
  Filter2 = vp8_signed_char_clamp(filter_value + 3);
  Filter1 >>= 3;
  Filter2 >>= 3;
  u = vp8_signed_char_clamp(qs0 - Filter1);
  *oq0 = u ^ 0x80;
  u = vp8_signed_char_clamp(ps0 + Filter2);
  *op0 = u ^ 0x80;
  filter_value = Filter1;

  /* outer tap adjustments */
  filter_value += 1;
  filter_value >>= 1;
  /* The outer samples are only moved when the edge is NOT high variance. */
  filter_value &= ~hev;

  u = vp8_signed_char_clamp(qs1 - filter_value);
  *oq1 = u ^ 0x80;
  u = vp8_signed_char_clamp(ps1 + filter_value);
  *op1 = u ^ 0x80;
}
|
||||
/* Filters across a horizontal edge located just above row `s`.
 *
 *   s      first sample of the row below the edge
 *   p      pitch: distance between vertically adjacent samples
 *   blimit, limit, thresh  only element [0] of each is read
 *   count  number of 8-sample groups; count * 8 positions are filtered
 *
 * For each position the four samples above (s[-4p]..s[-p]) and four at or
 * below (s[0]..s[3p]) decide the mask and the high-variance flag, and the
 * inner four are then filtered.  The loop is a do/while, so one position is
 * always processed even when count <= 0.
 */
void vp8_loop_filter_horizontal_edge_c(unsigned char *s, int p, /* pitch */
                                       const unsigned char *blimit,
                                       const unsigned char *limit,
                                       const unsigned char *thresh, int count) {
  int hev = 0; /* high edge variance */
  signed char mask = 0;
  int i = 0;

  /* loop filter designed to work using chars so that we can make maximum use
   * of 8 bit simd instructions.
   */
  do {
    mask = vp8_filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p],
                           s[-1 * p], s[0 * p], s[1 * p], s[2 * p], s[3 * p]);

    hev = vp8_hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);

    vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);

    ++s;
  } while (++i < count * 8);
}
|
||||
|
||||
/* Filters across a vertical edge located just left of column `s`.
 *
 *   s      first sample to the right of the edge, on the first row
 *   p      pitch: `s` advances by `p` after each row
 *   blimit, limit, thresh  only element [0] of each is read
 *   count  number of 8-row groups; count * 8 rows are filtered
 *
 * For each row the samples s[-4]..s[3] decide the mask and the high-variance
 * flag, and the inner four (s[-2]..s[1]) are then filtered.  The loop is a
 * do/while, so one row is always processed even when count <= 0.
 */
void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p,
                                     const unsigned char *blimit,
                                     const unsigned char *limit,
                                     const unsigned char *thresh, int count) {
  int hev = 0; /* high edge variance */
  signed char mask = 0;
  int i = 0;

  /* loop filter designed to work using chars so that we can make maximum use
   * of 8 bit simd instructions.
   */
  do {
    mask = vp8_filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1],
                           s[0], s[1], s[2], s[3]);

    hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);

    vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);

    s += p;
  } while (++i < count * 8);
}
|
||||
|
||||
static void vp8_mbfilter(signed char mask, uc hev,
|
||||
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
||||
{
|
||||
signed char s, u;
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char ps2 = (signed char) * op2 ^ 0x80;
|
||||
signed char ps1 = (signed char) * op1 ^ 0x80;
|
||||
signed char ps0 = (signed char) * op0 ^ 0x80;
|
||||
signed char qs0 = (signed char) * oq0 ^ 0x80;
|
||||
signed char qs1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char qs2 = (signed char) * oq2 ^ 0x80;
|
||||
static void vp8_mbfilter(signed char mask, uc hev, uc *op2, uc *op1, uc *op0,
|
||||
uc *oq0, uc *oq1, uc *oq2) {
|
||||
signed char s, u;
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char ps2 = (signed char)*op2 ^ 0x80;
|
||||
signed char ps1 = (signed char)*op1 ^ 0x80;
|
||||
signed char ps0 = (signed char)*op0 ^ 0x80;
|
||||
signed char qs0 = (signed char)*oq0 ^ 0x80;
|
||||
signed char qs1 = (signed char)*oq1 ^ 0x80;
|
||||
signed char qs2 = (signed char)*oq2 ^ 0x80;
|
||||
|
||||
/* add outer taps if we have high edge variance */
|
||||
filter_value = vp8_signed_char_clamp(ps1 - qs1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
|
||||
filter_value &= mask;
|
||||
/* add outer taps if we have high edge variance */
|
||||
filter_value = vp8_signed_char_clamp(ps1 - qs1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
|
||||
filter_value &= mask;
|
||||
|
||||
Filter2 = filter_value;
|
||||
Filter2 &= hev;
|
||||
Filter2 = filter_value;
|
||||
Filter2 &= hev;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
|
||||
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
|
||||
Filter1 >>= 3;
|
||||
Filter2 >>= 3;
|
||||
qs0 = vp8_signed_char_clamp(qs0 - Filter1);
|
||||
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
|
||||
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
|
||||
Filter1 >>= 3;
|
||||
Filter2 >>= 3;
|
||||
qs0 = vp8_signed_char_clamp(qs0 - Filter1);
|
||||
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
|
||||
/* only apply wider filter if not high edge variance */
|
||||
filter_value &= ~hev;
|
||||
Filter2 = filter_value;
|
||||
|
||||
/* only apply wider filter if not high edge variance */
|
||||
filter_value &= ~hev;
|
||||
Filter2 = filter_value;
|
||||
/* roughly 3/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
|
||||
s = vp8_signed_char_clamp(qs0 - u);
|
||||
*oq0 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps0 + u);
|
||||
*op0 = s ^ 0x80;
|
||||
|
||||
/* roughly 3/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
|
||||
s = vp8_signed_char_clamp(qs0 - u);
|
||||
*oq0 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps0 + u);
|
||||
*op0 = s ^ 0x80;
|
||||
/* roughly 2/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
|
||||
s = vp8_signed_char_clamp(qs1 - u);
|
||||
*oq1 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps1 + u);
|
||||
*op1 = s ^ 0x80;
|
||||
|
||||
/* roughly 2/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
|
||||
s = vp8_signed_char_clamp(qs1 - u);
|
||||
*oq1 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps1 + u);
|
||||
*op1 = s ^ 0x80;
|
||||
|
||||
/* roughly 1/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
|
||||
s = vp8_signed_char_clamp(qs2 - u);
|
||||
*oq2 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps2 + u);
|
||||
*op2 = s ^ 0x80;
|
||||
/* roughly 1/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
|
||||
s = vp8_signed_char_clamp(qs2 - u);
|
||||
*oq2 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps2 + u);
|
||||
*op2 = s ^ 0x80;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
void vp8_mbloop_filter_horizontal_edge_c(unsigned char *s, int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count) {
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do {
|
||||
mask = vp8_filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p],
|
||||
s[-1 * p], s[0 * p], s[1 * p], s[2 * p], s[3 * p]);
|
||||
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
|
||||
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
|
||||
s + 2 * p);
|
||||
|
||||
++s;
|
||||
} while (++i < count * 8);
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_c(unsigned char *s, int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh, int count) {
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
do {
|
||||
mask = vp8_filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1],
|
||||
s[0], s[1], s[2], s[3]);
|
||||
|
||||
do
|
||||
{
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
||||
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
||||
|
||||
s += p;
|
||||
} while (++i < count * 8);
|
||||
}
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
/* Why does this cause problems for win32?
|
||||
* error C2143: syntax error : missing ';' before 'type'
|
||||
* (void) limit;
|
||||
*/
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
|
||||
return mask;
|
||||
static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0,
|
||||
uc q1) {
|
||||
/* Why does this cause problems for win32?
|
||||
* error C2143: syntax error : missing ';' before 'type'
|
||||
* (void) limit;
|
||||
*/
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
|
||||
return mask;
|
||||
}
|
||||
|
||||
static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
{
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char p1 = (signed char) * op1 ^ 0x80;
|
||||
signed char p0 = (signed char) * op0 ^ 0x80;
|
||||
signed char q0 = (signed char) * oq0 ^ 0x80;
|
||||
signed char q1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char u;
|
||||
static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0,
|
||||
uc *oq1) {
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char p1 = (signed char)*op1 ^ 0x80;
|
||||
signed char p0 = (signed char)*op0 ^ 0x80;
|
||||
signed char q0 = (signed char)*oq0 ^ 0x80;
|
||||
signed char q1 = (signed char)*oq1 ^ 0x80;
|
||||
signed char u;
|
||||
|
||||
filter_value = vp8_signed_char_clamp(p1 - q1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
|
||||
filter_value &= mask;
|
||||
filter_value = vp8_signed_char_clamp(p1 - q1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
|
||||
filter_value &= mask;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(filter_value + 4);
|
||||
Filter1 >>= 3;
|
||||
u = vp8_signed_char_clamp(q0 - Filter1);
|
||||
*oq0 = u ^ 0x80;
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(filter_value + 4);
|
||||
Filter1 >>= 3;
|
||||
u = vp8_signed_char_clamp(q0 - Filter1);
|
||||
*oq0 = u ^ 0x80;
|
||||
|
||||
Filter2 = vp8_signed_char_clamp(filter_value + 3);
|
||||
Filter2 >>= 3;
|
||||
u = vp8_signed_char_clamp(p0 + Filter2);
|
||||
*op0 = u ^ 0x80;
|
||||
Filter2 = vp8_signed_char_clamp(filter_value + 3);
|
||||
Filter2 >>= 3;
|
||||
u = vp8_signed_char_clamp(p0 + Filter2);
|
||||
*op0 = u ^ 0x80;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *s, int p,
|
||||
const unsigned char *blimit) {
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
++s;
|
||||
}
|
||||
while (++i < 16);
|
||||
do {
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2 * p], s[-1 * p], s[0 * p],
|
||||
s[1 * p]);
|
||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
++s;
|
||||
} while (++i < 16);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
|
||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||
s += p;
|
||||
}
|
||||
while (++i < 16);
|
||||
void vp8_loop_filter_simple_vertical_edge_c(unsigned char *s, int p,
|
||||
const unsigned char *blimit) {
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do {
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
|
||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||
s += p;
|
||||
} while (++i < 16);
|
||||
}
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
loop_filter_info *lfi) {
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
loop_filter_info *lfi) {
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
loop_filter_info *lfi) {
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim,
|
||||
lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim,
|
||||
lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim,
|
||||
lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride,
|
||||
blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride,
|
||||
blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride,
|
||||
blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
loop_filter_info *lfi) {
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
|
@ -8,61 +8,50 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "blockd.h"
|
||||
|
||||
void vp8_setup_block_dptrs(MACROBLOCKD *x)
|
||||
{
|
||||
int r, c;
|
||||
void vp8_setup_block_dptrs(MACROBLOCKD *x) {
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
|
||||
}
|
||||
for (r = 0; r < 4; r++) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
x->block[r * 4 + c].predictor = x->predictor + r * 4 * 16 + c * 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
|
||||
|
||||
}
|
||||
for (r = 0; r < 2; r++) {
|
||||
for (c = 0; c < 2; c++) {
|
||||
x->block[16 + r * 2 + c].predictor =
|
||||
x->predictor + 256 + r * 4 * 8 + c * 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
|
||||
|
||||
}
|
||||
for (r = 0; r < 2; r++) {
|
||||
for (c = 0; c < 2; c++) {
|
||||
x->block[20 + r * 2 + c].predictor =
|
||||
x->predictor + 320 + r * 4 * 8 + c * 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 25; r++)
|
||||
{
|
||||
x->block[r].qcoeff = x->qcoeff + r * 16;
|
||||
x->block[r].dqcoeff = x->dqcoeff + r * 16;
|
||||
x->block[r].eob = x->eobs + r;
|
||||
}
|
||||
for (r = 0; r < 25; r++) {
|
||||
x->block[r].qcoeff = x->qcoeff + r * 16;
|
||||
x->block[r].dqcoeff = x->dqcoeff + r * 16;
|
||||
x->block[r].eob = x->eobs + r;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_block_doffsets(MACROBLOCKD *x)
|
||||
{
|
||||
int block;
|
||||
void vp8_build_block_doffsets(MACROBLOCKD *x) {
|
||||
int block;
|
||||
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
x->block[block].offset =
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
|
||||
}
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
x->block[block].offset =
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
|
||||
}
|
||||
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
x->block[block+4].offset =
|
||||
x->block[block].offset =
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
|
||||
}
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
x->block[block + 4].offset = x->block[block].offset =
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/* MFQE: Multiframe Quality Enhancement
|
||||
* In rate limited situations keyframes may cause significant visual artifacts
|
||||
* commonly referred to as "popping." This file implements a postproccesing
|
||||
@ -28,359 +27,299 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
static void filter_by_weight(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int block_size, int src_weight)
|
||||
{
|
||||
int dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int rounding_bit = 1 << (MFQE_PRECISION - 1);
|
||||
int r, c;
|
||||
unsigned char *dst, int dst_stride, int block_size,
|
||||
int src_weight) {
|
||||
int dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int rounding_bit = 1 << (MFQE_PRECISION - 1);
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < block_size; r++)
|
||||
{
|
||||
for (c = 0; c < block_size; c++)
|
||||
{
|
||||
dst[c] = (src[c] * src_weight +
|
||||
dst[c] * dst_weight +
|
||||
rounding_bit) >> MFQE_PRECISION;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
for (r = 0; r < block_size; r++) {
|
||||
for (c = 0; c < block_size; c++) {
|
||||
dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >>
|
||||
MFQE_PRECISION;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
|
||||
int src_weight) {
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
|
||||
int src_weight) {
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
|
||||
int src_weight) {
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
|
||||
}
|
||||
|
||||
static void apply_ifactor(unsigned char *y_src,
|
||||
int y_src_stride,
|
||||
unsigned char *y_dst,
|
||||
int y_dst_stride,
|
||||
unsigned char *u_src,
|
||||
unsigned char *v_src,
|
||||
int uv_src_stride,
|
||||
unsigned char *u_dst,
|
||||
unsigned char *v_dst,
|
||||
int uv_dst_stride,
|
||||
int block_size,
|
||||
int src_weight)
|
||||
{
|
||||
if (block_size == 16)
|
||||
{
|
||||
vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
|
||||
vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
|
||||
vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
|
||||
}
|
||||
else /* if (block_size == 8) */
|
||||
{
|
||||
vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
|
||||
vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
|
||||
vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
|
||||
}
|
||||
static void apply_ifactor(unsigned char *y_src, int y_src_stride,
|
||||
unsigned char *y_dst, int y_dst_stride,
|
||||
unsigned char *u_src, unsigned char *v_src,
|
||||
int uv_src_stride, unsigned char *u_dst,
|
||||
unsigned char *v_dst, int uv_dst_stride,
|
||||
int block_size, int src_weight) {
|
||||
if (block_size == 16) {
|
||||
vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride,
|
||||
src_weight);
|
||||
vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride,
|
||||
src_weight);
|
||||
vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride,
|
||||
src_weight);
|
||||
} else /* if (block_size == 8) */
|
||||
{
|
||||
vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride,
|
||||
src_weight);
|
||||
vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride,
|
||||
src_weight);
|
||||
vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride,
|
||||
src_weight);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int int_sqrt(unsigned int x)
|
||||
{
|
||||
unsigned int y = x;
|
||||
unsigned int guess;
|
||||
int p = 1;
|
||||
while (y>>=1) p++;
|
||||
p>>=1;
|
||||
static unsigned int int_sqrt(unsigned int x) {
|
||||
unsigned int y = x;
|
||||
unsigned int guess;
|
||||
int p = 1;
|
||||
while (y >>= 1) p++;
|
||||
p >>= 1;
|
||||
|
||||
guess=0;
|
||||
while (p>=0)
|
||||
{
|
||||
guess |= (1<<p);
|
||||
if (x<guess*guess)
|
||||
guess -= (1<<p);
|
||||
p--;
|
||||
}
|
||||
/* choose between guess or guess+1 */
|
||||
return guess+(guess*guess+guess+1<=x);
|
||||
guess = 0;
|
||||
while (p >= 0) {
|
||||
guess |= (1 << p);
|
||||
if (x < guess * guess) guess -= (1 << p);
|
||||
p--;
|
||||
}
|
||||
/* choose between guess or guess+1 */
|
||||
return guess + (guess * guess + guess + 1 <= x);
|
||||
}
|
||||
|
||||
#define USE_SSD
|
||||
static void multiframe_quality_enhance_block
|
||||
(
|
||||
static void multiframe_quality_enhance_block(
|
||||
int blksize, /* Currently only values supported are 16, 8 */
|
||||
int qcurr,
|
||||
int qprev,
|
||||
unsigned char *y,
|
||||
unsigned char *u,
|
||||
unsigned char *v,
|
||||
int y_stride,
|
||||
int uv_stride,
|
||||
unsigned char *yd,
|
||||
unsigned char *ud,
|
||||
unsigned char *vd,
|
||||
int yd_stride,
|
||||
int uvd_stride
|
||||
)
|
||||
{
|
||||
static const unsigned char VP8_ZEROS[16]=
|
||||
int qcurr, int qprev, unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int y_stride, int uv_stride, unsigned char *yd, unsigned char *ud,
|
||||
unsigned char *vd, int yd_stride, int uvd_stride) {
|
||||
static const unsigned char VP8_ZEROS[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
int uvblksize = blksize >> 1;
|
||||
int qdiff = qcurr - qprev;
|
||||
|
||||
int i;
|
||||
unsigned char *up;
|
||||
unsigned char *udp;
|
||||
unsigned char *vp;
|
||||
unsigned char *vdp;
|
||||
|
||||
unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;
|
||||
|
||||
if (blksize == 16) {
|
||||
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse) + 128) >> 8;
|
||||
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse) + 128) >> 8;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 128) >> 8;
|
||||
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 32) >> 6;
|
||||
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 32) >> 6;
|
||||
#else
|
||||
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
|
||||
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
|
||||
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride) + 32) >> 6;
|
||||
#endif
|
||||
} else /* if (blksize == 8) */
|
||||
{
|
||||
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse) + 32) >> 6;
|
||||
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse) + 32) >> 6;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 32) >> 6;
|
||||
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 8) >> 4;
|
||||
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 8) >> 4;
|
||||
#else
|
||||
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
|
||||
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
|
||||
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
|
||||
#endif
|
||||
}
|
||||
|
||||
actrisk = (actd > act * 5);
|
||||
|
||||
/* thr = qdiff/16 + log2(act) + log4(qprev) */
|
||||
thr = (qdiff >> 4);
|
||||
while (actd >>= 1) thr++;
|
||||
while (qprev >>= 2) thr++;
|
||||
|
||||
#ifdef USE_SSD
|
||||
thrsq = thr * thr;
|
||||
if (sad < thrsq &&
|
||||
/* additional checks for color mismatch and excessive addition of
|
||||
* high-frequencies */
|
||||
4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
|
||||
#else
|
||||
if (sad < thr &&
|
||||
/* additional checks for color mismatch and excessive addition of
|
||||
* high-frequencies */
|
||||
2 * usad < thr && 2 * vsad < thr && !actrisk)
|
||||
#endif
|
||||
{
|
||||
int ifactor;
|
||||
#ifdef USE_SSD
|
||||
/* TODO: optimize this later to not need sqr root */
|
||||
sad = int_sqrt(sad);
|
||||
#endif
|
||||
ifactor = (sad << MFQE_PRECISION) / thr;
|
||||
ifactor >>= (qdiff >> 5);
|
||||
|
||||
if (ifactor) {
|
||||
apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
|
||||
uvd_stride, blksize, ifactor);
|
||||
}
|
||||
} else /* else implicitly copy from previous frame */
|
||||
{
|
||||
if (blksize == 16) {
|
||||
vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
|
||||
vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
|
||||
vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
|
||||
} else /* if (blksize == 8) */
|
||||
{
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
|
||||
for (up = u, udp = ud, i = 0; i < uvblksize;
|
||||
++i, up += uv_stride, udp += uvd_stride)
|
||||
memcpy(udp, up, uvblksize);
|
||||
for (vp = v, vdp = vd, i = 0; i < uvblksize;
|
||||
++i, vp += uv_stride, vdp += uvd_stride)
|
||||
memcpy(vdp, vp, uvblksize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map) {
|
||||
if (mode_info_context->mbmi.mb_skip_coeff)
|
||||
map[0] = map[1] = map[2] = map[3] = 1;
|
||||
else if (mode_info_context->mbmi.mode == SPLITMV) {
|
||||
static int ndx[4][4] = {
|
||||
{ 0, 1, 4, 5 }, { 2, 3, 6, 7 }, { 8, 9, 12, 13 }, { 10, 11, 14, 15 }
|
||||
};
|
||||
int uvblksize = blksize >> 1;
|
||||
int qdiff = qcurr - qprev;
|
||||
|
||||
int i;
|
||||
unsigned char *up;
|
||||
unsigned char *udp;
|
||||
unsigned char *vp;
|
||||
unsigned char *vdp;
|
||||
|
||||
unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;
|
||||
|
||||
if (blksize == 16)
|
||||
{
|
||||
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 128)>>8;
|
||||
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 32)>>6;
|
||||
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 32)>>6;
|
||||
#else
|
||||
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
|
||||
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
|
||||
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
|
||||
#endif
|
||||
}
|
||||
else /* if (blksize == 8) */
|
||||
{
|
||||
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 32)>>6;
|
||||
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 8)>>4;
|
||||
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 8)>>4;
|
||||
#else
|
||||
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
|
||||
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
|
||||
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
|
||||
#endif
|
||||
}
|
||||
|
||||
actrisk = (actd > act * 5);
|
||||
|
||||
/* thr = qdiff/16 + log2(act) + log4(qprev) */
|
||||
thr = (qdiff >> 4);
|
||||
while (actd >>= 1) thr++;
|
||||
while (qprev >>= 2) thr++;
|
||||
|
||||
#ifdef USE_SSD
|
||||
thrsq = thr * thr;
|
||||
if (sad < thrsq &&
|
||||
/* additional checks for color mismatch and excessive addition of
|
||||
* high-frequencies */
|
||||
4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
|
||||
#else
|
||||
if (sad < thr &&
|
||||
/* additional checks for color mismatch and excessive addition of
|
||||
* high-frequencies */
|
||||
2 * usad < thr && 2 * vsad < thr && !actrisk)
|
||||
#endif
|
||||
{
|
||||
int ifactor;
|
||||
#ifdef USE_SSD
|
||||
/* TODO: optimize this later to not need sqr root */
|
||||
sad = int_sqrt(sad);
|
||||
#endif
|
||||
ifactor = (sad << MFQE_PRECISION) / thr;
|
||||
ifactor >>= (qdiff >> 5);
|
||||
|
||||
if (ifactor)
|
||||
{
|
||||
apply_ifactor(y, y_stride, yd, yd_stride,
|
||||
u, v, uv_stride,
|
||||
ud, vd, uvd_stride,
|
||||
blksize, ifactor);
|
||||
}
|
||||
}
|
||||
else /* else implicitly copy from previous frame */
|
||||
{
|
||||
if (blksize == 16)
|
||||
{
|
||||
vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
|
||||
vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
|
||||
vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
|
||||
}
|
||||
else /* if (blksize == 8) */
|
||||
{
|
||||
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
|
||||
for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
|
||||
memcpy(udp, up, uvblksize);
|
||||
for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
|
||||
memcpy(vdp, vp, uvblksize);
|
||||
}
|
||||
int i, j;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
map[i] = 1;
|
||||
for (j = 0; j < 4 && map[j]; ++j)
|
||||
map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
|
||||
mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
|
||||
}
|
||||
} else {
|
||||
map[0] = map[1] = map[2] = map[3] =
|
||||
(mode_info_context->mbmi.mode > B_PRED &&
|
||||
abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
|
||||
abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
|
||||
}
|
||||
return (map[0] + map[1] + map[2] + map[3]);
|
||||
}
|
||||
|
||||
static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map)
|
||||
{
|
||||
if (mode_info_context->mbmi.mb_skip_coeff)
|
||||
map[0] = map[1] = map[2] = map[3] = 1;
|
||||
else if (mode_info_context->mbmi.mode==SPLITMV)
|
||||
{
|
||||
static int ndx[4][4] =
|
||||
{
|
||||
{0, 1, 4, 5},
|
||||
{2, 3, 6, 7},
|
||||
{8, 9, 12, 13},
|
||||
{10, 11, 14, 15}
|
||||
};
|
||||
int i, j;
|
||||
for (i=0; i<4; ++i)
|
||||
{
|
||||
map[i] = 1;
|
||||
for (j=0; j<4 && map[j]; ++j)
|
||||
map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
|
||||
mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
map[0] = map[1] = map[2] = map[3] =
|
||||
(mode_info_context->mbmi.mode > B_PRED &&
|
||||
abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
|
||||
abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
|
||||
}
|
||||
return (map[0]+map[1]+map[2]+map[3]);
|
||||
}
|
||||
void vp8_multiframe_quality_enhance(VP8_COMMON *cm) {
|
||||
YV12_BUFFER_CONFIG *show = cm->frame_to_show;
|
||||
YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
|
||||
|
||||
void vp8_multiframe_quality_enhance
|
||||
(
|
||||
VP8_COMMON *cm
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *show = cm->frame_to_show;
|
||||
YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
/* Point at base of Mb MODE_INFO list has motion vectors etc */
|
||||
const MODE_INFO *mode_info_context = cm->show_frame_mi;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int totmap, map[4];
|
||||
int qcurr = cm->base_qindex;
|
||||
int qprev = cm->postproc_state.last_base_qindex;
|
||||
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
/* Point at base of Mb MODE_INFO list has motion vectors etc */
|
||||
const MODE_INFO *mode_info_context = cm->show_frame_mi;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int totmap, map[4];
|
||||
int qcurr = cm->base_qindex;
|
||||
int qprev = cm->postproc_state.last_base_qindex;
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
|
||||
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = show->y_buffer;
|
||||
u_ptr = show->u_buffer;
|
||||
v_ptr = show->v_buffer;
|
||||
yd_ptr = dest->y_buffer;
|
||||
ud_ptr = dest->u_buffer;
|
||||
vd_ptr = dest->v_buffer;
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = show->y_buffer;
|
||||
u_ptr = show->u_buffer;
|
||||
v_ptr = show->v_buffer;
|
||||
yd_ptr = dest->y_buffer;
|
||||
ud_ptr = dest->u_buffer;
|
||||
vd_ptr = dest->v_buffer;
|
||||
|
||||
/* postprocess each macro block */
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
/* if motion is high there will likely be no benefit */
|
||||
if (frame_type == INTER_FRAME) totmap = qualify_inter_mb(mode_info_context, map);
|
||||
else totmap = (frame_type == KEY_FRAME ? 4 : 0);
|
||||
if (totmap)
|
||||
{
|
||||
if (totmap < 4)
|
||||
{
|
||||
int i, j;
|
||||
for (i=0; i<2; ++i)
|
||||
for (j=0; j<2; ++j)
|
||||
{
|
||||
if (map[i*2+j])
|
||||
{
|
||||
multiframe_quality_enhance_block(8, qcurr, qprev,
|
||||
y_ptr + 8*(i*show->y_stride+j),
|
||||
u_ptr + 4*(i*show->uv_stride+j),
|
||||
v_ptr + 4*(i*show->uv_stride+j),
|
||||
show->y_stride,
|
||||
show->uv_stride,
|
||||
yd_ptr + 8*(i*dest->y_stride+j),
|
||||
ud_ptr + 4*(i*dest->uv_stride+j),
|
||||
vd_ptr + 4*(i*dest->uv_stride+j),
|
||||
dest->y_stride,
|
||||
dest->uv_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* copy a 8x8 block */
|
||||
int k;
|
||||
unsigned char *up = u_ptr + 4*(i*show->uv_stride+j);
|
||||
unsigned char *udp = ud_ptr + 4*(i*dest->uv_stride+j);
|
||||
unsigned char *vp = v_ptr + 4*(i*show->uv_stride+j);
|
||||
unsigned char *vdp = vd_ptr + 4*(i*dest->uv_stride+j);
|
||||
vp8_copy_mem8x8(y_ptr + 8*(i*show->y_stride+j), show->y_stride,
|
||||
yd_ptr + 8*(i*dest->y_stride+j), dest->y_stride);
|
||||
for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
|
||||
vp += show->uv_stride, vdp += dest->uv_stride)
|
||||
{
|
||||
memcpy(udp, up, 4);
|
||||
memcpy(vdp, vp, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* totmap = 4 */
|
||||
{
|
||||
multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
|
||||
u_ptr, v_ptr,
|
||||
show->y_stride,
|
||||
show->uv_stride,
|
||||
yd_ptr, ud_ptr, vd_ptr,
|
||||
dest->y_stride,
|
||||
dest->uv_stride);
|
||||
/* postprocess each macro block */
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
|
||||
/* if motion is high there will likely be no benefit */
|
||||
if (frame_type == INTER_FRAME)
|
||||
totmap = qualify_inter_mb(mode_info_context, map);
|
||||
else
|
||||
totmap = (frame_type == KEY_FRAME ? 4 : 0);
|
||||
if (totmap) {
|
||||
if (totmap < 4) {
|
||||
int i, j;
|
||||
for (i = 0; i < 2; ++i)
|
||||
for (j = 0; j < 2; ++j) {
|
||||
if (map[i * 2 + j]) {
|
||||
multiframe_quality_enhance_block(
|
||||
8, qcurr, qprev, y_ptr + 8 * (i * show->y_stride + j),
|
||||
u_ptr + 4 * (i * show->uv_stride + j),
|
||||
v_ptr + 4 * (i * show->uv_stride + j), show->y_stride,
|
||||
show->uv_stride, yd_ptr + 8 * (i * dest->y_stride + j),
|
||||
ud_ptr + 4 * (i * dest->uv_stride + j),
|
||||
vd_ptr + 4 * (i * dest->uv_stride + j), dest->y_stride,
|
||||
dest->uv_stride);
|
||||
} else {
|
||||
/* copy a 8x8 block */
|
||||
int k;
|
||||
unsigned char *up = u_ptr + 4 * (i * show->uv_stride + j);
|
||||
unsigned char *udp = ud_ptr + 4 * (i * dest->uv_stride + j);
|
||||
unsigned char *vp = v_ptr + 4 * (i * show->uv_stride + j);
|
||||
unsigned char *vdp = vd_ptr + 4 * (i * dest->uv_stride + j);
|
||||
vp8_copy_mem8x8(
|
||||
y_ptr + 8 * (i * show->y_stride + j), show->y_stride,
|
||||
yd_ptr + 8 * (i * dest->y_stride + j), dest->y_stride);
|
||||
for (k = 0; k < 4; ++k, up += show->uv_stride,
|
||||
udp += dest->uv_stride, vp += show->uv_stride,
|
||||
vdp += dest->uv_stride) {
|
||||
memcpy(udp, up, 4);
|
||||
memcpy(vdp, vp, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
|
||||
vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
|
||||
vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
|
||||
}
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
yd_ptr += 16;
|
||||
ud_ptr += 8;
|
||||
vd_ptr += 8;
|
||||
mode_info_context++; /* step to next MB */
|
||||
} else /* totmap = 4 */
|
||||
{
|
||||
multiframe_quality_enhance_block(
|
||||
16, qcurr, qprev, y_ptr, u_ptr, v_ptr, show->y_stride,
|
||||
show->uv_stride, yd_ptr, ud_ptr, vd_ptr, dest->y_stride,
|
||||
dest->uv_stride);
|
||||
}
|
||||
|
||||
y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
|
||||
u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
|
||||
ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
|
||||
mode_info_context++; /* Skip border mb */
|
||||
} else {
|
||||
vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
|
||||
vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
|
||||
vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
|
||||
}
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
yd_ptr += 16;
|
||||
ud_ptr += 8;
|
||||
vd_ptr += 8;
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
|
||||
u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
|
||||
ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
|
||||
mode_info_context++; /* Skip border mb */
|
||||
}
|
||||
}
|
||||
|
@ -8,26 +8,22 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
void vp8_dequant_idct_add_dspr2(short *input, short *dq,
|
||||
unsigned char *dest, int stride)
|
||||
{
|
||||
int i;
|
||||
void vp8_dequant_idct_add_dspr2(short *input, short *dq, unsigned char *dest,
|
||||
int stride) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
input[i] = dq[i] * input[i];
|
||||
}
|
||||
for (i = 0; i < 16; i++) {
|
||||
input[i] = dq[i] * input[i];
|
||||
}
|
||||
|
||||
vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
|
||||
|
||||
memset(input, 0, 32);
|
||||
vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
|
||||
|
||||
memset(input, 0, 32);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -13,76 +13,64 @@
|
||||
|
||||
#if HAVE_DSPR2
|
||||
|
||||
void vp8_dequant_idct_add_y_block_dspr2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
void vp8_dequant_idct_add_y_block_dspr2(short *q, short *dq, unsigned char *dst,
|
||||
int stride, char *eobs) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
|
||||
else {
|
||||
vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dst, stride, dst, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
dst += 4 * stride - 16;
|
||||
q += 16;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
dst += 4 * stride - 16;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_dspr2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
void vp8_dequant_idct_add_uv_block_dspr2(short *q, short *dq,
|
||||
unsigned char *dstu,
|
||||
unsigned char *dstv, int stride,
|
||||
char *eobs) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
|
||||
else {
|
||||
vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dstu, stride, dstu, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstu += 4;
|
||||
}
|
||||
|
||||
dstu += 4 * stride - 8;
|
||||
q += 16;
|
||||
dstu += 4;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
dstu += 4 * stride - 8;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstv += 4;
|
||||
}
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
|
||||
else {
|
||||
vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dstv, stride, dstv, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
dstv += 4 * stride - 8;
|
||||
q += 16;
|
||||
dstv += 4;
|
||||
}
|
||||
|
||||
dstv += 4 * stride - 8;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -28,342 +28,319 @@
|
||||
****************************************************************************/
|
||||
extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
|
||||
static const int cospi8sqrt2minus1 = 20091;
|
||||
static const int sinpi8sqrt2 = 35468;
|
||||
static const int sinpi8sqrt2 = 35468;
|
||||
|
||||
inline void prefetch_load_short(short *src)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"pref 0, 0(%[src]) \n\t"
|
||||
:
|
||||
: [src] "r" (src)
|
||||
);
|
||||
inline void prefetch_load_short(short *src) {
|
||||
__asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src));
|
||||
}
|
||||
|
||||
void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride)
|
||||
{
|
||||
int r, c;
|
||||
int a1, b1, c1, d1;
|
||||
short output[16];
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
int temp1, temp2;
|
||||
int shortpitch = 4;
|
||||
int dst_stride) {
|
||||
int r, c;
|
||||
int a1, b1, c1, d1;
|
||||
short output[16];
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
int temp1, temp2;
|
||||
int shortpitch = 4;
|
||||
|
||||
int c2, d2;
|
||||
int temp3, temp4;
|
||||
unsigned char *cm = ff_cropTbl + CROP_WIDTH;
|
||||
int c2, d2;
|
||||
int temp3, temp4;
|
||||
unsigned char *cm = ff_cropTbl + CROP_WIDTH;
|
||||
|
||||
/* prepare data for load */
|
||||
prefetch_load_short(ip + 8);
|
||||
/* prepare data for load */
|
||||
prefetch_load_short(ip + 8);
|
||||
|
||||
/* first loop is unrolled */
|
||||
a1 = ip[0] + ip[8];
|
||||
b1 = ip[0] - ip[8];
|
||||
/* first loop is unrolled */
|
||||
a1 = ip[0] + ip[8];
|
||||
b1 = ip[0] - ip[8];
|
||||
|
||||
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[13] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[13] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[0] = a1 + d1;
|
||||
op[12] = a1 - d1;
|
||||
op[4] = b1 + c1;
|
||||
op[8] = b1 - c1;
|
||||
op[0] = a1 + d1;
|
||||
op[12] = a1 - d1;
|
||||
op[4] = b1 + c1;
|
||||
op[8] = b1 - c1;
|
||||
|
||||
a1 = ip[1] + ip[9];
|
||||
b1 = ip[1] - ip[9];
|
||||
a1 = ip[1] + ip[9];
|
||||
b1 = ip[1] - ip[9];
|
||||
|
||||
op[1] = a1 + d2;
|
||||
op[13] = a1 - d2;
|
||||
op[5] = b1 + c2;
|
||||
op[9] = b1 - c2;
|
||||
op[1] = a1 + d2;
|
||||
op[13] = a1 - d2;
|
||||
op[5] = b1 + c2;
|
||||
op[9] = b1 - c2;
|
||||
|
||||
a1 = ip[2] + ip[10];
|
||||
b1 = ip[2] - ip[10];
|
||||
a1 = ip[2] + ip[10];
|
||||
b1 = ip[2] - ip[10];
|
||||
|
||||
temp1 = (ip[6] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
temp1 = (ip[6] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[14] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[14] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[7] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
temp3 = (ip[7] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[2] = a1 + d1;
|
||||
op[14] = a1 - d1;
|
||||
op[6] = b1 + c1;
|
||||
op[10] = b1 - c1;
|
||||
op[2] = a1 + d1;
|
||||
op[14] = a1 - d1;
|
||||
op[6] = b1 + c1;
|
||||
op[10] = b1 - c1;
|
||||
|
||||
a1 = ip[3] + ip[11];
|
||||
b1 = ip[3] - ip[11];
|
||||
a1 = ip[3] + ip[11];
|
||||
b1 = ip[3] - ip[11];
|
||||
|
||||
op[3] = a1 + d2;
|
||||
op[15] = a1 - d2;
|
||||
op[7] = b1 + c2;
|
||||
op[11] = b1 - c2;
|
||||
op[3] = a1 + d2;
|
||||
op[15] = a1 - d2;
|
||||
op[7] = b1 + c2;
|
||||
op[11] = b1 - c2;
|
||||
|
||||
ip = output;
|
||||
ip = output;
|
||||
|
||||
/* prepare data for load */
|
||||
prefetch_load_short(ip + shortpitch);
|
||||
/* prepare data for load */
|
||||
prefetch_load_short(ip + shortpitch);
|
||||
|
||||
/* second loop is unrolled */
|
||||
a1 = ip[0] + ip[2];
|
||||
b1 = ip[0] - ip[2];
|
||||
/* second loop is unrolled */
|
||||
a1 = ip[0] + ip[2];
|
||||
b1 = ip[0] - ip[2];
|
||||
|
||||
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[7] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[7] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[0] = (a1 + d1 + 4) >> 3;
|
||||
op[3] = (a1 - d1 + 4) >> 3;
|
||||
op[1] = (b1 + c1 + 4) >> 3;
|
||||
op[2] = (b1 - c1 + 4) >> 3;
|
||||
op[0] = (a1 + d1 + 4) >> 3;
|
||||
op[3] = (a1 - d1 + 4) >> 3;
|
||||
op[1] = (b1 + c1 + 4) >> 3;
|
||||
op[2] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
a1 = ip[4] + ip[6];
|
||||
b1 = ip[4] - ip[6];
|
||||
a1 = ip[4] + ip[6];
|
||||
b1 = ip[4] - ip[6];
|
||||
|
||||
op[4] = (a1 + d2 + 4) >> 3;
|
||||
op[7] = (a1 - d2 + 4) >> 3;
|
||||
op[5] = (b1 + c2 + 4) >> 3;
|
||||
op[6] = (b1 - c2 + 4) >> 3;
|
||||
op[4] = (a1 + d2 + 4) >> 3;
|
||||
op[7] = (a1 - d2 + 4) >> 3;
|
||||
op[5] = (b1 + c2 + 4) >> 3;
|
||||
op[6] = (b1 - c2 + 4) >> 3;
|
||||
|
||||
a1 = ip[8] + ip[10];
|
||||
b1 = ip[8] - ip[10];
|
||||
a1 = ip[8] + ip[10];
|
||||
b1 = ip[8] - ip[10];
|
||||
|
||||
temp1 = (ip[9] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
temp1 = (ip[9] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[11] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[11] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[13] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
temp3 = (ip[13] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[8] = (a1 + d1 + 4) >> 3;
|
||||
op[11] = (a1 - d1 + 4) >> 3;
|
||||
op[9] = (b1 + c1 + 4) >> 3;
|
||||
op[10] = (b1 - c1 + 4) >> 3;
|
||||
op[8] = (a1 + d1 + 4) >> 3;
|
||||
op[11] = (a1 - d1 + 4) >> 3;
|
||||
op[9] = (b1 + c1 + 4) >> 3;
|
||||
op[10] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
a1 = ip[12] + ip[14];
|
||||
b1 = ip[12] - ip[14];
|
||||
a1 = ip[12] + ip[14];
|
||||
b1 = ip[12] - ip[14];
|
||||
|
||||
op[12] = (a1 + d2 + 4) >> 3;
|
||||
op[15] = (a1 - d2 + 4) >> 3;
|
||||
op[13] = (b1 + c2 + 4) >> 3;
|
||||
op[14] = (b1 - c2 + 4) >> 3;
|
||||
op[12] = (a1 + d2 + 4) >> 3;
|
||||
op[15] = (a1 - d2 + 4) >> 3;
|
||||
op[13] = (b1 + c2 + 4) >> 3;
|
||||
op[14] = (b1 - c2 + 4) >> 3;
|
||||
|
||||
ip = output;
|
||||
ip = output;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
short a = ip[c] + pred_ptr[c] ;
|
||||
dst_ptr[c] = cm[a] ;
|
||||
}
|
||||
|
||||
ip += 4;
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
for (r = 0; r < 4; r++) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
short a = ip[c] + pred_ptr[c];
|
||||
dst_ptr[c] = cm[a];
|
||||
}
|
||||
|
||||
ip += 4;
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride)
|
||||
{
|
||||
int a1;
|
||||
int i, absa1;
|
||||
int t2, vector_a1, vector_a;
|
||||
void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int a1;
|
||||
int i, absa1;
|
||||
int t2, vector_a1, vector_a;
|
||||
|
||||
/* a1 = ((input_dc + 4) >> 3); */
|
||||
__asm__ __volatile__ (
|
||||
"addi %[a1], %[input_dc], 4 \n\t"
|
||||
"sra %[a1], %[a1], 3 \n\t"
|
||||
: [a1] "=r" (a1)
|
||||
: [input_dc] "r" (input_dc)
|
||||
);
|
||||
/* a1 = ((input_dc + 4) >> 3); */
|
||||
__asm__ __volatile__(
|
||||
"addi %[a1], %[input_dc], 4 \n\t"
|
||||
"sra %[a1], %[a1], 3 \n\t"
|
||||
: [a1] "=r"(a1)
|
||||
: [input_dc] "r"(input_dc));
|
||||
|
||||
if (a1 < 0)
|
||||
{
|
||||
/* use quad-byte
|
||||
* input and output memory are four byte aligned
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"abs %[absa1], %[a1] \n\t"
|
||||
"replv.qb %[vector_a1], %[absa1] \n\t"
|
||||
: [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
|
||||
: [a1] "r" (a1)
|
||||
);
|
||||
if (a1 < 0) {
|
||||
/* use quad-byte
|
||||
* input and output memory are four byte aligned
|
||||
*/
|
||||
__asm__ __volatile__(
|
||||
"abs %[absa1], %[a1] \n\t"
|
||||
"replv.qb %[vector_a1], %[absa1] \n\t"
|
||||
: [absa1] "=r"(absa1), [vector_a1] "=r"(vector_a1)
|
||||
: [a1] "r"(a1));
|
||||
|
||||
/* use (a1 - predptr[c]) instead a1 + predptr[c] */
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"lw %[t2], 0(%[pred_ptr]) \n\t"
|
||||
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
|
||||
"subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t"
|
||||
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
|
||||
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
|
||||
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
|
||||
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
|
||||
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
|
||||
);
|
||||
}
|
||||
/* use (a1 - predptr[c]) instead a1 + predptr[c] */
|
||||
for (i = 4; i--;) {
|
||||
__asm__ __volatile__(
|
||||
"lw %[t2], 0(%[pred_ptr]) \n\t"
|
||||
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
|
||||
"subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t"
|
||||
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
|
||||
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
|
||||
: [t2] "=&r"(t2), [vector_a] "=&r"(vector_a),
|
||||
[dst_ptr] "+&r"(dst_ptr), [pred_ptr] "+&r"(pred_ptr)
|
||||
: [dst_stride] "r"(dst_stride), [pred_stride] "r"(pred_stride),
|
||||
[vector_a1] "r"(vector_a1));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* use quad-byte
|
||||
* input and output memory are four byte aligned
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"replv.qb %[vector_a1], %[a1] \n\t"
|
||||
: [vector_a1] "=r" (vector_a1)
|
||||
: [a1] "r" (a1)
|
||||
);
|
||||
} else {
|
||||
/* use quad-byte
|
||||
* input and output memory are four byte aligned
|
||||
*/
|
||||
__asm__ __volatile__("replv.qb %[vector_a1], %[a1] \n\t"
|
||||
: [vector_a1] "=r"(vector_a1)
|
||||
: [a1] "r"(a1));
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"lw %[t2], 0(%[pred_ptr]) \n\t"
|
||||
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
|
||||
"addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t"
|
||||
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
|
||||
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
|
||||
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
|
||||
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
|
||||
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
|
||||
);
|
||||
}
|
||||
for (i = 4; i--;) {
|
||||
__asm__ __volatile__(
|
||||
"lw %[t2], 0(%[pred_ptr]) \n\t"
|
||||
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
|
||||
"addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t"
|
||||
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
|
||||
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
|
||||
: [t2] "=&r"(t2), [vector_a] "=&r"(vector_a),
|
||||
[dst_ptr] "+&r"(dst_ptr), [pred_ptr] "+&r"(pred_ptr)
|
||||
: [dst_stride] "r"(dst_stride), [pred_stride] "r"(pred_stride),
|
||||
[vector_a1] "r"(vector_a1));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
short output[16];
|
||||
int i;
|
||||
int a1, b1, c1, d1;
|
||||
int a2, b2, c2, d2;
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff) {
|
||||
short output[16];
|
||||
int i;
|
||||
int a1, b1, c1, d1;
|
||||
int a2, b2, c2, d2;
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
|
||||
prefetch_load_short(ip);
|
||||
prefetch_load_short(ip);
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
a1 = ip[0] + ip[12];
|
||||
b1 = ip[4] + ip[8];
|
||||
c1 = ip[4] - ip[8];
|
||||
d1 = ip[0] - ip[12];
|
||||
for (i = 4; i--;) {
|
||||
a1 = ip[0] + ip[12];
|
||||
b1 = ip[4] + ip[8];
|
||||
c1 = ip[4] - ip[8];
|
||||
d1 = ip[0] - ip[12];
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[4] = c1 + d1;
|
||||
op[8] = a1 - b1;
|
||||
op[12] = d1 - c1;
|
||||
op[0] = a1 + b1;
|
||||
op[4] = c1 + d1;
|
||||
op[8] = a1 - b1;
|
||||
op[12] = d1 - c1;
|
||||
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
prefetch_load_short(ip);
|
||||
prefetch_load_short(ip);
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
a1 = ip[0] + ip[3] + 3;
|
||||
b1 = ip[1] + ip[2];
|
||||
c1 = ip[1] - ip[2];
|
||||
d1 = ip[0] - ip[3] + 3;
|
||||
for (i = 4; i--;) {
|
||||
a1 = ip[0] + ip[3] + 3;
|
||||
b1 = ip[1] + ip[2];
|
||||
c1 = ip[1] - ip[2];
|
||||
d1 = ip[0] - ip[3] + 3;
|
||||
|
||||
a2 = a1 + b1;
|
||||
b2 = d1 + c1;
|
||||
c2 = a1 - b1;
|
||||
d2 = d1 - c1;
|
||||
a2 = a1 + b1;
|
||||
b2 = d1 + c1;
|
||||
c2 = a1 - b1;
|
||||
d2 = d1 - c1;
|
||||
|
||||
op[0] = a2 >> 3;
|
||||
op[1] = b2 >> 3;
|
||||
op[2] = c2 >> 3;
|
||||
op[3] = d2 >> 3;
|
||||
op[0] = a2 >> 3;
|
||||
op[1] = b2 >> 3;
|
||||
op[2] = c2 >> 3;
|
||||
op[3] = d2 >> 3;
|
||||
|
||||
ip += 4;
|
||||
op += 4;
|
||||
}
|
||||
ip += 4;
|
||||
op += 4;
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = output[i];
|
||||
}
|
||||
for (i = 0; i < 16; i++) {
|
||||
mb_dqcoeff[i * 16] = output[i];
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
int a1;
|
||||
void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff) {
|
||||
int a1;
|
||||
|
||||
a1 = ((input[0] + 3) >> 3);
|
||||
a1 = ((input[0] + 3) >> 3);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sh %[a1], 0(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 32(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 64(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 96(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 128(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 160(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 192(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 224(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 256(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 288(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 320(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 352(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 384(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 416(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 448(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 480(%[mb_dqcoeff]) \n\t"
|
||||
__asm__ __volatile__(
|
||||
"sh %[a1], 0(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 32(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 64(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 96(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 128(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 160(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 192(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 224(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 256(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 288(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 320(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 352(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 384(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 416(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 448(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 480(%[mb_dqcoeff]) \n\t"
|
||||
|
||||
:
|
||||
: [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff)
|
||||
);
|
||||
:
|
||||
: [a1] "r"(a1), [mb_dqcoeff] "r"(mb_dqcoeff));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -8,114 +8,90 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
inline void prefetch_load_int(unsigned char *src)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"pref 0, 0(%[src]) \n\t"
|
||||
:
|
||||
: [src] "r" (src)
|
||||
);
|
||||
inline void prefetch_load_int(unsigned char *src) {
|
||||
__asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src));
|
||||
}
|
||||
|
||||
__inline void vp8_copy_mem16x16_dspr2(unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
unsigned int a0, a1, a2, a3;
|
||||
|
||||
__inline void vp8_copy_mem16x16_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1, a2, a3;
|
||||
|
||||
for (r = 16; r--;)
|
||||
{
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"ulw %[a2], 8(%[src]) \n\t"
|
||||
"ulw %[a3], 12(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
"sw %[a2], 8(%[dst]) \n\t"
|
||||
"sw %[a3], 12(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1),
|
||||
[a2] "=&r" (a2), [a3] "=&r" (a3)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__inline void vp8_copy_mem8x8_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1;
|
||||
|
||||
for (r = 16; r--;) {
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
for (r = 8; r--;)
|
||||
{
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__(
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"ulw %[a2], 8(%[src]) \n\t"
|
||||
"ulw %[a3], 12(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
"sw %[a2], 8(%[dst]) \n\t"
|
||||
"sw %[a3], 12(%[dst]) \n\t"
|
||||
: [a0] "=&r"(a0), [a1] "=&r"(a1), [a2] "=&r"(a2), [a3] "=&r"(a3)
|
||||
: [src] "r"(src), [dst] "r"(dst));
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
__inline void vp8_copy_mem8x8_dspr2(unsigned char *RESTRICT src, int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
unsigned int a0, a1;
|
||||
|
||||
__inline void vp8_copy_mem8x4_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1;
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
for (r = 8; r--;) {
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__(
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
: [a0] "=&r"(a0), [a1] "=&r"(a1)
|
||||
: [src] "r"(src), [dst] "r"(dst));
|
||||
|
||||
for (r = 4; r--;)
|
||||
{
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
__inline void vp8_copy_mem8x4_dspr2(unsigned char *RESTRICT src, int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
unsigned int a0, a1;
|
||||
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
for (r = 4; r--;) {
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__(
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
: [a0] "=&r"(a0), [a1] "=&r"(a1)
|
||||
: [src] "r"(src), [dst] "r"(dst));
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -11,60 +11,52 @@
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
static void copy_8x4_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
uint64_t src0, src1, src2, src3;
|
||||
static void copy_8x4_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
uint64_t src0, src1, src2, src3;
|
||||
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void copy_8x8_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
uint64_t src0, src1, src2, src3;
|
||||
static void copy_8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
uint64_t src0, src1, src2, src3;
|
||||
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
LD4(src, src_stride, src0, src1, src2, src3);
|
||||
SD4(src0, src1, src2, src3, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void copy_16x16_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
|
||||
static void copy_16x16_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
|
||||
|
||||
LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
LD_UB8(src, src_stride, src8, src9, src10, src11, src12, src13, src14,
|
||||
src15);
|
||||
LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
LD_UB8(src, src_stride, src8, src9, src10, src11, src12, src13, src14, src15);
|
||||
|
||||
ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
|
||||
dst += (8 * dst_stride);
|
||||
ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, dst,
|
||||
dst_stride);
|
||||
ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
|
||||
dst += (8 * dst_stride);
|
||||
ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, dst, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
copy_16x16_msa(src, src_stride, dst, dst_stride);
|
||||
void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
copy_16x16_msa(src, src_stride, dst, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
copy_8x8_msa(src, src_stride, dst, dst_stride);
|
||||
void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
copy_8x8_msa(src, src_stride, dst, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride)
|
||||
{
|
||||
copy_8x4_msa(src, src_stride, dst, dst_stride);
|
||||
void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
copy_8x4_msa(src, src_stride, dst, dst_stride);
|
||||
}
|
||||
|
@ -15,443 +15,401 @@
|
||||
static const int32_t cospi8sqrt2minus1 = 20091;
|
||||
static const int32_t sinpi8sqrt2 = 35468;
|
||||
|
||||
#define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v8i16 s4_m, s5_m, s6_m, s7_m; \
|
||||
\
|
||||
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \
|
||||
ILVR_D2_SH(s6_m, s4_m, s7_m, s5_m, out0, out2); \
|
||||
out1 = (v8i16)__msa_ilvl_d((v2i64)s6_m, (v2i64)s4_m); \
|
||||
out3 = (v8i16)__msa_ilvl_d((v2i64)s7_m, (v2i64)s5_m); \
|
||||
}
|
||||
#define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v8i16 s4_m, s5_m, s6_m, s7_m; \
|
||||
\
|
||||
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \
|
||||
ILVR_D2_SH(s6_m, s4_m, s7_m, s5_m, out0, out2); \
|
||||
out1 = (v8i16)__msa_ilvl_d((v2i64)s6_m, (v2i64)s4_m); \
|
||||
out3 = (v8i16)__msa_ilvl_d((v2i64)s7_m, (v2i64)s5_m); \
|
||||
}
|
||||
|
||||
#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \
|
||||
({ \
|
||||
v8i16 out_m; \
|
||||
v8i16 zero_m = { 0 }; \
|
||||
v4i32 tmp1_m, tmp2_m; \
|
||||
v4i32 sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
|
||||
\
|
||||
ILVRL_H2_SW(in, zero_m, tmp1_m, tmp2_m); \
|
||||
tmp1_m >>= 16; \
|
||||
tmp2_m >>= 16; \
|
||||
tmp1_m = (tmp1_m * sinpi8_sqrt2_m) >> 16; \
|
||||
tmp2_m = (tmp2_m * sinpi8_sqrt2_m) >> 16; \
|
||||
out_m = __msa_pckev_h((v8i16)tmp2_m, (v8i16)tmp1_m); \
|
||||
\
|
||||
out_m; \
|
||||
})
|
||||
#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \
|
||||
({ \
|
||||
v8i16 out_m; \
|
||||
v8i16 zero_m = { 0 }; \
|
||||
v4i32 tmp1_m, tmp2_m; \
|
||||
v4i32 sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
|
||||
\
|
||||
ILVRL_H2_SW(in, zero_m, tmp1_m, tmp2_m); \
|
||||
tmp1_m >>= 16; \
|
||||
tmp2_m >>= 16; \
|
||||
tmp1_m = (tmp1_m * sinpi8_sqrt2_m) >> 16; \
|
||||
tmp2_m = (tmp2_m * sinpi8_sqrt2_m) >> 16; \
|
||||
out_m = __msa_pckev_h((v8i16)tmp2_m, (v8i16)tmp1_m); \
|
||||
\
|
||||
out_m; \
|
||||
})
|
||||
|
||||
#define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v8i16 a1_m, b1_m, c1_m, d1_m; \
|
||||
v8i16 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
|
||||
v8i16 const_cospi8sqrt2minus1_m; \
|
||||
\
|
||||
const_cospi8sqrt2minus1_m = __msa_fill_h(cospi8sqrt2minus1); \
|
||||
a1_m = in0 + in2; \
|
||||
b1_m = in0 - in2; \
|
||||
c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \
|
||||
c_tmp2_m = __msa_mul_q_h(in3, const_cospi8sqrt2minus1_m); \
|
||||
c_tmp2_m = c_tmp2_m >> 1; \
|
||||
c_tmp2_m = in3 + c_tmp2_m; \
|
||||
c1_m = c_tmp1_m - c_tmp2_m; \
|
||||
d_tmp1_m = __msa_mul_q_h(in1, const_cospi8sqrt2minus1_m); \
|
||||
d_tmp1_m = d_tmp1_m >> 1; \
|
||||
d_tmp1_m = in1 + d_tmp1_m; \
|
||||
d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \
|
||||
d1_m = d_tmp1_m + d_tmp2_m; \
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
#define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v8i16 a1_m, b1_m, c1_m, d1_m; \
|
||||
v8i16 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
|
||||
v8i16 const_cospi8sqrt2minus1_m; \
|
||||
\
|
||||
const_cospi8sqrt2minus1_m = __msa_fill_h(cospi8sqrt2minus1); \
|
||||
a1_m = in0 + in2; \
|
||||
b1_m = in0 - in2; \
|
||||
c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \
|
||||
c_tmp2_m = __msa_mul_q_h(in3, const_cospi8sqrt2minus1_m); \
|
||||
c_tmp2_m = c_tmp2_m >> 1; \
|
||||
c_tmp2_m = in3 + c_tmp2_m; \
|
||||
c1_m = c_tmp1_m - c_tmp2_m; \
|
||||
d_tmp1_m = __msa_mul_q_h(in1, const_cospi8sqrt2minus1_m); \
|
||||
d_tmp1_m = d_tmp1_m >> 1; \
|
||||
d_tmp1_m = in1 + d_tmp1_m; \
|
||||
d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \
|
||||
d1_m = d_tmp1_m + d_tmp2_m; \
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v4i32 a1_m, b1_m, c1_m, d1_m; \
|
||||
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
|
||||
v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \
|
||||
\
|
||||
const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1); \
|
||||
sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
|
||||
a1_m = in0 + in2; \
|
||||
b1_m = in0 - in2; \
|
||||
c_tmp1_m = (in1 * sinpi8_sqrt2_m) >> 16; \
|
||||
c_tmp2_m = in3 + ((in3 * const_cospi8sqrt2minus1_m) >> 16); \
|
||||
c1_m = c_tmp1_m - c_tmp2_m; \
|
||||
d_tmp1_m = in1 + ((in1 * const_cospi8sqrt2minus1_m) >> 16); \
|
||||
d_tmp2_m = (in3 * sinpi8_sqrt2_m) >> 16; \
|
||||
d1_m = d_tmp1_m + d_tmp2_m; \
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v4i32 a1_m, b1_m, c1_m, d1_m; \
|
||||
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
|
||||
v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \
|
||||
\
|
||||
const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1); \
|
||||
sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
|
||||
a1_m = in0 + in2; \
|
||||
b1_m = in0 - in2; \
|
||||
c_tmp1_m = (in1 * sinpi8_sqrt2_m) >> 16; \
|
||||
c_tmp2_m = in3 + ((in3 * const_cospi8sqrt2minus1_m) >> 16); \
|
||||
c1_m = c_tmp1_m - c_tmp2_m; \
|
||||
d_tmp1_m = in1 + ((in1 * const_cospi8sqrt2minus1_m) >> 16); \
|
||||
d_tmp2_m = (in3 * sinpi8_sqrt2_m) >> 16; \
|
||||
d1_m = d_tmp1_m + d_tmp2_m; \
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
static void idct4x4_addblk_msa(int16_t *input, uint8_t *pred,
|
||||
int32_t pred_stride,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
v16i8 zero = { 0 };
|
||||
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
|
||||
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31 };
|
||||
int32_t pred_stride, uint8_t *dest,
|
||||
int32_t dest_stride) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
v16i8 zero = { 0 };
|
||||
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
|
||||
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
UNPCK_SH_SW(input0, in0, in1);
|
||||
UNPCK_SH_SW(input1, in2, in3);
|
||||
VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
|
||||
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
|
||||
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
|
||||
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
|
||||
ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
|
||||
res2, res3);
|
||||
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
res0 = CLIP_SW_0_255(res0);
|
||||
res1 = CLIP_SW_0_255(res1);
|
||||
res2 = CLIP_SW_0_255(res2);
|
||||
res3 = CLIP_SW_0_255(res3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
UNPCK_SH_SW(input0, in0, in1);
|
||||
UNPCK_SH_SW(input1, in2, in3);
|
||||
VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
|
||||
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
|
||||
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
|
||||
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
|
||||
ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
|
||||
res2, res3);
|
||||
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
|
||||
res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
res0 = CLIP_SW_0_255(res0);
|
||||
res1 = CLIP_SW_0_255(res1);
|
||||
res2 = CLIP_SW_0_255(res2);
|
||||
res3 = CLIP_SW_0_255(res3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred,
|
||||
int32_t pred_stride,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 vec;
|
||||
v8i16 res0, res1, res2, res3;
|
||||
v16i8 zero = { 0 };
|
||||
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
|
||||
v16i8 mask = { 0, 2, 4, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
int32_t pred_stride, uint8_t *dest,
|
||||
int32_t dest_stride) {
|
||||
v8i16 vec;
|
||||
v8i16 res0, res1, res2, res3;
|
||||
v16i8 zero = { 0 };
|
||||
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
|
||||
v16i8 mask = { 0, 2, 4, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
vec = __msa_fill_h(in_dc);
|
||||
vec = __msa_srari_h(vec, 3);
|
||||
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
|
||||
ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
vec = __msa_fill_h(in_dc);
|
||||
vec = __msa_srari_h(vec, 3);
|
||||
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
|
||||
ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff)
|
||||
{
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
|
||||
v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
|
||||
v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
UNPCK_SH_SW(input0, in0, in1);
|
||||
UNPCK_SH_SW(input1, in2, in3);
|
||||
BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
|
||||
BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
|
||||
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
|
||||
BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
|
||||
ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
|
||||
SRA_4V(vt0, vt1, vt2, vt3, 3);
|
||||
mb_dq_coeff[0] = __msa_copy_s_h((v8i16)vt0, 0);
|
||||
mb_dq_coeff[16] = __msa_copy_s_h((v8i16)vt1, 0);
|
||||
mb_dq_coeff[32] = __msa_copy_s_h((v8i16)vt2, 0);
|
||||
mb_dq_coeff[48] = __msa_copy_s_h((v8i16)vt3, 0);
|
||||
mb_dq_coeff[64] = __msa_copy_s_h((v8i16)vt0, 2);
|
||||
mb_dq_coeff[80] = __msa_copy_s_h((v8i16)vt1, 2);
|
||||
mb_dq_coeff[96] = __msa_copy_s_h((v8i16)vt2, 2);
|
||||
mb_dq_coeff[112] = __msa_copy_s_h((v8i16)vt3, 2);
|
||||
mb_dq_coeff[128] = __msa_copy_s_h((v8i16)vt0, 4);
|
||||
mb_dq_coeff[144] = __msa_copy_s_h((v8i16)vt1, 4);
|
||||
mb_dq_coeff[160] = __msa_copy_s_h((v8i16)vt2, 4);
|
||||
mb_dq_coeff[176] = __msa_copy_s_h((v8i16)vt3, 4);
|
||||
mb_dq_coeff[192] = __msa_copy_s_h((v8i16)vt0, 6);
|
||||
mb_dq_coeff[208] = __msa_copy_s_h((v8i16)vt1, 6);
|
||||
mb_dq_coeff[224] = __msa_copy_s_h((v8i16)vt2, 6);
|
||||
mb_dq_coeff[240] = __msa_copy_s_h((v8i16)vt3, 6);
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
UNPCK_SH_SW(input0, in0, in1);
|
||||
UNPCK_SH_SW(input1, in2, in3);
|
||||
BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
|
||||
BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
|
||||
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
|
||||
BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
|
||||
ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
|
||||
SRA_4V(vt0, vt1, vt2, vt3, 3);
|
||||
mb_dq_coeff[0] = __msa_copy_s_h((v8i16)vt0, 0);
|
||||
mb_dq_coeff[16] = __msa_copy_s_h((v8i16)vt1, 0);
|
||||
mb_dq_coeff[32] = __msa_copy_s_h((v8i16)vt2, 0);
|
||||
mb_dq_coeff[48] = __msa_copy_s_h((v8i16)vt3, 0);
|
||||
mb_dq_coeff[64] = __msa_copy_s_h((v8i16)vt0, 2);
|
||||
mb_dq_coeff[80] = __msa_copy_s_h((v8i16)vt1, 2);
|
||||
mb_dq_coeff[96] = __msa_copy_s_h((v8i16)vt2, 2);
|
||||
mb_dq_coeff[112] = __msa_copy_s_h((v8i16)vt3, 2);
|
||||
mb_dq_coeff[128] = __msa_copy_s_h((v8i16)vt0, 4);
|
||||
mb_dq_coeff[144] = __msa_copy_s_h((v8i16)vt1, 4);
|
||||
mb_dq_coeff[160] = __msa_copy_s_h((v8i16)vt2, 4);
|
||||
mb_dq_coeff[176] = __msa_copy_s_h((v8i16)vt3, 4);
|
||||
mb_dq_coeff[192] = __msa_copy_s_h((v8i16)vt0, 6);
|
||||
mb_dq_coeff[208] = __msa_copy_s_h((v8i16)vt1, 6);
|
||||
mb_dq_coeff[224] = __msa_copy_s_h((v8i16)vt2, 6);
|
||||
mb_dq_coeff[240] = __msa_copy_s_h((v8i16)vt3, 6);
|
||||
}
|
||||
|
||||
static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1;
|
||||
v8i16 in0, in1, in2, in3;
|
||||
v8i16 hz0_h, hz1_h, hz2_h, hz3_h;
|
||||
v16i8 dest0, dest1, dest2, dest3;
|
||||
v4i32 hz0_w, hz1_w, hz2_w, hz3_w;
|
||||
v4i32 vt0, vt1, vt2, vt3, res0, res1, res2, res3;
|
||||
v2i64 zero = { 0 };
|
||||
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31 };
|
||||
uint8_t *dest, int32_t dest_stride) {
|
||||
v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1;
|
||||
v8i16 in0, in1, in2, in3;
|
||||
v8i16 hz0_h, hz1_h, hz2_h, hz3_h;
|
||||
v16i8 dest0, dest1, dest2, dest3;
|
||||
v4i32 hz0_w, hz1_w, hz2_w, hz3_w;
|
||||
v4i32 vt0, vt1, vt2, vt3, res0, res1, res2, res3;
|
||||
v2i64 zero = { 0 };
|
||||
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
|
||||
MUL2(input0, dequant_in0, input1, dequant_in1, mul0, mul1);
|
||||
PCKEV_D2_SH(zero, mul0, zero, mul1, in0, in2);
|
||||
PCKOD_D2_SH(zero, mul0, zero, mul1, in1, in3);
|
||||
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h);
|
||||
PCKEV_D2_SH(hz1_h, hz0_h, hz3_h, hz2_h, mul0, mul1);
|
||||
UNPCK_SH_SW(mul0, hz0_w, hz1_w);
|
||||
UNPCK_SH_SW(mul1, hz2_w, hz3_w);
|
||||
TRANSPOSE4x4_SW_SW(hz0_w, hz1_w, hz2_w, hz3_w, hz0_w, hz1_w, hz2_w, hz3_w);
|
||||
VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);
|
||||
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
|
||||
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
res0 = CLIP_SW_0_255(res0);
|
||||
res1 = CLIP_SW_0_255(res1);
|
||||
res2 = CLIP_SW_0_255(res2);
|
||||
res3 = CLIP_SW_0_255(res3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
|
||||
MUL2(input0, dequant_in0, input1, dequant_in1, mul0, mul1);
|
||||
PCKEV_D2_SH(zero, mul0, zero, mul1, in0, in2);
|
||||
PCKOD_D2_SH(zero, mul0, zero, mul1, in1, in3);
|
||||
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h);
|
||||
PCKEV_D2_SH(hz1_h, hz0_h, hz3_h, hz2_h, mul0, mul1);
|
||||
UNPCK_SH_SW(mul0, hz0_w, hz1_w);
|
||||
UNPCK_SH_SW(mul1, hz2_w, hz3_w);
|
||||
TRANSPOSE4x4_SW_SW(hz0_w, hz1_w, hz2_w, hz3_w, hz0_w, hz1_w, hz2_w, hz3_w);
|
||||
VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);
|
||||
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
|
||||
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
|
||||
res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
res0 = CLIP_SW_0_255(res0);
|
||||
res1 = CLIP_SW_0_255(res1);
|
||||
res2 = CLIP_SW_0_255(res2);
|
||||
res3 = CLIP_SW_0_255(res3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
static void dequant_idct4x4_addblk_2x_msa(int16_t *input,
|
||||
int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v16u8 dest0, dest1, dest2, dest3;
|
||||
v8i16 in0, in1, in2, in3;
|
||||
v8i16 mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
|
||||
v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v8i16 res0, res1, res2, res3;
|
||||
v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r;
|
||||
v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r;
|
||||
v16i8 zero = { 0 };
|
||||
int16_t *dequant_input, uint8_t *dest,
|
||||
int32_t dest_stride) {
|
||||
v16u8 dest0, dest1, dest2, dest3;
|
||||
v8i16 in0, in1, in2, in3;
|
||||
v8i16 mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
|
||||
v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v8i16 res0, res1, res2, res3;
|
||||
v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r;
|
||||
v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r;
|
||||
v16i8 zero = { 0 };
|
||||
|
||||
LD_SH4(input, 8, in0, in1, in2, in3);
|
||||
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
|
||||
MUL4(in0, dequant_in0, in1, dequant_in1, in2, dequant_in0, in3, dequant_in1,
|
||||
mul0, mul1, mul2, mul3);
|
||||
PCKEV_D2_SH(mul2, mul0, mul3, mul1, in0, in2);
|
||||
PCKOD_D2_SH(mul2, mul0, mul3, mul1, in1, in3);
|
||||
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
|
||||
TRANSPOSE_TWO_4x4_H(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
UNPCK_SH_SW(hz0, hz0r, hz0l);
|
||||
UNPCK_SH_SW(hz1, hz1r, hz1l);
|
||||
UNPCK_SH_SW(hz2, hz2r, hz2l);
|
||||
UNPCK_SH_SW(hz3, hz3r, hz3l);
|
||||
VP8_IDCT_1D_W(hz0l, hz1l, hz2l, hz3l, vt0l, vt1l, vt2l, vt3l);
|
||||
SRARI_W4_SW(vt0l, vt1l, vt2l, vt3l, 3);
|
||||
VP8_IDCT_1D_W(hz0r, hz1r, hz2r, hz3r, vt0r, vt1r, vt2r, vt3r);
|
||||
SRARI_W4_SW(vt0r, vt1r, vt2r, vt3r, 3);
|
||||
PCKEV_H4_SH(vt0l, vt0r, vt1l, vt1r, vt2l, vt2r, vt3l, vt3r, vt0, vt1, vt2,
|
||||
vt3);
|
||||
TRANSPOSE_TWO_4x4_H(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
|
||||
res2, res3);
|
||||
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
|
||||
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
|
||||
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
LD_SH4(input, 8, in0, in1, in2, in3);
|
||||
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
|
||||
MUL4(in0, dequant_in0, in1, dequant_in1, in2, dequant_in0, in3, dequant_in1,
|
||||
mul0, mul1, mul2, mul3);
|
||||
PCKEV_D2_SH(mul2, mul0, mul3, mul1, in0, in2);
|
||||
PCKOD_D2_SH(mul2, mul0, mul3, mul1, in1, in3);
|
||||
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
|
||||
TRANSPOSE_TWO_4x4_H(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
UNPCK_SH_SW(hz0, hz0r, hz0l);
|
||||
UNPCK_SH_SW(hz1, hz1r, hz1l);
|
||||
UNPCK_SH_SW(hz2, hz2r, hz2l);
|
||||
UNPCK_SH_SW(hz3, hz3r, hz3l);
|
||||
VP8_IDCT_1D_W(hz0l, hz1l, hz2l, hz3l, vt0l, vt1l, vt2l, vt3l);
|
||||
SRARI_W4_SW(vt0l, vt1l, vt2l, vt3l, 3);
|
||||
VP8_IDCT_1D_W(hz0r, hz1r, hz2r, hz3r, vt0r, vt1r, vt2r, vt3r);
|
||||
SRARI_W4_SW(vt0r, vt1r, vt2r, vt3r, 3);
|
||||
PCKEV_H4_SH(vt0l, vt0r, vt1l, vt1r, vt2l, vt2r, vt3l, vt3r, vt0, vt1, vt2,
|
||||
vt3);
|
||||
TRANSPOSE_TWO_4x4_H(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1, res2,
|
||||
res3);
|
||||
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
|
||||
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
|
||||
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
|
||||
__asm__ __volatile__(
|
||||
"sw $zero, 0(%[input]) \n\t"
|
||||
"sw $zero, 4(%[input]) \n\t"
|
||||
"sw $zero, 8(%[input]) \n\t"
|
||||
"sw $zero, 12(%[input]) \n\t"
|
||||
"sw $zero, 16(%[input]) \n\t"
|
||||
"sw $zero, 20(%[input]) \n\t"
|
||||
"sw $zero, 24(%[input]) \n\t"
|
||||
"sw $zero, 28(%[input]) \n\t"
|
||||
"sw $zero, 32(%[input]) \n\t"
|
||||
"sw $zero, 36(%[input]) \n\t"
|
||||
"sw $zero, 40(%[input]) \n\t"
|
||||
"sw $zero, 44(%[input]) \n\t"
|
||||
"sw $zero, 48(%[input]) \n\t"
|
||||
"sw $zero, 52(%[input]) \n\t"
|
||||
"sw $zero, 56(%[input]) \n\t"
|
||||
"sw $zero, 60(%[input]) \n\t"::
|
||||
__asm__ __volatile__(
|
||||
"sw $zero, 0(%[input]) \n\t"
|
||||
"sw $zero, 4(%[input]) \n\t"
|
||||
"sw $zero, 8(%[input]) \n\t"
|
||||
"sw $zero, 12(%[input]) \n\t"
|
||||
"sw $zero, 16(%[input]) \n\t"
|
||||
"sw $zero, 20(%[input]) \n\t"
|
||||
"sw $zero, 24(%[input]) \n\t"
|
||||
"sw $zero, 28(%[input]) \n\t"
|
||||
"sw $zero, 32(%[input]) \n\t"
|
||||
"sw $zero, 36(%[input]) \n\t"
|
||||
"sw $zero, 40(%[input]) \n\t"
|
||||
"sw $zero, 44(%[input]) \n\t"
|
||||
"sw $zero, 48(%[input]) \n\t"
|
||||
"sw $zero, 52(%[input]) \n\t"
|
||||
"sw $zero, 56(%[input]) \n\t"
|
||||
"sw $zero, 60(%[input]) \n\t" ::
|
||||
|
||||
[input] "r"(input)
|
||||
);
|
||||
[input] "r"(input));
|
||||
}
|
||||
|
||||
static void dequant_idct_addconst_2x_msa(int16_t *input, int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input_dc0, input_dc1, vec;
|
||||
v16u8 dest0, dest1, dest2, dest3;
|
||||
v16i8 zero = { 0 };
|
||||
v8i16 res0, res1, res2, res3;
|
||||
uint8_t *dest, int32_t dest_stride) {
|
||||
v8i16 input_dc0, input_dc1, vec;
|
||||
v16u8 dest0, dest1, dest2, dest3;
|
||||
v16i8 zero = { 0 };
|
||||
v8i16 res0, res1, res2, res3;
|
||||
|
||||
input_dc0 = __msa_fill_h(input[0] * dequant_input[0]);
|
||||
input_dc1 = __msa_fill_h(input[16] * dequant_input[0]);
|
||||
SRARI_H2_SH(input_dc0, input_dc1, 3);
|
||||
vec = (v8i16)__msa_pckev_d((v2i64)input_dc1, (v2i64)input_dc0);
|
||||
input[0] = 0;
|
||||
input[16] = 0;
|
||||
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0,
|
||||
res1, res2, res3);
|
||||
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
|
||||
res2, res3);
|
||||
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
|
||||
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
|
||||
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
input_dc0 = __msa_fill_h(input[0] * dequant_input[0]);
|
||||
input_dc1 = __msa_fill_h(input[16] * dequant_input[0]);
|
||||
SRARI_H2_SH(input_dc0, input_dc1, 3);
|
||||
vec = (v8i16)__msa_pckev_d((v2i64)input_dc1, (v2i64)input_dc0);
|
||||
input[0] = 0;
|
||||
input[16] = 0;
|
||||
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1, res2,
|
||||
res3);
|
||||
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
|
||||
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
|
||||
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
void vp8_short_idct4x4llm_msa(int16_t *input, uint8_t *pred_ptr,
|
||||
int32_t pred_stride, uint8_t *dst_ptr,
|
||||
int32_t dst_stride)
|
||||
{
|
||||
idct4x4_addblk_msa(input, pred_ptr, pred_stride, dst_ptr, dst_stride);
|
||||
int32_t dst_stride) {
|
||||
idct4x4_addblk_msa(input, pred_ptr, pred_stride, dst_ptr, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_msa(int16_t input_dc, uint8_t *pred_ptr,
|
||||
int32_t pred_stride, uint8_t *dst_ptr,
|
||||
int32_t dst_stride)
|
||||
{
|
||||
idct4x4_addconst_msa(input_dc, pred_ptr, pred_stride, dst_ptr, dst_stride);
|
||||
int32_t dst_stride) {
|
||||
idct4x4_addconst_msa(input_dc, pred_ptr, pred_stride, dst_ptr, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_dequantize_b_msa(BLOCKD *d, int16_t *DQC)
|
||||
{
|
||||
v8i16 dqc0, dqc1, q0, q1, dq0, dq1;
|
||||
void vp8_dequantize_b_msa(BLOCKD *d, int16_t *DQC) {
|
||||
v8i16 dqc0, dqc1, q0, q1, dq0, dq1;
|
||||
|
||||
LD_SH2(DQC, 8, dqc0, dqc1);
|
||||
LD_SH2(d->qcoeff, 8, q0, q1);
|
||||
MUL2(dqc0, q0, dqc1, q1, dq0, dq1);
|
||||
ST_SH2(dq0, dq1, d->dqcoeff, 8);
|
||||
LD_SH2(DQC, 8, dqc0, dqc1);
|
||||
LD_SH2(d->qcoeff, 8, q0, q1);
|
||||
MUL2(dqc0, q0, dqc1, q1, dq0, dq1);
|
||||
ST_SH2(dq0, dq1, d->dqcoeff, 8);
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_msa(int16_t *input, int16_t *dq,
|
||||
uint8_t *dest, int32_t stride)
|
||||
{
|
||||
dequant_idct4x4_addblk_msa(input, dq, dest, stride);
|
||||
void vp8_dequant_idct_add_msa(int16_t *input, int16_t *dq, uint8_t *dest,
|
||||
int32_t stride) {
|
||||
dequant_idct4x4_addblk_msa(input, dq, dest, stride);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sw $zero, 0(%[input]) \n\t"
|
||||
"sw $zero, 4(%[input]) \n\t"
|
||||
"sw $zero, 8(%[input]) \n\t"
|
||||
"sw $zero, 12(%[input]) \n\t"
|
||||
"sw $zero, 16(%[input]) \n\t"
|
||||
"sw $zero, 20(%[input]) \n\t"
|
||||
"sw $zero, 24(%[input]) \n\t"
|
||||
"sw $zero, 28(%[input]) \n\t"
|
||||
__asm__ __volatile__(
|
||||
"sw $zero, 0(%[input]) \n\t"
|
||||
"sw $zero, 4(%[input]) \n\t"
|
||||
"sw $zero, 8(%[input]) \n\t"
|
||||
"sw $zero, 12(%[input]) \n\t"
|
||||
"sw $zero, 16(%[input]) \n\t"
|
||||
"sw $zero, 20(%[input]) \n\t"
|
||||
"sw $zero, 24(%[input]) \n\t"
|
||||
"sw $zero, 28(%[input]) \n\t"
|
||||
|
||||
:
|
||||
: [input] "r" (input)
|
||||
);
|
||||
:
|
||||
: [input] "r"(input));
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq,
|
||||
uint8_t *dst, int32_t stride,
|
||||
char *eobs)
|
||||
{
|
||||
int16_t *eobs_h = (int16_t *)eobs;
|
||||
uint8_t i;
|
||||
void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq, uint8_t *dst,
|
||||
int32_t stride, char *eobs) {
|
||||
int16_t *eobs_h = (int16_t *)eobs;
|
||||
uint8_t i;
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
if (eobs_h[0])
|
||||
{
|
||||
if (eobs_h[0] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dst, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dst, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (eobs_h[1])
|
||||
{
|
||||
if (eobs_h[1] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dst + 8, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dst + 8, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dst += (4 * stride);
|
||||
eobs_h += 2;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq,
|
||||
uint8_t *dstu, uint8_t *dstv,
|
||||
int32_t stride, char *eobs)
|
||||
{
|
||||
int16_t *eobs_h = (int16_t *)eobs;
|
||||
|
||||
if (eobs_h[0])
|
||||
{
|
||||
if (eobs_h[0] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += (stride * 4);
|
||||
|
||||
if (eobs_h[1])
|
||||
{
|
||||
if (eobs_h[1] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
for (i = 4; i--;) {
|
||||
if (eobs_h[0]) {
|
||||
if (eobs_h[0] & 0xfefe) {
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dst, stride);
|
||||
} else {
|
||||
dequant_idct_addconst_2x_msa(q, dq, dst, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (eobs_h[2])
|
||||
{
|
||||
if (eobs_h[2] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
if (eobs_h[1]) {
|
||||
if (eobs_h[1] & 0xfefe) {
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dst + 8, stride);
|
||||
} else {
|
||||
dequant_idct_addconst_2x_msa(q, dq, dst + 8, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += (stride * 4);
|
||||
|
||||
if (eobs_h[3])
|
||||
{
|
||||
if (eobs_h[3] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
}
|
||||
dst += (4 * stride);
|
||||
eobs_h += 2;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq, uint8_t *dstu,
|
||||
uint8_t *dstv, int32_t stride,
|
||||
char *eobs) {
|
||||
int16_t *eobs_h = (int16_t *)eobs;
|
||||
|
||||
if (eobs_h[0]) {
|
||||
if (eobs_h[0] & 0xfefe) {
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
|
||||
} else {
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += (stride * 4);
|
||||
|
||||
if (eobs_h[1]) {
|
||||
if (eobs_h[1] & 0xfefe) {
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
|
||||
} else {
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (eobs_h[2]) {
|
||||
if (eobs_h[2] & 0xfefe) {
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
|
||||
} else {
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += (stride * 4);
|
||||
|
||||
if (eobs_h[3]) {
|
||||
if (eobs_h[3] & 0xfefe) {
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
|
||||
} else {
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -14,133 +14,126 @@
|
||||
|
||||
static void filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
uint64_t src0_d, src1_d, dst0_d, dst1_d;
|
||||
v16i8 src0 = { 0 };
|
||||
v16i8 src1 = { 0 };
|
||||
v16i8 dst0 = { 0 };
|
||||
v16i8 dst1 = { 0 };
|
||||
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
|
||||
int32_t src_weight) {
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
uint64_t src0_d, src1_d, dst0_d, dst1_d;
|
||||
v16i8 src0 = { 0 };
|
||||
v16i8 src1 = { 0 };
|
||||
v16i8 dst0 = { 0 };
|
||||
v16i8 dst1 = { 0 };
|
||||
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
|
||||
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
|
||||
for (row = 2; row--;)
|
||||
{
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src0);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst0);
|
||||
for (row = 2; row--;) {
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src0);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst0);
|
||||
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src1);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst1);
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src1);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst1);
|
||||
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst0, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst0, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst1, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
}
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst1, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
v16i8 src0, src1, src2, src3;
|
||||
v16i8 dst0, dst1, dst2, dst3;
|
||||
v8i16 src_wt, dst_wt;
|
||||
v8i16 res_h_r, res_h_l;
|
||||
v8i16 src_r, src_l, dst_r, dst_l;
|
||||
int32_t src_weight) {
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
v16i8 src0, src1, src2, src3;
|
||||
v16i8 dst0, dst1, dst2, dst3;
|
||||
v8i16 src_wt, dst_wt;
|
||||
v8i16 res_h_r, res_h_l;
|
||||
v8i16 src_r, src_l, dst_r, dst_l;
|
||||
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
|
||||
for (row = 4; row--;)
|
||||
{
|
||||
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
|
||||
src_ptr += (4 * src_stride);
|
||||
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
|
||||
for (row = 4; row--;) {
|
||||
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
|
||||
src_ptr += (4 * src_stride);
|
||||
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
|
||||
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src2, src_r, src_l);
|
||||
UNPCK_UB_SH(dst2, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
UNPCK_UB_SH(src2, src_r, src_l);
|
||||
UNPCK_UB_SH(dst2, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src3, src_r, src_l);
|
||||
UNPCK_UB_SH(dst3, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
UNPCK_UB_SH(src3, src_r, src_l);
|
||||
UNPCK_UB_SH(dst3, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
filter_by_weight16x16_msa(src_ptr, src_stride, dst_ptr, dst_stride,
|
||||
src_weight);
|
||||
int32_t src_weight) {
|
||||
filter_by_weight16x16_msa(src_ptr, src_stride, dst_ptr, dst_stride,
|
||||
src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
filter_by_weight8x8_msa(src_ptr, src_stride, dst_ptr, dst_stride,
|
||||
src_weight);
|
||||
int32_t src_weight) {
|
||||
filter_by_weight8x8_msa(src_ptr, src_stride, dst_ptr, dst_stride, src_weight);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -8,33 +8,31 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "entropy.h"
|
||||
|
||||
const int vp8_mode_contexts[6][4] =
|
||||
{
|
||||
{
|
||||
/* 0 */
|
||||
7, 1, 1, 143,
|
||||
},
|
||||
{
|
||||
/* 1 */
|
||||
14, 18, 14, 107,
|
||||
},
|
||||
{
|
||||
/* 2 */
|
||||
135, 64, 57, 68,
|
||||
},
|
||||
{
|
||||
/* 3 */
|
||||
60, 56, 128, 65,
|
||||
},
|
||||
{
|
||||
/* 4 */
|
||||
159, 134, 128, 34,
|
||||
},
|
||||
{
|
||||
/* 5 */
|
||||
234, 188, 128, 28,
|
||||
},
|
||||
const int vp8_mode_contexts[6][4] = {
|
||||
{
|
||||
/* 0 */
|
||||
7, 1, 1, 143,
|
||||
},
|
||||
{
|
||||
/* 1 */
|
||||
14, 18, 14, 107,
|
||||
},
|
||||
{
|
||||
/* 2 */
|
||||
135, 64, 57, 68,
|
||||
},
|
||||
{
|
||||
/* 3 */
|
||||
60, 56, 128, 65,
|
||||
},
|
||||
{
|
||||
/* 4 */
|
||||
159, 134, 128, 34,
|
||||
},
|
||||
{
|
||||
/* 5 */
|
||||
234, 188, 128, 28,
|
||||
},
|
||||
};
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_MODECONT_H_
|
||||
#define VP8_COMMON_MODECONT_H_
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_MV_H_
|
||||
#define VP8_COMMON_MV_H_
|
||||
#include "vpx/vpx_integer.h"
|
||||
@ -17,17 +16,15 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
short row;
|
||||
short col;
|
||||
typedef struct {
|
||||
short row;
|
||||
short col;
|
||||
} MV;
|
||||
|
||||
typedef union int_mv
|
||||
{
|
||||
uint32_t as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates faster equality tests and copies */
|
||||
typedef union int_mv {
|
||||
uint32_t as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates faster equality tests and copies */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -8,13 +8,11 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ONYX_H_
|
||||
#define VP8_COMMON_ONYX_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "vpx_config.h"
|
||||
@ -24,256 +22,258 @@ extern "C"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "ppflags.h"
|
||||
|
||||
struct VP8_COMP;
|
||||
struct VP8_COMP;
|
||||
|
||||
/* Create/destroy static data structures. */
|
||||
/* Create/destroy static data structures. */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NORMAL = 0,
|
||||
FOURFIVE = 1,
|
||||
THREEFIVE = 2,
|
||||
ONETWO = 3
|
||||
typedef enum {
|
||||
NORMAL = 0,
|
||||
FOURFIVE = 1,
|
||||
THREEFIVE = 2,
|
||||
ONETWO = 3
|
||||
} VPX_SCALING;
|
||||
|
||||
} VPX_SCALING;
|
||||
typedef enum {
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x0,
|
||||
USAGE_STREAM_FROM_SERVER = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2,
|
||||
USAGE_CONSTANT_QUALITY = 0x3
|
||||
} END_USAGE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x0,
|
||||
USAGE_STREAM_FROM_SERVER = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2,
|
||||
USAGE_CONSTANT_QUALITY = 0x3
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
MODE_REALTIME = 0x0,
|
||||
MODE_GOODQUALITY = 0x1,
|
||||
MODE_BESTQUALITY = 0x2,
|
||||
MODE_FIRSTPASS = 0x3,
|
||||
MODE_SECONDPASS = 0x4,
|
||||
MODE_SECONDPASS_BEST = 0x5
|
||||
} MODE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
FRAMEFLAGS_KEY = 1,
|
||||
FRAMEFLAGS_GOLDEN = 2,
|
||||
FRAMEFLAGS_ALTREF = 4
|
||||
} FRAMETYPE_FLAGS;
|
||||
typedef enum {
|
||||
MODE_REALTIME = 0x0,
|
||||
MODE_GOODQUALITY = 0x1,
|
||||
MODE_BESTQUALITY = 0x2,
|
||||
MODE_FIRSTPASS = 0x3,
|
||||
MODE_SECONDPASS = 0x4,
|
||||
MODE_SECONDPASS_BEST = 0x5
|
||||
} MODE;
|
||||
|
||||
typedef enum {
|
||||
FRAMEFLAGS_KEY = 1,
|
||||
FRAMEFLAGS_GOLDEN = 2,
|
||||
FRAMEFLAGS_ALTREF = 4
|
||||
} FRAMETYPE_FLAGS;
|
||||
|
||||
#include <assert.h>
|
||||
static INLINE void Scale2Ratio(int mode, int *hr, int *hs)
|
||||
{
|
||||
switch (mode)
|
||||
{
|
||||
case NORMAL:
|
||||
*hr = 1;
|
||||
*hs = 1;
|
||||
break;
|
||||
case FOURFIVE:
|
||||
*hr = 4;
|
||||
*hs = 5;
|
||||
break;
|
||||
case THREEFIVE:
|
||||
*hr = 3;
|
||||
*hs = 5;
|
||||
break;
|
||||
case ONETWO:
|
||||
*hr = 1;
|
||||
*hs = 2;
|
||||
break;
|
||||
default:
|
||||
*hr = 1;
|
||||
*hs = 1;
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
static INLINE void Scale2Ratio(int mode, int *hr, int *hs) {
|
||||
switch (mode) {
|
||||
case NORMAL:
|
||||
*hr = 1;
|
||||
*hs = 1;
|
||||
break;
|
||||
case FOURFIVE:
|
||||
*hr = 4;
|
||||
*hs = 5;
|
||||
break;
|
||||
case THREEFIVE:
|
||||
*hr = 3;
|
||||
*hs = 5;
|
||||
break;
|
||||
case ONETWO:
|
||||
*hr = 1;
|
||||
*hs = 2;
|
||||
break;
|
||||
default:
|
||||
*hr = 1;
|
||||
*hs = 1;
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* 4 versions of bitstream defined:
|
||||
* 0 best quality/slowest decode, 3 lowest quality/fastest decode
|
||||
*/
|
||||
int Version;
|
||||
int Width;
|
||||
int Height;
|
||||
struct vpx_rational timebase;
|
||||
unsigned int target_bandwidth; /* kilobits per second */
|
||||
typedef struct {
|
||||
/* 4 versions of bitstream defined:
|
||||
* 0 best quality/slowest decode, 3 lowest quality/fastest decode
|
||||
*/
|
||||
int Version;
|
||||
int Width;
|
||||
int Height;
|
||||
struct vpx_rational timebase;
|
||||
unsigned int target_bandwidth; /* kilobits per second */
|
||||
|
||||
/* Parameter used for applying denoiser.
|
||||
* For temporal denoiser: noise_sensitivity = 0 means off,
|
||||
* noise_sensitivity = 1 means temporal denoiser on for Y channel only,
|
||||
* noise_sensitivity = 2 means temporal denoiser on for all channels.
|
||||
* noise_sensitivity = 3 means aggressive denoising mode.
|
||||
* noise_sensitivity >= 4 means adaptive denoising mode.
|
||||
* Temporal denoiser is enabled via the configuration option:
|
||||
* CONFIG_TEMPORAL_DENOISING.
|
||||
* For spatial denoiser: noise_sensitivity controls the amount of
|
||||
* pre-processing blur: noise_sensitivity = 0 means off.
|
||||
* Spatial denoiser invoked under !CONFIG_TEMPORAL_DENOISING.
|
||||
*/
|
||||
int noise_sensitivity;
|
||||
/* Parameter used for applying denoiser.
|
||||
* For temporal denoiser: noise_sensitivity = 0 means off,
|
||||
* noise_sensitivity = 1 means temporal denoiser on for Y channel only,
|
||||
* noise_sensitivity = 2 means temporal denoiser on for all channels.
|
||||
* noise_sensitivity = 3 means aggressive denoising mode.
|
||||
* noise_sensitivity >= 4 means adaptive denoising mode.
|
||||
* Temporal denoiser is enabled via the configuration option:
|
||||
* CONFIG_TEMPORAL_DENOISING.
|
||||
* For spatial denoiser: noise_sensitivity controls the amount of
|
||||
* pre-processing blur: noise_sensitivity = 0 means off.
|
||||
* Spatial denoiser invoked under !CONFIG_TEMPORAL_DENOISING.
|
||||
*/
|
||||
int noise_sensitivity;
|
||||
|
||||
/* parameter used for sharpening output: recommendation 0: */
|
||||
int Sharpness;
|
||||
int cpu_used;
|
||||
unsigned int rc_max_intra_bitrate_pct;
|
||||
unsigned int screen_content_mode;
|
||||
/* parameter used for sharpening output: recommendation 0: */
|
||||
int Sharpness;
|
||||
int cpu_used;
|
||||
unsigned int rc_max_intra_bitrate_pct;
|
||||
unsigned int screen_content_mode;
|
||||
|
||||
/* mode ->
|
||||
*(0)=Realtime/Live Encoding. This mode is optimized for realtim
|
||||
* encoding (for example, capturing a television signal or feed
|
||||
* from a live camera). ( speed setting controls how fast )
|
||||
*(1)=Good Quality Fast Encoding. The encoder balances quality with
|
||||
* the amount of time it takes to encode the output. ( speed
|
||||
* setting controls how fast )
|
||||
*(2)=One Pass - Best Quality. The encoder places priority on the
|
||||
* quality of the output over encoding speed. The output is
|
||||
* compressed at the highest possible quality. This option takes
|
||||
* the longest amount of time to encode. ( speed setting ignored
|
||||
* )
|
||||
*(3)=Two Pass - First Pass. The encoder generates a file of
|
||||
* statistics for use in the second encoding pass. ( speed
|
||||
* setting controls how fast )
|
||||
*(4)=Two Pass - Second Pass. The encoder uses the statistics that
|
||||
* were generated in the first encoding pass to create the
|
||||
* compressed output. ( speed setting controls how fast )
|
||||
*(5)=Two Pass - Second Pass Best. The encoder uses the statistics
|
||||
* that were generated in the first encoding pass to create the
|
||||
* compressed output using the highest possible quality, and
|
||||
* taking a longer amount of time to encode.. ( speed setting
|
||||
* ignored )
|
||||
*/
|
||||
int Mode;
|
||||
/* mode ->
|
||||
*(0)=Realtime/Live Encoding. This mode is optimized for realtim
|
||||
* encoding (for example, capturing a television signal or feed
|
||||
* from a live camera). ( speed setting controls how fast )
|
||||
*(1)=Good Quality Fast Encoding. The encoder balances quality with
|
||||
* the amount of time it takes to encode the output. ( speed
|
||||
* setting controls how fast )
|
||||
*(2)=One Pass - Best Quality. The encoder places priority on the
|
||||
* quality of the output over encoding speed. The output is
|
||||
* compressed at the highest possible quality. This option takes
|
||||
* the longest amount of time to encode. ( speed setting ignored
|
||||
* )
|
||||
*(3)=Two Pass - First Pass. The encoder generates a file of
|
||||
* statistics for use in the second encoding pass. ( speed
|
||||
* setting controls how fast )
|
||||
*(4)=Two Pass - Second Pass. The encoder uses the statistics that
|
||||
* were generated in the first encoding pass to create the
|
||||
* compressed output. ( speed setting controls how fast )
|
||||
*(5)=Two Pass - Second Pass Best. The encoder uses the statistics
|
||||
* that were generated in the first encoding pass to create the
|
||||
* compressed output using the highest possible quality, and
|
||||
* taking a longer amount of time to encode.. ( speed setting
|
||||
* ignored )
|
||||
*/
|
||||
int Mode;
|
||||
|
||||
/* Key Framing Operations */
|
||||
int auto_key; /* automatically detect cut scenes */
|
||||
int key_freq; /* maximum distance to key frame. */
|
||||
/* Key Framing Operations */
|
||||
int auto_key; /* automatically detect cut scenes */
|
||||
int key_freq; /* maximum distance to key frame. */
|
||||
|
||||
/* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */
|
||||
int allow_lag;
|
||||
int lag_in_frames; /* how many frames lag before we start encoding */
|
||||
/* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */
|
||||
int allow_lag;
|
||||
int lag_in_frames; /* how many frames lag before we start encoding */
|
||||
|
||||
/*
|
||||
* DATARATE CONTROL OPTIONS
|
||||
*/
|
||||
/*
|
||||
* DATARATE CONTROL OPTIONS
|
||||
*/
|
||||
|
||||
int end_usage; /* vbr or cbr */
|
||||
int end_usage; /* vbr or cbr */
|
||||
|
||||
/* buffer targeting aggressiveness */
|
||||
int under_shoot_pct;
|
||||
int over_shoot_pct;
|
||||
/* buffer targeting aggressiveness */
|
||||
int under_shoot_pct;
|
||||
int over_shoot_pct;
|
||||
|
||||
/* buffering parameters */
|
||||
int64_t starting_buffer_level;
|
||||
int64_t optimal_buffer_level;
|
||||
int64_t maximum_buffer_size;
|
||||
/* buffering parameters */
|
||||
int64_t starting_buffer_level;
|
||||
int64_t optimal_buffer_level;
|
||||
int64_t maximum_buffer_size;
|
||||
|
||||
int64_t starting_buffer_level_in_ms;
|
||||
int64_t optimal_buffer_level_in_ms;
|
||||
int64_t maximum_buffer_size_in_ms;
|
||||
int64_t starting_buffer_level_in_ms;
|
||||
int64_t optimal_buffer_level_in_ms;
|
||||
int64_t maximum_buffer_size_in_ms;
|
||||
|
||||
/* controlling quality */
|
||||
int fixed_q;
|
||||
int worst_allowed_q;
|
||||
int best_allowed_q;
|
||||
int cq_level;
|
||||
/* controlling quality */
|
||||
int fixed_q;
|
||||
int worst_allowed_q;
|
||||
int best_allowed_q;
|
||||
int cq_level;
|
||||
|
||||
/* allow internal resizing */
|
||||
int allow_spatial_resampling;
|
||||
int resample_down_water_mark;
|
||||
int resample_up_water_mark;
|
||||
/* allow internal resizing */
|
||||
int allow_spatial_resampling;
|
||||
int resample_down_water_mark;
|
||||
int resample_up_water_mark;
|
||||
|
||||
/* allow internal frame rate alterations */
|
||||
int allow_df;
|
||||
int drop_frames_water_mark;
|
||||
/* allow internal frame rate alterations */
|
||||
int allow_df;
|
||||
int drop_frames_water_mark;
|
||||
|
||||
/* two pass datarate control */
|
||||
int two_pass_vbrbias;
|
||||
int two_pass_vbrmin_section;
|
||||
int two_pass_vbrmax_section;
|
||||
/* two pass datarate control */
|
||||
int two_pass_vbrbias;
|
||||
int two_pass_vbrmin_section;
|
||||
int two_pass_vbrmax_section;
|
||||
|
||||
/*
|
||||
* END DATARATE CONTROL OPTIONS
|
||||
*/
|
||||
/*
|
||||
* END DATARATE CONTROL OPTIONS
|
||||
*/
|
||||
|
||||
/* these parameters aren't to be used in final build don't use!!! */
|
||||
int play_alternate;
|
||||
int alt_freq;
|
||||
int alt_q;
|
||||
int key_q;
|
||||
int gold_q;
|
||||
/* these parameters aren't to be used in final build don't use!!! */
|
||||
int play_alternate;
|
||||
int alt_freq;
|
||||
int alt_q;
|
||||
int key_q;
|
||||
int gold_q;
|
||||
|
||||
int multi_threaded; /* how many threads to run the encoder on */
|
||||
int token_partitions; /* how many token partitions to create */
|
||||
|
||||
int multi_threaded; /* how many threads to run the encoder on */
|
||||
int token_partitions; /* how many token partitions to create */
|
||||
/* early breakout threshold: for video conf recommend 800 */
|
||||
int encode_breakout;
|
||||
|
||||
/* early breakout threshold: for video conf recommend 800 */
|
||||
int encode_breakout;
|
||||
/* Bitfield defining the error resiliency features to enable.
|
||||
* Can provide decodable frames after losses in previous
|
||||
* frames and decodable partitions after losses in the same frame.
|
||||
*/
|
||||
unsigned int error_resilient_mode;
|
||||
|
||||
/* Bitfield defining the error resiliency features to enable.
|
||||
* Can provide decodable frames after losses in previous
|
||||
* frames and decodable partitions after losses in the same frame.
|
||||
*/
|
||||
unsigned int error_resilient_mode;
|
||||
int arnr_max_frames;
|
||||
int arnr_strength;
|
||||
int arnr_type;
|
||||
|
||||
int arnr_max_frames;
|
||||
int arnr_strength;
|
||||
int arnr_type;
|
||||
vpx_fixed_buf_t two_pass_stats_in;
|
||||
struct vpx_codec_pkt_list *output_pkt_list;
|
||||
|
||||
vpx_fixed_buf_t two_pass_stats_in;
|
||||
struct vpx_codec_pkt_list *output_pkt_list;
|
||||
vp8e_tuning tuning;
|
||||
|
||||
vp8e_tuning tuning;
|
||||
|
||||
/* Temporal scaling parameters */
|
||||
unsigned int number_of_layers;
|
||||
unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY];
|
||||
unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY];
|
||||
unsigned int periodicity;
|
||||
unsigned int layer_id[VPX_TS_MAX_PERIODICITY];
|
||||
/* Temporal scaling parameters */
|
||||
unsigned int number_of_layers;
|
||||
unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY];
|
||||
unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY];
|
||||
unsigned int periodicity;
|
||||
unsigned int layer_id[VPX_TS_MAX_PERIODICITY];
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
/* Number of total resolutions encoded */
|
||||
unsigned int mr_total_resolutions;
|
||||
/* Number of total resolutions encoded */
|
||||
unsigned int mr_total_resolutions;
|
||||
|
||||
/* Current encoder ID */
|
||||
unsigned int mr_encoder_id;
|
||||
/* Current encoder ID */
|
||||
unsigned int mr_encoder_id;
|
||||
|
||||
/* Down-sampling factor */
|
||||
vpx_rational_t mr_down_sampling_factor;
|
||||
/* Down-sampling factor */
|
||||
vpx_rational_t mr_down_sampling_factor;
|
||||
|
||||
/* Memory location to store low-resolution encoder's mode info */
|
||||
void* mr_low_res_mode_info;
|
||||
/* Memory location to store low-resolution encoder's mode info */
|
||||
void *mr_low_res_mode_info;
|
||||
#endif
|
||||
} VP8_CONFIG;
|
||||
} VP8_CONFIG;
|
||||
|
||||
void vp8_initialize();
|
||||
|
||||
void vp8_initialize();
|
||||
struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf);
|
||||
void vp8_remove_compressor(struct VP8_COMP **comp);
|
||||
|
||||
struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf);
|
||||
void vp8_remove_compressor(struct VP8_COMP* *comp);
|
||||
void vp8_init_config(struct VP8_COMP *onyx, VP8_CONFIG *oxcf);
|
||||
void vp8_change_config(struct VP8_COMP *onyx, VP8_CONFIG *oxcf);
|
||||
|
||||
void vp8_init_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
|
||||
void vp8_change_config(struct VP8_COMP* onyx, VP8_CONFIG *oxcf);
|
||||
int vp8_receive_raw_frame(struct VP8_COMP *comp, unsigned int frame_flags,
|
||||
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
|
||||
int64_t end_time_stamp);
|
||||
int vp8_get_compressed_data(struct VP8_COMP *comp, unsigned int *frame_flags,
|
||||
unsigned long *size, unsigned char *dest,
|
||||
unsigned char *dest_end, int64_t *time_stamp,
|
||||
int64_t *time_end, int flush);
|
||||
int vp8_get_preview_raw_frame(struct VP8_COMP *comp, YV12_BUFFER_CONFIG *dest,
|
||||
vp8_ppflags_t *flags);
|
||||
|
||||
int vp8_receive_raw_frame(struct VP8_COMP* comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
|
||||
int vp8_get_compressed_data(struct VP8_COMP* comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush);
|
||||
int vp8_get_preview_raw_frame(struct VP8_COMP* comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
|
||||
|
||||
int vp8_use_as_reference(struct VP8_COMP* comp, int ref_frame_flags);
|
||||
int vp8_update_reference(struct VP8_COMP* comp, int ref_frame_flags);
|
||||
int vp8_get_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
int vp8_set_reference(struct VP8_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
int vp8_update_entropy(struct VP8_COMP* comp, int update);
|
||||
int vp8_set_roimap(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]);
|
||||
int vp8_set_active_map(struct VP8_COMP* comp, unsigned char *map, unsigned int rows, unsigned int cols);
|
||||
int vp8_set_internal_size(struct VP8_COMP* comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
|
||||
int vp8_get_quantizer(struct VP8_COMP* c);
|
||||
int vp8_use_as_reference(struct VP8_COMP *comp, int ref_frame_flags);
|
||||
int vp8_update_reference(struct VP8_COMP *comp, int ref_frame_flags);
|
||||
int vp8_get_reference(struct VP8_COMP *comp,
|
||||
enum vpx_ref_frame_type ref_frame_flag,
|
||||
YV12_BUFFER_CONFIG *sd);
|
||||
int vp8_set_reference(struct VP8_COMP *comp,
|
||||
enum vpx_ref_frame_type ref_frame_flag,
|
||||
YV12_BUFFER_CONFIG *sd);
|
||||
int vp8_update_entropy(struct VP8_COMP *comp, int update);
|
||||
int vp8_set_roimap(struct VP8_COMP *comp, unsigned char *map, unsigned int rows,
|
||||
unsigned int cols, int delta_q[4], int delta_lf[4],
|
||||
unsigned int threshold[4]);
|
||||
int vp8_set_active_map(struct VP8_COMP *comp, unsigned char *map,
|
||||
unsigned int rows, unsigned int cols);
|
||||
int vp8_set_internal_size(struct VP8_COMP *comp, VPX_SCALING horiz_mode,
|
||||
VPX_SCALING vert_mode);
|
||||
int vp8_get_quantizer(struct VP8_COMP *c);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ONYXC_INT_H_
|
||||
#define VP8_COMMON_ONYXC_INT_H_
|
||||
|
||||
@ -38,144 +37,143 @@ extern "C" {
|
||||
|
||||
#define MAX_PARTITIONS 9
|
||||
|
||||
typedef struct frame_contexts
|
||||
{
|
||||
vp8_prob bmode_prob [VP8_BINTRAMODES-1];
|
||||
vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */
|
||||
vp8_prob uv_mode_prob [VP8_UV_MODES-1];
|
||||
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
|
||||
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
|
||||
MV_CONTEXT mvc[2];
|
||||
typedef struct frame_contexts {
|
||||
vp8_prob bmode_prob[VP8_BINTRAMODES - 1];
|
||||
vp8_prob ymode_prob[VP8_YMODES - 1]; /* interframe intra mode probs */
|
||||
vp8_prob uv_mode_prob[VP8_UV_MODES - 1];
|
||||
vp8_prob sub_mv_ref_prob[VP8_SUBMVREFS - 1];
|
||||
vp8_prob
|
||||
coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
|
||||
MV_CONTEXT mvc[2];
|
||||
} FRAME_CONTEXT;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
ONE_PARTITION = 0,
|
||||
TWO_PARTITION = 1,
|
||||
FOUR_PARTITION = 2,
|
||||
EIGHT_PARTITION = 3
|
||||
typedef enum {
|
||||
ONE_PARTITION = 0,
|
||||
TWO_PARTITION = 1,
|
||||
FOUR_PARTITION = 2,
|
||||
EIGHT_PARTITION = 3
|
||||
} TOKEN_PARTITION;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
RECON_CLAMP_REQUIRED = 0,
|
||||
RECON_CLAMP_NOTREQUIRED = 1
|
||||
typedef enum {
|
||||
RECON_CLAMP_REQUIRED = 0,
|
||||
RECON_CLAMP_NOTREQUIRED = 1
|
||||
} CLAMP_TYPE;
|
||||
|
||||
typedef struct VP8Common
|
||||
|
||||
{
|
||||
struct vpx_internal_error_info error;
|
||||
{
|
||||
struct vpx_internal_error_info error;
|
||||
|
||||
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
|
||||
|
||||
int Width;
|
||||
int Height;
|
||||
int horiz_scale;
|
||||
int vert_scale;
|
||||
int Width;
|
||||
int Height;
|
||||
int horiz_scale;
|
||||
int vert_scale;
|
||||
|
||||
CLAMP_TYPE clamp_type;
|
||||
CLAMP_TYPE clamp_type;
|
||||
|
||||
YV12_BUFFER_CONFIG *frame_to_show;
|
||||
YV12_BUFFER_CONFIG *frame_to_show;
|
||||
|
||||
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
|
||||
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
|
||||
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
|
||||
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
|
||||
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
|
||||
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
|
||||
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG post_proc_buffer_int;
|
||||
int post_proc_buffer_int_used;
|
||||
unsigned char *pp_limits_buffer; /* post-processing filter coefficients */
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG post_proc_buffer_int;
|
||||
int post_proc_buffer_int_used;
|
||||
unsigned char *pp_limits_buffer; /* post-processing filter coefficients */
|
||||
#endif
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
|
||||
FRAME_TYPE frame_type;
|
||||
FRAME_TYPE
|
||||
last_frame_type; /* Save last frame's frame type for motion search. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
int show_frame;
|
||||
|
||||
int frame_flags;
|
||||
int MBs;
|
||||
int mb_rows;
|
||||
int mb_cols;
|
||||
int mode_info_stride;
|
||||
int frame_flags;
|
||||
int MBs;
|
||||
int mb_rows;
|
||||
int mb_cols;
|
||||
int mode_info_stride;
|
||||
|
||||
/* profile settings */
|
||||
int mb_no_coeff_skip;
|
||||
int no_lpf;
|
||||
int use_bilinear_mc_filter;
|
||||
int full_pixel;
|
||||
/* profile settings */
|
||||
int mb_no_coeff_skip;
|
||||
int no_lpf;
|
||||
int use_bilinear_mc_filter;
|
||||
int full_pixel;
|
||||
|
||||
int base_qindex;
|
||||
int base_qindex;
|
||||
|
||||
int y1dc_delta_q;
|
||||
int y2dc_delta_q;
|
||||
int y2ac_delta_q;
|
||||
int uvdc_delta_q;
|
||||
int uvac_delta_q;
|
||||
int y1dc_delta_q;
|
||||
int y2dc_delta_q;
|
||||
int y2ac_delta_q;
|
||||
int uvdc_delta_q;
|
||||
int uvac_delta_q;
|
||||
|
||||
/* We allocate a MODE_INFO struct for each macroblock, together with
|
||||
an extra row on top and column on the left to simplify prediction. */
|
||||
/* We allocate a MODE_INFO struct for each macroblock, together with
|
||||
an extra row on top and column on the left to simplify prediction. */
|
||||
|
||||
MODE_INFO *mip; /* Base of allocated array */
|
||||
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
|
||||
MODE_INFO *mip; /* Base of allocated array */
|
||||
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
|
||||
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
|
||||
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
|
||||
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
|
||||
#endif
|
||||
MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame
|
||||
to show */
|
||||
LOOPFILTERTYPE filter_type;
|
||||
/* MODE_INFO for the last decoded frame to show */
|
||||
MODE_INFO *show_frame_mi;
|
||||
LOOPFILTERTYPE filter_type;
|
||||
|
||||
loop_filter_info_n lf_info;
|
||||
loop_filter_info_n lf_info;
|
||||
|
||||
int filter_level;
|
||||
int last_sharpness_level;
|
||||
int sharpness_level;
|
||||
int filter_level;
|
||||
int last_sharpness_level;
|
||||
int sharpness_level;
|
||||
|
||||
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
|
||||
|
||||
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
|
||||
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
|
||||
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
|
||||
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
|
||||
|
||||
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
|
||||
|
||||
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
|
||||
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
|
||||
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
|
||||
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
|
||||
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
|
||||
|
||||
FRAME_CONTEXT lfc; /* last frame entropy */
|
||||
FRAME_CONTEXT fc; /* this frame entropy */
|
||||
FRAME_CONTEXT lfc; /* last frame entropy */
|
||||
FRAME_CONTEXT fc; /* this frame entropy */
|
||||
|
||||
unsigned int current_video_frame;
|
||||
unsigned int current_video_frame;
|
||||
|
||||
int version;
|
||||
int version;
|
||||
|
||||
TOKEN_PARTITION multi_token_partition;
|
||||
TOKEN_PARTITION multi_token_partition;
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
VP8_HEADER oh;
|
||||
VP8_HEADER oh;
|
||||
#endif
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
double bitrate;
|
||||
double framerate;
|
||||
double bitrate;
|
||||
double framerate;
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
int processor_core_count;
|
||||
int processor_core_count;
|
||||
#endif
|
||||
#if CONFIG_POSTPROC
|
||||
struct postproc_state postproc_state;
|
||||
struct postproc_state postproc_state;
|
||||
#endif
|
||||
int cpu_caps;
|
||||
int cpu_caps;
|
||||
} VP8_COMMON;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -8,15 +8,12 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ONYXD_H_
|
||||
#define VP8_COMMON_ONYXD_H_
|
||||
|
||||
|
||||
/* Create/destroy static data structures. */
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "ppflags.h"
|
||||
@ -24,40 +21,40 @@ extern "C"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vp8.h"
|
||||
|
||||
struct VP8D_COMP;
|
||||
struct VP8D_COMP;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int Width;
|
||||
int Height;
|
||||
int Version;
|
||||
int postprocess;
|
||||
int max_threads;
|
||||
int error_concealment;
|
||||
} VP8D_CONFIG;
|
||||
typedef struct {
|
||||
int Width;
|
||||
int Height;
|
||||
int Version;
|
||||
int postprocess;
|
||||
int max_threads;
|
||||
int error_concealment;
|
||||
} VP8D_CONFIG;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
VP8D_OK = 0
|
||||
} VP8D_SETTING;
|
||||
typedef enum { VP8D_OK = 0 } VP8D_SETTING;
|
||||
|
||||
void vp8dx_initialize(void);
|
||||
void vp8dx_initialize(void);
|
||||
|
||||
void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x);
|
||||
void vp8dx_set_setting(struct VP8D_COMP *comp, VP8D_SETTING oxst, int x);
|
||||
|
||||
int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
|
||||
int vp8dx_get_setting(struct VP8D_COMP *comp, VP8D_SETTING oxst);
|
||||
|
||||
int vp8dx_receive_compressed_data(struct VP8D_COMP* comp,
|
||||
size_t size, const uint8_t *dest,
|
||||
int64_t time_stamp);
|
||||
int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
|
||||
int vp8dx_receive_compressed_data(struct VP8D_COMP *comp, size_t size,
|
||||
const uint8_t *dest, int64_t time_stamp);
|
||||
int vp8dx_get_raw_frame(struct VP8D_COMP *comp, YV12_BUFFER_CONFIG *sd,
|
||||
int64_t *time_stamp, int64_t *time_end_stamp,
|
||||
vp8_ppflags_t *flags);
|
||||
|
||||
vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP *comp,
|
||||
enum vpx_ref_frame_type ref_frame_flag,
|
||||
YV12_BUFFER_CONFIG *sd);
|
||||
vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP *comp,
|
||||
enum vpx_ref_frame_type ref_frame_flag,
|
||||
YV12_BUFFER_CONFIG *sd);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif // VP8_COMMON_ONYXD_H_
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8,19 +8,17 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_POSTPROC_H_
|
||||
#define VP8_COMMON_POSTPROC_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
struct postproc_state
|
||||
{
|
||||
int last_q;
|
||||
int last_noise;
|
||||
int last_base_qindex;
|
||||
int last_frame_valid;
|
||||
int clamp;
|
||||
int8_t *generated_noise;
|
||||
struct postproc_state {
|
||||
int last_q;
|
||||
int last_noise;
|
||||
int last_base_qindex;
|
||||
int last_frame_valid;
|
||||
int clamp;
|
||||
int8_t *generated_noise;
|
||||
};
|
||||
#include "onyxc_int.h"
|
||||
#include "ppflags.h"
|
||||
@ -31,21 +29,12 @@ extern "C" {
|
||||
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
|
||||
vp8_ppflags_t *flags);
|
||||
|
||||
void vp8_de_noise(struct VP8Common *oci, YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag,
|
||||
int uvfilter);
|
||||
|
||||
void vp8_de_noise(struct VP8Common *oci,
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *post,
|
||||
int q,
|
||||
int low_var_thresh,
|
||||
int flag,
|
||||
int uvfilter);
|
||||
|
||||
void vp8_deblock(struct VP8Common *oci,
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *post,
|
||||
int q,
|
||||
int low_var_thresh,
|
||||
int flag);
|
||||
void vp8_deblock(struct VP8Common *oci, YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag);
|
||||
|
||||
#define MFQE_PRECISION 4
|
||||
|
||||
|
@ -8,38 +8,35 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_PPFLAGS_H_
|
||||
#define VP8_COMMON_PPFLAGS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
enum
|
||||
{
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
|
||||
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
|
||||
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
|
||||
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
|
||||
VP8D_DEBUG_DRAW_MV = 1<<7,
|
||||
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
|
||||
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9,
|
||||
VP8D_MFQE = 1<<10
|
||||
enum {
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1 << 0,
|
||||
VP8D_DEMACROBLOCK = 1 << 1,
|
||||
VP8D_ADDNOISE = 1 << 2,
|
||||
VP8D_DEBUG_TXT_FRAME_INFO = 1 << 3,
|
||||
VP8D_DEBUG_TXT_MBLK_MODES = 1 << 4,
|
||||
VP8D_DEBUG_TXT_DC_DIFF = 1 << 5,
|
||||
VP8D_DEBUG_TXT_RATE_INFO = 1 << 6,
|
||||
VP8D_DEBUG_DRAW_MV = 1 << 7,
|
||||
VP8D_DEBUG_CLR_BLK_MODES = 1 << 8,
|
||||
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9,
|
||||
VP8D_MFQE = 1 << 10
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int post_proc_flag;
|
||||
int deblocking_level;
|
||||
int noise_level;
|
||||
int display_ref_frame_flag;
|
||||
int display_mb_modes_flag;
|
||||
int display_b_modes_flag;
|
||||
int display_mv_flag;
|
||||
typedef struct {
|
||||
int post_proc_flag;
|
||||
int deblocking_level;
|
||||
int noise_level;
|
||||
int display_ref_frame_flag;
|
||||
int display_mb_modes_flag;
|
||||
int display_b_modes_flag;
|
||||
int display_mv_flag;
|
||||
} vp8_ppflags_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -8,128 +8,117 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "quant_common.h"
|
||||
|
||||
static const int dc_qlookup[QINDEX_RANGE] =
|
||||
{
|
||||
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
|
||||
18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
|
||||
29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
|
||||
44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
|
||||
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
|
||||
75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
|
||||
91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
|
||||
122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
|
||||
static const int dc_qlookup[QINDEX_RANGE] = {
|
||||
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17,
|
||||
17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26,
|
||||
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54,
|
||||
55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
|
||||
70, 71, 72, 73, 74, 75, 76, 76, 77, 78, 79, 80, 81, 82, 83,
|
||||
84, 85, 86, 87, 88, 89, 91, 93, 95, 96, 98, 100, 101, 102, 104,
|
||||
106, 108, 110, 112, 114, 116, 118, 122, 124, 126, 128, 130, 132, 134, 136,
|
||||
138, 140, 143, 145, 148, 151, 154, 157,
|
||||
};
|
||||
|
||||
static const int ac_qlookup[QINDEX_RANGE] =
|
||||
{
|
||||
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
|
||||
52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
|
||||
78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
|
||||
110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
|
||||
155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
|
||||
213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
|
||||
static const int ac_qlookup[QINDEX_RANGE] = {
|
||||
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
|
||||
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
|
||||
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
|
||||
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68,
|
||||
70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98,
|
||||
100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128, 131, 134,
|
||||
137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181,
|
||||
185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 234, 239, 245,
|
||||
249, 254, 259, 264, 269, 274, 279, 284,
|
||||
};
|
||||
|
||||
int vp8_dc_quant(int QIndex, int Delta) {
|
||||
int retval;
|
||||
|
||||
int vp8_dc_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ];
|
||||
return retval;
|
||||
retval = dc_qlookup[QIndex];
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_dc2quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
int vp8_dc2quant(int QIndex, int Delta) {
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ] * 2;
|
||||
return retval;
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[QIndex] * 2;
|
||||
return retval;
|
||||
}
|
||||
int vp8_dc_uv_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
int vp8_dc_uv_quant(int QIndex, int Delta) {
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ];
|
||||
retval = dc_qlookup[QIndex];
|
||||
|
||||
if (retval > 132)
|
||||
retval = 132;
|
||||
if (retval > 132) retval = 132;
|
||||
|
||||
return retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_ac_yquant(int QIndex)
|
||||
{
|
||||
int retval;
|
||||
int vp8_ac_yquant(int QIndex) {
|
||||
int retval;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = ac_qlookup[ QIndex ];
|
||||
return retval;
|
||||
retval = ac_qlookup[QIndex];
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_ac2quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
int vp8_ac2quant(int QIndex, int Delta) {
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
/* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
|
||||
* The smallest precision for that is '(x*6349) >> 12' but 16 is a good
|
||||
* word size. */
|
||||
retval = (ac_qlookup[ QIndex ] * 101581) >> 16;
|
||||
/* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
|
||||
* The smallest precision for that is '(x*6349) >> 12' but 16 is a good
|
||||
* word size. */
|
||||
retval = (ac_qlookup[QIndex] * 101581) >> 16;
|
||||
|
||||
if (retval < 8)
|
||||
retval = 8;
|
||||
if (retval < 8) retval = 8;
|
||||
|
||||
return retval;
|
||||
return retval;
|
||||
}
|
||||
int vp8_ac_uv_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
int vp8_ac_uv_quant(int QIndex, int Delta) {
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = ac_qlookup[ QIndex ];
|
||||
return retval;
|
||||
retval = ac_qlookup[QIndex];
|
||||
return retval;
|
||||
}
|
||||
|
@ -11,7 +11,6 @@
|
||||
#ifndef VP8_COMMON_QUANT_COMMON_H_
|
||||
#define VP8_COMMON_QUANT_COMMON_H_
|
||||
|
||||
|
||||
#include "string.h"
|
||||
#include "blockd.h"
|
||||
#include "onyxc_int.h"
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
@ -21,524 +20,474 @@
|
||||
#include "onyxc_int.h"
|
||||
#endif
|
||||
|
||||
void vp8_copy_mem16x16_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
void vp8_copy_mem16x16_c(unsigned char *src, int src_stride, unsigned char *dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
memcpy(dst, src, 16);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
for (r = 0; r < 16; r++) {
|
||||
memcpy(dst, src, 16);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x8_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
void vp8_copy_mem8x8_c(unsigned char *src, int src_stride, unsigned char *dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 8; r++)
|
||||
{
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
for (r = 0; r < 8; r++) {
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x4_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
void vp8_copy_mem8x4_c(unsigned char *src, int src_stride, unsigned char *dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
for (r = 0; r < 4; r++) {
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre,
|
||||
int pre_stride, vp8_subpix_fn_t sppf) {
|
||||
int r;
|
||||
unsigned char *pred_ptr = d->predictor;
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
|
||||
(d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
|
||||
{
|
||||
int r;
|
||||
unsigned char *pred_ptr = d->predictor;
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
pred_ptr[0] = ptr[0];
|
||||
pred_ptr[1] = ptr[1];
|
||||
pred_ptr[2] = ptr[2];
|
||||
pred_ptr[3] = ptr[3];
|
||||
pred_ptr += pitch;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7,
|
||||
pred_ptr, pitch);
|
||||
} else {
|
||||
for (r = 0; r < 4; r++) {
|
||||
pred_ptr[0] = ptr[0];
|
||||
pred_ptr[1] = ptr[1];
|
||||
pred_ptr[2] = ptr[2];
|
||||
pred_ptr[3] = ptr[3];
|
||||
pred_ptr += pitch;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
|
||||
{
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d,
|
||||
unsigned char *dst, int dst_stride,
|
||||
unsigned char *base_pre, int pre_stride) {
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
|
||||
(d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
|
||||
}
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
|
||||
x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7,
|
||||
d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
} else {
|
||||
vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
|
||||
{
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d,
|
||||
unsigned char *dst, int dst_stride,
|
||||
unsigned char *base_pre, int pre_stride) {
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
|
||||
(d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
|
||||
}
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
|
||||
x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7,
|
||||
d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
} else {
|
||||
vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
|
||||
{
|
||||
int r;
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst,
|
||||
int dst_stride, unsigned char *base_pre,
|
||||
int pre_stride, vp8_subpix_fn_t sppf) {
|
||||
int r;
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
|
||||
(d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
dst[0] = ptr[0];
|
||||
dst[1] = ptr[1];
|
||||
dst[2] = ptr[2];
|
||||
dst[3] = ptr[3];
|
||||
dst += dst_stride;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
|
||||
{
|
||||
unsigned char *uptr, *vptr;
|
||||
unsigned char *upred_ptr = &x->predictor[256];
|
||||
unsigned char *vpred_ptr = &x->predictor[320];
|
||||
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int offset;
|
||||
int pre_stride = x->pre.uv_stride;
|
||||
|
||||
/* calc uv motion vectors */
|
||||
mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
|
||||
mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
|
||||
mv_row /= 2;
|
||||
mv_col /= 2;
|
||||
mv_row &= x->fullpixel_mask;
|
||||
mv_col &= x->fullpixel_mask;
|
||||
|
||||
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
|
||||
vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst,
|
||||
dst_stride);
|
||||
} else {
|
||||
for (r = 0; r < 4; r++) {
|
||||
dst[0] = ptr[0];
|
||||
dst[1] = ptr[1];
|
||||
dst[2] = ptr[2];
|
||||
dst[3] = ptr[3];
|
||||
dst += dst_stride;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
|
||||
{
|
||||
int i, j;
|
||||
int pre_stride = x->pre.uv_stride;
|
||||
unsigned char *base_pre;
|
||||
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) {
|
||||
unsigned char *uptr, *vptr;
|
||||
unsigned char *upred_ptr = &x->predictor[256];
|
||||
unsigned char *vpred_ptr = &x->predictor[320];
|
||||
|
||||
/* build uv mvs */
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
int yoffset = i * 8 + j * 2;
|
||||
int uoffset = 16 + i * 2 + j;
|
||||
int voffset = 20 + i * 2 + j;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int offset;
|
||||
int pre_stride = x->pre.uv_stride;
|
||||
|
||||
int temp;
|
||||
/* calc uv motion vectors */
|
||||
mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
|
||||
mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
|
||||
mv_row /= 2;
|
||||
mv_col /= 2;
|
||||
mv_row &= x->fullpixel_mask;
|
||||
mv_col &= x->fullpixel_mask;
|
||||
|
||||
temp = x->block[yoffset ].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+1].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.row;
|
||||
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
temp = x->block[yoffset ].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+1].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
|
||||
}
|
||||
}
|
||||
|
||||
base_pre = x->pre.u_buffer;
|
||||
for (i = 16; i < 20; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
|
||||
base_pre = x->pre.v_buffer;
|
||||
for (i = 20; i < 24; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
if ((mv_row | mv_col) & 7) {
|
||||
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr,
|
||||
8);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr,
|
||||
8);
|
||||
} else {
|
||||
vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
|
||||
vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
int dst_ystride)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->pre.y_stride;
|
||||
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) {
|
||||
int i, j;
|
||||
int pre_stride = x->pre.uv_stride;
|
||||
unsigned char *base_pre;
|
||||
|
||||
ptr_base = x->pre.y_buffer;
|
||||
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
/* build uv mvs */
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
int yoffset = i * 8 + j * 2;
|
||||
int uoffset = 16 + i * 2 + j;
|
||||
int voffset = 20 + i * 2 + j;
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
|
||||
dst_y, dst_ystride);
|
||||
int temp;
|
||||
|
||||
temp = x->block[yoffset].bmi.mv.as_mv.row +
|
||||
x->block[yoffset + 1].bmi.mv.as_mv.row +
|
||||
x->block[yoffset + 4].bmi.mv.as_mv.row +
|
||||
x->block[yoffset + 5].bmi.mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
temp = x->block[yoffset].bmi.mv.as_mv.col +
|
||||
x->block[yoffset + 1].bmi.mv.as_mv.col +
|
||||
x->block[yoffset + 4].bmi.mv.as_mv.col +
|
||||
x->block[yoffset + 5].bmi.mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(ptr, pre_stride, dst_y,
|
||||
dst_ystride);
|
||||
}
|
||||
|
||||
base_pre = x->pre.u_buffer;
|
||||
for (i = 16; i < 20; i += 2) {
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i + 1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
|
||||
else {
|
||||
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride,
|
||||
x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride,
|
||||
x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
|
||||
base_pre = x->pre.v_buffer;
|
||||
for (i = 20; i < 24; i += 2) {
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i + 1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
|
||||
else {
|
||||
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride,
|
||||
x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride,
|
||||
x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
/* If the MV points so far into the UMV border that no visible pixels
|
||||
* are used for reconstruction, the subpel part of the MV can be
|
||||
* discarded and the MV limited to 16 pixels with equivalent results.
|
||||
*
|
||||
* This limit kicks in at 19 pixels for the top and left edges, for
|
||||
* the 16 pixels plus 3 taps right of the central pixel when subpel
|
||||
* filtering. The bottom and right edges use 16 pixels plus 2 pixels
|
||||
* left of the central pixel when filtering.
|
||||
*/
|
||||
if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
|
||||
mv->col = xd->mb_to_left_edge - (16 << 3);
|
||||
else if (mv->col > xd->mb_to_right_edge + (18 << 3))
|
||||
mv->col = xd->mb_to_right_edge + (16 << 3);
|
||||
/*encoder only*/
|
||||
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, unsigned char *dst_y,
|
||||
int dst_ystride) {
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->pre.y_stride;
|
||||
|
||||
if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
|
||||
mv->row = xd->mb_to_top_edge - (16 << 3);
|
||||
else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
|
||||
mv->row = xd->mb_to_bottom_edge + (16 << 3);
|
||||
ptr_base = x->pre.y_buffer;
|
||||
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
|
||||
if ((mv_row | mv_col) & 7) {
|
||||
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y,
|
||||
dst_ystride);
|
||||
} else {
|
||||
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
|
||||
}
|
||||
}
|
||||
|
||||
static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
|
||||
/* If the MV points so far into the UMV border that no visible pixels
|
||||
* are used for reconstruction, the subpel part of the MV can be
|
||||
* discarded and the MV limited to 16 pixels with equivalent results.
|
||||
*
|
||||
* This limit kicks in at 19 pixels for the top and left edges, for
|
||||
* the 16 pixels plus 3 taps right of the central pixel when subpel
|
||||
* filtering. The bottom and right edges use 16 pixels plus 2 pixels
|
||||
* left of the central pixel when filtering.
|
||||
*/
|
||||
if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
|
||||
mv->col = xd->mb_to_left_edge - (16 << 3);
|
||||
else if (mv->col > xd->mb_to_right_edge + (18 << 3))
|
||||
mv->col = xd->mb_to_right_edge + (16 << 3);
|
||||
|
||||
if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
|
||||
mv->row = xd->mb_to_top_edge - (16 << 3);
|
||||
else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
|
||||
mv->row = xd->mb_to_bottom_edge + (16 << 3);
|
||||
}
|
||||
|
||||
/* A version of the above function for chroma block MVs.*/
|
||||
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ?
|
||||
(xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
|
||||
mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ?
|
||||
(xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
|
||||
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
|
||||
mv->col = (2 * mv->col < (xd->mb_to_left_edge - (19 << 3)))
|
||||
? (xd->mb_to_left_edge - (16 << 3)) >> 1
|
||||
: mv->col;
|
||||
mv->col = (2 * mv->col > xd->mb_to_right_edge + (18 << 3))
|
||||
? (xd->mb_to_right_edge + (16 << 3)) >> 1
|
||||
: mv->col;
|
||||
|
||||
mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ?
|
||||
(xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
|
||||
mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ?
|
||||
(xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
|
||||
mv->row = (2 * mv->row < (xd->mb_to_top_edge - (19 << 3)))
|
||||
? (xd->mb_to_top_edge - (16 << 3)) >> 1
|
||||
: mv->row;
|
||||
mv->row = (2 * mv->row > xd->mb_to_bottom_edge + (18 << 3))
|
||||
? (xd->mb_to_bottom_edge + (16 << 3)) >> 1
|
||||
: mv->row;
|
||||
}
|
||||
|
||||
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, unsigned char *dst_y,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_ystride,
|
||||
int dst_uvstride)
|
||||
{
|
||||
int offset;
|
||||
unsigned char *ptr;
|
||||
unsigned char *uptr, *vptr;
|
||||
unsigned char *dst_v, int dst_ystride,
|
||||
int dst_uvstride) {
|
||||
int offset;
|
||||
unsigned char *ptr;
|
||||
unsigned char *uptr, *vptr;
|
||||
|
||||
int_mv _16x16mv;
|
||||
int_mv _16x16mv;
|
||||
|
||||
unsigned char *ptr_base = x->pre.y_buffer;
|
||||
int pre_stride = x->pre.y_stride;
|
||||
unsigned char *ptr_base = x->pre.y_buffer;
|
||||
int pre_stride = x->pre.y_stride;
|
||||
|
||||
_16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;
|
||||
_16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;
|
||||
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
|
||||
}
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
|
||||
clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
|
||||
}
|
||||
|
||||
ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
|
||||
ptr = ptr_base + (_16x16mv.as_mv.row >> 3) * pre_stride +
|
||||
(_16x16mv.as_mv.col >> 3);
|
||||
|
||||
if ( _16x16mv.as_int & 0x00070007)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
|
||||
}
|
||||
if (_16x16mv.as_int & 0x00070007) {
|
||||
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7,
|
||||
_16x16mv.as_mv.row & 7, dst_y, dst_ystride);
|
||||
} else {
|
||||
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
|
||||
}
|
||||
|
||||
/* calc uv motion vectors */
|
||||
_16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
|
||||
_16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
|
||||
_16x16mv.as_mv.row /= 2;
|
||||
_16x16mv.as_mv.col /= 2;
|
||||
_16x16mv.as_mv.row &= x->fullpixel_mask;
|
||||
_16x16mv.as_mv.col &= x->fullpixel_mask;
|
||||
/* calc uv motion vectors */
|
||||
_16x16mv.as_mv.row +=
|
||||
1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
|
||||
_16x16mv.as_mv.col +=
|
||||
1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
|
||||
_16x16mv.as_mv.row /= 2;
|
||||
_16x16mv.as_mv.col /= 2;
|
||||
_16x16mv.as_mv.row &= x->fullpixel_mask;
|
||||
_16x16mv.as_mv.col &= x->fullpixel_mask;
|
||||
|
||||
pre_stride >>= 1;
|
||||
offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
pre_stride >>= 1;
|
||||
offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
if ( _16x16mv.as_int & 0x00070007)
|
||||
{
|
||||
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
|
||||
vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
|
||||
}
|
||||
if (_16x16mv.as_int & 0x00070007) {
|
||||
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7,
|
||||
_16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7,
|
||||
_16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
|
||||
} else {
|
||||
vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
|
||||
vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
|
||||
{
|
||||
int i;
|
||||
unsigned char *base_dst = x->dst.y_buffer;
|
||||
unsigned char *base_pre = x->pre.y_buffer;
|
||||
static void build_inter4x4_predictors_mb(MACROBLOCKD *x) {
|
||||
int i;
|
||||
unsigned char *base_dst = x->dst.y_buffer;
|
||||
unsigned char *base_pre = x->pre.y_buffer;
|
||||
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
{
|
||||
BLOCKD *b;
|
||||
int dst_stride = x->dst.y_stride;
|
||||
if (x->mode_info_context->mbmi.partitioning < 3) {
|
||||
BLOCKD *b;
|
||||
int dst_stride = x->dst.y_stride;
|
||||
|
||||
x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
|
||||
x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
|
||||
x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
|
||||
x->block[10].bmi = x->mode_info_context->bmi[10];
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
|
||||
}
|
||||
|
||||
b = &x->block[ 0];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[ 2];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[ 8];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[10];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 16; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.y_stride;
|
||||
|
||||
x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
|
||||
x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x);
|
||||
}
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
base_dst = x->dst.u_buffer;
|
||||
base_pre = x->pre.u_buffer;
|
||||
for (i = 16; i < 20; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.uv_stride;
|
||||
|
||||
/* Note: uv mvs already clamped in build_4x4uvmvs() */
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
x->block[0].bmi = x->mode_info_context->bmi[0];
|
||||
x->block[2].bmi = x->mode_info_context->bmi[2];
|
||||
x->block[8].bmi = x->mode_info_context->bmi[8];
|
||||
x->block[10].bmi = x->mode_info_context->bmi[10];
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
|
||||
clamp_mv_to_umv_border(&x->block[0].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[2].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[8].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
|
||||
}
|
||||
|
||||
base_dst = x->dst.v_buffer;
|
||||
base_pre = x->pre.v_buffer;
|
||||
for (i = 20; i < 24; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.uv_stride;
|
||||
b = &x->block[0];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
|
||||
dst_stride);
|
||||
b = &x->block[2];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
|
||||
dst_stride);
|
||||
b = &x->block[8];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
|
||||
dst_stride);
|
||||
b = &x->block[10];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
|
||||
dst_stride);
|
||||
} else {
|
||||
for (i = 0; i < 16; i += 2) {
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i + 1];
|
||||
int dst_stride = x->dst.y_stride;
|
||||
|
||||
/* Note: uv mvs already clamped in build_4x4uvmvs() */
|
||||
x->block[i + 0].bmi = x->mode_info_context->bmi[i + 0];
|
||||
x->block[i + 1].bmi = x->mode_info_context->bmi[i + 1];
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
|
||||
clamp_mv_to_umv_border(&x->block[i + 0].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[i + 1].bmi.mv.as_mv, x);
|
||||
}
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
|
||||
base_pre, dst_stride);
|
||||
else {
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride,
|
||||
base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride,
|
||||
base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
}
|
||||
base_dst = x->dst.u_buffer;
|
||||
base_pre = x->pre.u_buffer;
|
||||
for (i = 16; i < 20; i += 2) {
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i + 1];
|
||||
int dst_stride = x->dst.uv_stride;
|
||||
|
||||
/* Note: uv mvs already clamped in build_4x4uvmvs() */
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
|
||||
base_pre, dst_stride);
|
||||
else {
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre,
|
||||
dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre,
|
||||
dst_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
|
||||
base_dst = x->dst.v_buffer;
|
||||
base_pre = x->pre.v_buffer;
|
||||
for (i = 20; i < 24; i += 2) {
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i + 1];
|
||||
int dst_stride = x->dst.uv_stride;
|
||||
|
||||
/* Note: uv mvs already clamped in build_4x4uvmvs() */
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
|
||||
base_pre, dst_stride);
|
||||
else {
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre,
|
||||
dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre,
|
||||
dst_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void build_4x4uvmvs(MACROBLOCKD *x)
|
||||
{
|
||||
int i, j;
|
||||
static void build_4x4uvmvs(MACROBLOCKD *x) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
int yoffset = i * 8 + j * 2;
|
||||
int uoffset = 16 + i * 2 + j;
|
||||
int voffset = 20 + i * 2 + j;
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
int yoffset = i * 8 + j * 2;
|
||||
int uoffset = 16 + i * 2 + j;
|
||||
int voffset = 20 + i * 2 + j;
|
||||
|
||||
int temp;
|
||||
int temp;
|
||||
|
||||
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
|
||||
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row +
|
||||
x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row +
|
||||
x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row +
|
||||
x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
|
||||
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col +
|
||||
x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col +
|
||||
x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col +
|
||||
x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
|
||||
|
||||
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
|
||||
}
|
||||
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors_mb(MACROBLOCKD *xd)
|
||||
{
|
||||
if (xd->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
|
||||
xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.y_stride, xd->dst.uv_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
build_4x4uvmvs(xd);
|
||||
build_inter4x4_predictors_mb(xd);
|
||||
}
|
||||
void vp8_build_inter_predictors_mb(MACROBLOCKD *xd) {
|
||||
if (xd->mode_info_context->mbmi.mode != SPLITMV) {
|
||||
vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer,
|
||||
xd->dst.v_buffer, xd->dst.y_stride,
|
||||
xd->dst.uv_stride);
|
||||
} else {
|
||||
build_4x4uvmvs(xd);
|
||||
build_inter4x4_predictors_mb(xd);
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTER_H_
|
||||
#define VP8_COMMON_RECONINTER_H_
|
||||
|
||||
@ -17,21 +16,16 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
|
||||
extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_ystride,
|
||||
int dst_uvstride);
|
||||
|
||||
extern void vp8_build_inter16x16_predictors_mb(
|
||||
MACROBLOCKD *x, unsigned char *dst_y, unsigned char *dst_u,
|
||||
unsigned char *dst_v, int dst_ystride, int dst_uvstride);
|
||||
|
||||
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
int dst_ystride);
|
||||
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
|
||||
unsigned char *base_pre,
|
||||
int pre_stride,
|
||||
vp8_subpix_fn_t sppf);
|
||||
int pre_stride, vp8_subpix_fn_t sppf);
|
||||
|
||||
extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
|
||||
extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
@ -19,9 +18,9 @@
|
||||
#include "vp8/common/reconintra4x4.h"
|
||||
|
||||
enum {
|
||||
SIZE_16,
|
||||
SIZE_8,
|
||||
NUM_SIZES,
|
||||
SIZE_16,
|
||||
SIZE_8,
|
||||
NUM_SIZES,
|
||||
};
|
||||
|
||||
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
@ -30,88 +29,68 @@ typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
static intra_pred_fn pred[4][NUM_SIZES];
|
||||
static intra_pred_fn dc_pred[2][2][NUM_SIZES];
|
||||
|
||||
static void vp8_init_intra_predictors_internal(void)
|
||||
{
|
||||
#define INIT_SIZE(sz) \
|
||||
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
|
||||
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
|
||||
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
|
||||
\
|
||||
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
|
||||
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
|
||||
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
|
||||
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
|
||||
static void vp8_init_intra_predictors_internal(void) {
|
||||
#define INIT_SIZE(sz) \
|
||||
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
|
||||
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
|
||||
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
|
||||
\
|
||||
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
|
||||
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
|
||||
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
|
||||
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
|
||||
|
||||
INIT_SIZE(16);
|
||||
INIT_SIZE(8);
|
||||
vp8_init_intra4x4_predictors_internal();
|
||||
INIT_SIZE(16);
|
||||
INIT_SIZE(8);
|
||||
vp8_init_intra4x4_predictors_internal();
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
unsigned char * yabove_row,
|
||||
unsigned char * yleft,
|
||||
int left_stride,
|
||||
unsigned char * ypred_ptr,
|
||||
int y_stride)
|
||||
{
|
||||
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
|
||||
DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, unsigned char *yabove_row,
|
||||
unsigned char *yleft, int left_stride,
|
||||
unsigned char *ypred_ptr, int y_stride) {
|
||||
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
|
||||
DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
yleft_col[i] = yleft[i* left_stride];
|
||||
}
|
||||
for (i = 0; i < 16; i++) {
|
||||
yleft_col[i] = yleft[i * left_stride];
|
||||
}
|
||||
|
||||
if (mode == DC_PRED)
|
||||
{
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
|
||||
}
|
||||
else
|
||||
{
|
||||
fn = pred[mode][SIZE_16];
|
||||
}
|
||||
if (mode == DC_PRED) {
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
|
||||
} else {
|
||||
fn = pred[mode][SIZE_16];
|
||||
}
|
||||
|
||||
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
|
||||
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride)
|
||||
{
|
||||
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
|
||||
unsigned char uleft_col[8];
|
||||
unsigned char vleft_col[8];
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
void vp8_build_intra_predictors_mbuv_s(
|
||||
MACROBLOCKD *x, unsigned char *uabove_row, unsigned char *vabove_row,
|
||||
unsigned char *uleft, unsigned char *vleft, int left_stride,
|
||||
unsigned char *upred_ptr, unsigned char *vpred_ptr, int pred_stride) {
|
||||
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
|
||||
unsigned char uleft_col[8];
|
||||
unsigned char vleft_col[8];
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
uleft_col[i] = uleft[i * left_stride];
|
||||
vleft_col[i] = vleft[i * left_stride];
|
||||
}
|
||||
for (i = 0; i < 8; i++) {
|
||||
uleft_col[i] = uleft[i * left_stride];
|
||||
vleft_col[i] = vleft[i * left_stride];
|
||||
}
|
||||
|
||||
if (uvmode == DC_PRED)
|
||||
{
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
|
||||
}
|
||||
else
|
||||
{
|
||||
fn = pred[uvmode][SIZE_8];
|
||||
}
|
||||
if (uvmode == DC_PRED) {
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
|
||||
} else {
|
||||
fn = pred[uvmode][SIZE_8];
|
||||
}
|
||||
|
||||
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
|
||||
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
|
||||
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
|
||||
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
|
||||
}
|
||||
|
||||
void vp8_init_intra_predictors(void)
|
||||
{
|
||||
once(vp8_init_intra_predictors_internal);
|
||||
void vp8_init_intra_predictors(void) {
|
||||
once(vp8_init_intra_predictors_internal);
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTRA_H_
|
||||
#define VP8_COMMON_RECONINTRA_H_
|
||||
|
||||
@ -18,22 +17,14 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
unsigned char *yabove_row,
|
||||
unsigned char *yleft,
|
||||
int left_stride,
|
||||
unsigned char *ypred_ptr,
|
||||
int y_stride);
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, unsigned char *yabove_row,
|
||||
unsigned char *yleft, int left_stride,
|
||||
unsigned char *ypred_ptr, int y_stride);
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride);
|
||||
void vp8_build_intra_predictors_mbuv_s(
|
||||
MACROBLOCKD *x, unsigned char *uabove_row, unsigned char *vabove_row,
|
||||
unsigned char *uleft, unsigned char *vleft, int left_stride,
|
||||
unsigned char *upred_ptr, unsigned char *vpred_ptr, int pred_stride);
|
||||
|
||||
void vp8_init_intra_predictors(void);
|
||||
|
||||
|
@ -21,35 +21,32 @@ typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
|
||||
static intra_pred_fn pred[10];
|
||||
|
||||
void vp8_init_intra4x4_predictors_internal(void)
|
||||
{
|
||||
pred[B_DC_PRED] = vpx_dc_predictor_4x4;
|
||||
pred[B_TM_PRED] = vpx_tm_predictor_4x4;
|
||||
pred[B_VE_PRED] = vpx_ve_predictor_4x4;
|
||||
pred[B_HE_PRED] = vpx_he_predictor_4x4;
|
||||
pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
|
||||
pred[B_RD_PRED] = vpx_d135_predictor_4x4;
|
||||
pred[B_VR_PRED] = vpx_d117_predictor_4x4;
|
||||
pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
|
||||
pred[B_HD_PRED] = vpx_d153_predictor_4x4;
|
||||
pred[B_HU_PRED] = vpx_d207_predictor_4x4;
|
||||
void vp8_init_intra4x4_predictors_internal(void) {
|
||||
pred[B_DC_PRED] = vpx_dc_predictor_4x4;
|
||||
pred[B_TM_PRED] = vpx_tm_predictor_4x4;
|
||||
pred[B_VE_PRED] = vpx_ve_predictor_4x4;
|
||||
pred[B_HE_PRED] = vpx_he_predictor_4x4;
|
||||
pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
|
||||
pred[B_RD_PRED] = vpx_d135_predictor_4x4;
|
||||
pred[B_VR_PRED] = vpx_d117_predictor_4x4;
|
||||
pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
|
||||
pred[B_HD_PRED] = vpx_d153_predictor_4x4;
|
||||
pred[B_HU_PRED] = vpx_d207_predictor_4x4;
|
||||
}
|
||||
|
||||
void vp8_intra4x4_predict(unsigned char *above,
|
||||
unsigned char *yleft, int left_stride,
|
||||
B_PREDICTION_MODE b_mode,
|
||||
void vp8_intra4x4_predict(unsigned char *above, unsigned char *yleft,
|
||||
int left_stride, B_PREDICTION_MODE b_mode,
|
||||
unsigned char *dst, int dst_stride,
|
||||
unsigned char top_left)
|
||||
{
|
||||
unsigned char Left[4];
|
||||
unsigned char Aboveb[12], *Above = Aboveb + 4;
|
||||
unsigned char top_left) {
|
||||
unsigned char Left[4];
|
||||
unsigned char Aboveb[12], *Above = Aboveb + 4;
|
||||
|
||||
Left[0] = yleft[0];
|
||||
Left[1] = yleft[left_stride];
|
||||
Left[2] = yleft[2 * left_stride];
|
||||
Left[3] = yleft[3 * left_stride];
|
||||
memcpy(Above, above, 8);
|
||||
Above[-1] = top_left;
|
||||
Left[0] = yleft[0];
|
||||
Left[1] = yleft[left_stride];
|
||||
Left[2] = yleft[2 * left_stride];
|
||||
Left[3] = yleft[3 * left_stride];
|
||||
memcpy(Above, above, 8);
|
||||
Above[-1] = top_left;
|
||||
|
||||
pred[b_mode](dst, dst_stride, Above, Left);
|
||||
pred[b_mode](dst, dst_stride, Above, Left);
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTRA4X4_H_
|
||||
#define VP8_COMMON_RECONINTRA4X4_H_
|
||||
#include "vp8/common/blockd.h"
|
||||
@ -18,24 +17,22 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd,
|
||||
unsigned char *above_right_src)
|
||||
{
|
||||
int dst_stride = xd->dst.y_stride;
|
||||
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
|
||||
unsigned char *above_right_src) {
|
||||
int dst_stride = xd->dst.y_stride;
|
||||
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
|
||||
|
||||
unsigned int *src_ptr = (unsigned int *)above_right_src;
|
||||
unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride);
|
||||
unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride);
|
||||
unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride);
|
||||
unsigned int *src_ptr = (unsigned int *)above_right_src;
|
||||
unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride);
|
||||
unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride);
|
||||
unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride);
|
||||
|
||||
*dst_ptr0 = *src_ptr;
|
||||
*dst_ptr1 = *src_ptr;
|
||||
*dst_ptr2 = *src_ptr;
|
||||
*dst_ptr0 = *src_ptr;
|
||||
*dst_ptr1 = *src_ptr;
|
||||
*dst_ptr2 = *src_ptr;
|
||||
}
|
||||
|
||||
void vp8_intra4x4_predict(unsigned char *Above,
|
||||
unsigned char *yleft, int left_stride,
|
||||
B_PREDICTION_MODE b_mode,
|
||||
void vp8_intra4x4_predict(unsigned char *Above, unsigned char *yleft,
|
||||
int left_stride, B_PREDICTION_MODE b_mode,
|
||||
unsigned char *dst, int dst_stride,
|
||||
unsigned char top_left);
|
||||
|
||||
|
@ -12,8 +12,4 @@
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_ports/vpx_once.h"
|
||||
|
||||
|
||||
void vp8_rtcd()
|
||||
{
|
||||
once(setup_rtcd_internal);
|
||||
}
|
||||
void vp8_rtcd() { once(setup_rtcd_internal); }
|
||||
|
@ -8,32 +8,28 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "setupintrarecon.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
int i;
|
||||
void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) {
|
||||
int i;
|
||||
|
||||
/* set up frame new frame for intra coded blocks */
|
||||
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
for (i = 0; i < ybf->y_height; i++)
|
||||
ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
|
||||
/* set up frame new frame for intra coded blocks */
|
||||
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
for (i = 0; i < ybf->y_height; i++)
|
||||
ybf->y_buffer[ybf->y_stride * i - 1] = (unsigned char)129;
|
||||
|
||||
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
for (i = 0; i < ybf->uv_height; i++)
|
||||
ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
|
||||
|
||||
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
for (i = 0; i < ybf->uv_height; i++)
|
||||
ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
|
||||
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
for (i = 0; i < ybf->uv_height; i++)
|
||||
ybf->u_buffer[ybf->uv_stride * i - 1] = (unsigned char)129;
|
||||
|
||||
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
for (i = 0; i < ybf->uv_height; i++)
|
||||
ybf->v_buffer[ybf->uv_stride * i - 1] = (unsigned char)129;
|
||||
}
|
||||
|
||||
void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) {
|
||||
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
}
|
||||
|
@ -22,20 +22,15 @@ extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf);
|
||||
|
||||
static INLINE void setup_intra_recon_left(unsigned char *y_buffer,
|
||||
unsigned char *u_buffer,
|
||||
unsigned char *v_buffer,
|
||||
int y_stride,
|
||||
int uv_stride)
|
||||
{
|
||||
int i;
|
||||
unsigned char *v_buffer, int y_stride,
|
||||
int uv_stride) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
y_buffer[y_stride *i] = (unsigned char) 129;
|
||||
for (i = 0; i < 16; i++) y_buffer[y_stride * i] = (unsigned char)129;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
u_buffer[uv_stride *i] = (unsigned char) 129;
|
||||
for (i = 0; i < 8; i++) u_buffer[uv_stride * i] = (unsigned char)129;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
v_buffer[uv_stride *i] = (unsigned char) 129;
|
||||
for (i = 0; i < 8; i++) v_buffer[uv_stride * i] = (unsigned char)129;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -8,27 +8,25 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "swapyv12buffer.h"
|
||||
|
||||
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame)
|
||||
{
|
||||
unsigned char *temp;
|
||||
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame,
|
||||
YV12_BUFFER_CONFIG *last_frame) {
|
||||
unsigned char *temp;
|
||||
|
||||
temp = last_frame->buffer_alloc;
|
||||
last_frame->buffer_alloc = new_frame->buffer_alloc;
|
||||
new_frame->buffer_alloc = temp;
|
||||
temp = last_frame->buffer_alloc;
|
||||
last_frame->buffer_alloc = new_frame->buffer_alloc;
|
||||
new_frame->buffer_alloc = temp;
|
||||
|
||||
temp = last_frame->y_buffer;
|
||||
last_frame->y_buffer = new_frame->y_buffer;
|
||||
new_frame->y_buffer = temp;
|
||||
temp = last_frame->y_buffer;
|
||||
last_frame->y_buffer = new_frame->y_buffer;
|
||||
new_frame->y_buffer = temp;
|
||||
|
||||
temp = last_frame->u_buffer;
|
||||
last_frame->u_buffer = new_frame->u_buffer;
|
||||
new_frame->u_buffer = temp;
|
||||
|
||||
temp = last_frame->v_buffer;
|
||||
last_frame->v_buffer = new_frame->v_buffer;
|
||||
new_frame->v_buffer = temp;
|
||||
temp = last_frame->u_buffer;
|
||||
last_frame->u_buffer = new_frame->u_buffer;
|
||||
new_frame->u_buffer = temp;
|
||||
|
||||
temp = last_frame->v_buffer;
|
||||
last_frame->v_buffer = new_frame->v_buffer;
|
||||
new_frame->v_buffer = temp;
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_SWAPYV12BUFFER_H_
|
||||
#define VP8_COMMON_SWAPYV12BUFFER_H_
|
||||
|
||||
@ -18,7 +17,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame);
|
||||
void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame,
|
||||
YV12_BUFFER_CONFIG *last_frame);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -10,121 +10,109 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
void vp8_blit_text(const char *msg, unsigned char *address, const int pitch) {
|
||||
int letter_bitmap;
|
||||
unsigned char *output_pos = address;
|
||||
int colpos;
|
||||
const int font[] = {
|
||||
0x0, 0x5C00, 0x8020, 0xAFABEA, 0xD7EC0, 0x1111111, 0x1855740,
|
||||
0x18000, 0x45C0, 0x74400, 0x51140, 0x23880, 0xC4000, 0x21080,
|
||||
0x80000, 0x111110, 0xE9D72E, 0x87E40, 0x12AD732, 0xAAD62A, 0x4F94C4,
|
||||
0x4D6B7, 0x456AA, 0x3E8423, 0xAAD6AA, 0xAAD6A2, 0x2800, 0x2A00,
|
||||
0x8A880, 0x52940, 0x22A20, 0x15422, 0x6AD62E, 0x1E4A53E, 0xAAD6BF,
|
||||
0x8C62E, 0xE8C63F, 0x118D6BF, 0x1094BF, 0xCAC62E, 0x1F2109F, 0x118FE31,
|
||||
0xF8C628, 0x8A89F, 0x108421F, 0x1F1105F, 0x1F4105F, 0xE8C62E, 0x2294BF,
|
||||
0x164C62E, 0x12694BF, 0x8AD6A2, 0x10FC21, 0x1F8421F, 0x744107, 0xF8220F,
|
||||
0x1151151, 0x117041, 0x119D731, 0x47E0, 0x1041041, 0xFC400, 0x10440,
|
||||
0x1084210, 0x820
|
||||
};
|
||||
colpos = 0;
|
||||
|
||||
void vp8_blit_text(const char *msg, unsigned char *address, const int pitch)
|
||||
{
|
||||
int letter_bitmap;
|
||||
unsigned char *output_pos = address;
|
||||
int colpos;
|
||||
const int font[] =
|
||||
{
|
||||
0x0, 0x5C00, 0x8020, 0xAFABEA, 0xD7EC0, 0x1111111, 0x1855740, 0x18000,
|
||||
0x45C0, 0x74400, 0x51140, 0x23880, 0xC4000, 0x21080, 0x80000, 0x111110,
|
||||
0xE9D72E, 0x87E40, 0x12AD732, 0xAAD62A, 0x4F94C4, 0x4D6B7, 0x456AA,
|
||||
0x3E8423, 0xAAD6AA, 0xAAD6A2, 0x2800, 0x2A00, 0x8A880, 0x52940, 0x22A20,
|
||||
0x15422, 0x6AD62E, 0x1E4A53E, 0xAAD6BF, 0x8C62E, 0xE8C63F, 0x118D6BF,
|
||||
0x1094BF, 0xCAC62E, 0x1F2109F, 0x118FE31, 0xF8C628, 0x8A89F, 0x108421F,
|
||||
0x1F1105F, 0x1F4105F, 0xE8C62E, 0x2294BF, 0x164C62E, 0x12694BF, 0x8AD6A2,
|
||||
0x10FC21, 0x1F8421F, 0x744107, 0xF8220F, 0x1151151, 0x117041, 0x119D731,
|
||||
0x47E0, 0x1041041, 0xFC400, 0x10440, 0x1084210, 0x820
|
||||
};
|
||||
colpos = 0;
|
||||
while (msg[colpos] != 0) {
|
||||
char letter = msg[colpos];
|
||||
int fontcol, fontrow;
|
||||
|
||||
while (msg[colpos] != 0)
|
||||
{
|
||||
char letter = msg[colpos];
|
||||
int fontcol, fontrow;
|
||||
if (letter <= 'Z' && letter >= ' ')
|
||||
letter_bitmap = font[letter - ' '];
|
||||
else if (letter <= 'z' && letter >= 'a')
|
||||
letter_bitmap = font[letter - 'a' + 'A' - ' '];
|
||||
else
|
||||
letter_bitmap = font[0];
|
||||
|
||||
if (letter <= 'Z' && letter >= ' ')
|
||||
letter_bitmap = font[letter-' '];
|
||||
else if (letter <= 'z' && letter >= 'a')
|
||||
letter_bitmap = font[letter-'a'+'A' - ' '];
|
||||
else
|
||||
letter_bitmap = font[0];
|
||||
for (fontcol = 6; fontcol >= 0; fontcol--)
|
||||
for (fontrow = 0; fontrow < 5; fontrow++)
|
||||
output_pos[fontrow * pitch + fontcol] =
|
||||
((letter_bitmap >> (fontcol * 5)) & (1 << fontrow) ? 255 : 0);
|
||||
|
||||
for (fontcol = 6; fontcol >= 0 ; fontcol--)
|
||||
for (fontrow = 0; fontrow < 5; fontrow++)
|
||||
output_pos[fontrow *pitch + fontcol] =
|
||||
((letter_bitmap >> (fontcol * 5)) & (1 << fontrow) ? 255 : 0);
|
||||
|
||||
output_pos += 7;
|
||||
colpos++;
|
||||
}
|
||||
output_pos += 7;
|
||||
colpos++;
|
||||
}
|
||||
}
|
||||
|
||||
static void plot (const int x, const int y, unsigned char *image, const int pitch)
|
||||
{
|
||||
image [x+y*pitch] ^= 255;
|
||||
static void plot(const int x, const int y, unsigned char *image,
|
||||
const int pitch) {
|
||||
image[x + y * pitch] ^= 255;
|
||||
}
|
||||
|
||||
/* Bresenham line algorithm */
|
||||
void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch)
|
||||
{
|
||||
int steep = abs(y1 - y0) > abs(x1 - x0);
|
||||
int deltax, deltay;
|
||||
int error, ystep, y, x;
|
||||
void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image,
|
||||
const int pitch) {
|
||||
int steep = abs(y1 - y0) > abs(x1 - x0);
|
||||
int deltax, deltay;
|
||||
int error, ystep, y, x;
|
||||
|
||||
if (steep)
|
||||
{
|
||||
int t;
|
||||
t = x0;
|
||||
x0 = y0;
|
||||
y0 = t;
|
||||
if (steep) {
|
||||
int t;
|
||||
t = x0;
|
||||
x0 = y0;
|
||||
y0 = t;
|
||||
|
||||
t = x1;
|
||||
x1 = y1;
|
||||
y1 = t;
|
||||
t = x1;
|
||||
x1 = y1;
|
||||
y1 = t;
|
||||
}
|
||||
|
||||
if (x0 > x1) {
|
||||
int t;
|
||||
t = x0;
|
||||
x0 = x1;
|
||||
x1 = t;
|
||||
|
||||
t = y0;
|
||||
y0 = y1;
|
||||
y1 = t;
|
||||
}
|
||||
|
||||
deltax = x1 - x0;
|
||||
deltay = abs(y1 - y0);
|
||||
error = deltax / 2;
|
||||
|
||||
y = y0;
|
||||
|
||||
if (y0 < y1)
|
||||
ystep = 1;
|
||||
else
|
||||
ystep = -1;
|
||||
|
||||
if (steep) {
|
||||
for (x = x0; x <= x1; x++) {
|
||||
plot(y, x, image, pitch);
|
||||
|
||||
error = error - deltay;
|
||||
if (error < 0) {
|
||||
y = y + ystep;
|
||||
error = error + deltax;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (x = x0; x <= x1; x++) {
|
||||
plot(x, y, image, pitch);
|
||||
|
||||
if (x0 > x1)
|
||||
{
|
||||
int t;
|
||||
t = x0;
|
||||
x0 = x1;
|
||||
x1 = t;
|
||||
|
||||
t = y0;
|
||||
y0 = y1;
|
||||
y1 = t;
|
||||
}
|
||||
|
||||
deltax = x1 - x0;
|
||||
deltay = abs(y1 - y0);
|
||||
error = deltax / 2;
|
||||
|
||||
y = y0;
|
||||
|
||||
if (y0 < y1)
|
||||
ystep = 1;
|
||||
else
|
||||
ystep = -1;
|
||||
|
||||
if (steep)
|
||||
{
|
||||
for (x = x0; x <= x1; x++)
|
||||
{
|
||||
plot(y,x, image, pitch);
|
||||
|
||||
error = error - deltay;
|
||||
if (error < 0)
|
||||
{
|
||||
y = y + ystep;
|
||||
error = error + deltax;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (x = x0; x <= x1; x++)
|
||||
{
|
||||
plot(x,y, image, pitch);
|
||||
|
||||
error = error - deltay;
|
||||
if (error < 0)
|
||||
{
|
||||
y = y + ystep;
|
||||
error = error + deltax;
|
||||
}
|
||||
}
|
||||
error = error - deltay;
|
||||
if (error < 0) {
|
||||
y = y + ystep;
|
||||
error = error + deltax;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_THREADING_H_
|
||||
#define VP8_COMMON_THREADING_H_
|
||||
|
||||
@ -30,10 +29,12 @@ extern "C" {
|
||||
#define THREAD_SPECIFIC_INDEX DWORD
|
||||
#define pthread_t HANDLE
|
||||
#define pthread_attr_t DWORD
|
||||
#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
|
||||
#define pthread_detach(thread) \
|
||||
if (thread != NULL) CloseHandle(thread)
|
||||
#define thread_sleep(nms) Sleep(nms)
|
||||
#define pthread_cancel(thread) terminate_thread(thread,0)
|
||||
#define ts_key_create(ts_key, destructor) {ts_key = TlsAlloc();};
|
||||
#define pthread_cancel(thread) terminate_thread(thread, 0)
|
||||
#define ts_key_create(ts_key, destructor) \
|
||||
{ ts_key = TlsAlloc(); };
|
||||
#define pthread_getspecific(ts_key) TlsGetValue(ts_key)
|
||||
#define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value)
|
||||
#define pthread_self() GetCurrentThreadId()
|
||||
@ -53,9 +54,9 @@ extern "C" {
|
||||
#define thread_sleep(nms) DosSleep(nms)
|
||||
#define pthread_cancel(thread) DosKillThread(thread)
|
||||
#define ts_key_create(ts_key, destructor) \
|
||||
DosAllocThreadLocalMemory(1, &(ts_key));
|
||||
DosAllocThreadLocalMemory(1, &(ts_key));
|
||||
#define pthread_getspecific(ts_key) ((void *)(*(ts_key)))
|
||||
#define pthread_setspecific(ts_key, value) (*(ts_key)=(ULONG)(value))
|
||||
#define pthread_setspecific(ts_key, value) (*(ts_key) = (ULONG)(value))
|
||||
#define pthread_self() _gettid()
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
@ -75,85 +76,82 @@ extern "C" {
|
||||
#define THREAD_FUNCTION void *
|
||||
#define THREAD_FUNCTION_RETURN void *
|
||||
#define THREAD_SPECIFIC_INDEX pthread_key_t
|
||||
#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor);
|
||||
#define ts_key_create(ts_key, destructor) \
|
||||
pthread_key_create(&(ts_key), destructor);
|
||||
#endif
|
||||
|
||||
/* Synchronization macros: Win32 and Pthreads */
|
||||
#if defined(_WIN32) && !HAVE_PTHREAD_H
|
||||
#define sem_t HANDLE
|
||||
#define pause(voidpara) __asm PAUSE
|
||||
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
|
||||
#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
|
||||
#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL)
|
||||
#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
|
||||
#define sem_init(sem, sem_attr1, sem_init_value) \
|
||||
(int)((*sem = CreateSemaphore(NULL, 0, 32768, NULL)) == NULL)
|
||||
#define sem_wait(sem) \
|
||||
(int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem, INFINITE))
|
||||
#define sem_post(sem) ReleaseSemaphore(*sem, 1, NULL)
|
||||
#define sem_destroy(sem) \
|
||||
if (*sem) ((int)(CloseHandle(*sem)) == TRUE)
|
||||
#define thread_sleep(nms) Sleep(nms)
|
||||
|
||||
#elif defined(__OS2__)
|
||||
typedef struct
|
||||
{
|
||||
HEV event;
|
||||
HMTX wait_mutex;
|
||||
HMTX count_mutex;
|
||||
int count;
|
||||
typedef struct {
|
||||
HEV event;
|
||||
HMTX wait_mutex;
|
||||
HMTX count_mutex;
|
||||
int count;
|
||||
} sem_t;
|
||||
|
||||
static inline int sem_init(sem_t *sem, int pshared, unsigned int value)
|
||||
{
|
||||
DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0,
|
||||
value > 0 ? TRUE : FALSE);
|
||||
DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE);
|
||||
DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE);
|
||||
static inline int sem_init(sem_t *sem, int pshared, unsigned int value) {
|
||||
DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0,
|
||||
value > 0 ? TRUE : FALSE);
|
||||
DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE);
|
||||
DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE);
|
||||
|
||||
sem->count = value;
|
||||
sem->count = value;
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int sem_wait(sem_t * sem)
|
||||
{
|
||||
DosRequestMutexSem(sem->wait_mutex, -1);
|
||||
static inline int sem_wait(sem_t *sem) {
|
||||
DosRequestMutexSem(sem->wait_mutex, -1);
|
||||
|
||||
DosWaitEventSem(sem->event, -1);
|
||||
DosWaitEventSem(sem->event, -1);
|
||||
|
||||
DosRequestMutexSem(sem->count_mutex, -1);
|
||||
DosRequestMutexSem(sem->count_mutex, -1);
|
||||
|
||||
sem->count--;
|
||||
if (sem->count == 0)
|
||||
{
|
||||
ULONG post_count;
|
||||
sem->count--;
|
||||
if (sem->count == 0) {
|
||||
ULONG post_count;
|
||||
|
||||
DosResetEventSem(sem->event, &post_count);
|
||||
}
|
||||
DosResetEventSem(sem->event, &post_count);
|
||||
}
|
||||
|
||||
DosReleaseMutexSem(sem->count_mutex);
|
||||
DosReleaseMutexSem(sem->count_mutex);
|
||||
|
||||
DosReleaseMutexSem(sem->wait_mutex);
|
||||
DosReleaseMutexSem(sem->wait_mutex);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int sem_post(sem_t * sem)
|
||||
{
|
||||
DosRequestMutexSem(sem->count_mutex, -1);
|
||||
static inline int sem_post(sem_t *sem) {
|
||||
DosRequestMutexSem(sem->count_mutex, -1);
|
||||
|
||||
if (sem->count < 32768)
|
||||
{
|
||||
sem->count++;
|
||||
DosPostEventSem(sem->event);
|
||||
}
|
||||
if (sem->count < 32768) {
|
||||
sem->count++;
|
||||
DosPostEventSem(sem->event);
|
||||
}
|
||||
|
||||
DosReleaseMutexSem(sem->count_mutex);
|
||||
DosReleaseMutexSem(sem->count_mutex);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int sem_destroy(sem_t * sem)
|
||||
{
|
||||
DosCloseEventSem(sem->event);
|
||||
DosCloseMutexSem(sem->wait_mutex);
|
||||
DosCloseMutexSem(sem->count_mutex);
|
||||
static inline int sem_destroy(sem_t *sem) {
|
||||
DosCloseEventSem(sem->event);
|
||||
DosCloseMutexSem(sem->wait_mutex);
|
||||
DosCloseMutexSem(sem->count_mutex);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define thread_sleep(nms) DosSleep(nms)
|
||||
@ -162,15 +160,20 @@ static inline int sem_destroy(sem_t * sem)
|
||||
|
||||
#ifdef __APPLE__
|
||||
#define sem_t semaphore_t
|
||||
#define sem_init(X,Y,Z) semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z)
|
||||
#define sem_wait(sem) (semaphore_wait(*sem) )
|
||||
#define sem_init(X, Y, Z) \
|
||||
semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z)
|
||||
#define sem_wait(sem) (semaphore_wait(*sem))
|
||||
#define sem_post(sem) semaphore_signal(*sem)
|
||||
#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem)
|
||||
#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
|
||||
#define sem_destroy(sem) semaphore_destroy(mach_task_self(), *sem)
|
||||
#define thread_sleep(nms)
|
||||
/* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec =
|
||||
1000*nms;nanosleep(&ts, NULL);} */
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
|
||||
#define thread_sleep(nms) sched_yield();
|
||||
/* {struct timespec ts;ts.tv_sec=0;
|
||||
ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
|
||||
#endif
|
||||
/* Not Windows. Assume pthreads */
|
||||
|
||||
@ -185,42 +188,41 @@ static inline int sem_destroy(sem_t * sem)
|
||||
#include "vpx_util/vpx_thread.h"
|
||||
|
||||
static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
|
||||
const int kMaxTryLocks = 4000;
|
||||
int locked = 0;
|
||||
int i;
|
||||
const int kMaxTryLocks = 4000;
|
||||
int locked = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < kMaxTryLocks; ++i) {
|
||||
if (!pthread_mutex_trylock(mutex)) {
|
||||
locked = 1;
|
||||
break;
|
||||
}
|
||||
for (i = 0; i < kMaxTryLocks; ++i) {
|
||||
if (!pthread_mutex_trylock(mutex)) {
|
||||
locked = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!locked)
|
||||
pthread_mutex_lock(mutex);
|
||||
if (!locked) pthread_mutex_lock(mutex);
|
||||
}
|
||||
|
||||
static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) {
|
||||
int ret;
|
||||
mutex_lock(mutex);
|
||||
ret = *p;
|
||||
pthread_mutex_unlock(mutex);
|
||||
return ret;
|
||||
int ret;
|
||||
mutex_lock(mutex);
|
||||
ret = *p;
|
||||
pthread_mutex_unlock(mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col,
|
||||
const int *last_row_current_mb_col,
|
||||
const int nsync) {
|
||||
while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {
|
||||
x86_pause_hint();
|
||||
thread_sleep(0);
|
||||
}
|
||||
while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {
|
||||
x86_pause_hint();
|
||||
thread_sleep(0);
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) {
|
||||
mutex_lock(mutex);
|
||||
*p = v;
|
||||
pthread_mutex_unlock(mutex);
|
||||
mutex_lock(mutex);
|
||||
*p = v;
|
||||
pthread_mutex_unlock(mutex);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
#include <assert.h>
|
||||
#endif
|
||||
@ -16,128 +15,95 @@
|
||||
|
||||
#include "treecoder.h"
|
||||
|
||||
static void tree2tok(
|
||||
struct vp8_token_struct *const p,
|
||||
vp8_tree t,
|
||||
int i,
|
||||
int v,
|
||||
int L
|
||||
)
|
||||
{
|
||||
v += v;
|
||||
++L;
|
||||
static void tree2tok(struct vp8_token_struct *const p, vp8_tree t, int i, int v,
|
||||
int L) {
|
||||
v += v;
|
||||
++L;
|
||||
|
||||
do
|
||||
{
|
||||
const vp8_tree_index j = t[i++];
|
||||
do {
|
||||
const vp8_tree_index j = t[i++];
|
||||
|
||||
if (j <= 0)
|
||||
{
|
||||
p[-j].value = v;
|
||||
p[-j].Len = L;
|
||||
}
|
||||
else
|
||||
tree2tok(p, t, j, v, L);
|
||||
}
|
||||
while (++v & 1);
|
||||
if (j <= 0) {
|
||||
p[-j].value = v;
|
||||
p[-j].Len = L;
|
||||
} else
|
||||
tree2tok(p, t, j, v, L);
|
||||
} while (++v & 1);
|
||||
}
|
||||
|
||||
void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t)
|
||||
{
|
||||
tree2tok(p, t, 0, 0, 0);
|
||||
void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t) {
|
||||
tree2tok(p, t, 0, 0, 0);
|
||||
}
|
||||
|
||||
void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t,
|
||||
int offset)
|
||||
{
|
||||
tree2tok(p - offset, t, 0, 0, 0);
|
||||
int offset) {
|
||||
tree2tok(p - offset, t, 0, 0, 0);
|
||||
}
|
||||
|
||||
static void branch_counts(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
vp8_tree tree,
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ]
|
||||
)
|
||||
{
|
||||
const int tree_len = n - 1;
|
||||
int t = 0;
|
||||
static void branch_counts(int n, /* n = size of alphabet */
|
||||
vp8_token tok[/* n */], vp8_tree tree,
|
||||
unsigned int branch_ct[/* n-1 */][2],
|
||||
const unsigned int num_events[/* n */]) {
|
||||
const int tree_len = n - 1;
|
||||
int t = 0;
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
assert(tree_len);
|
||||
assert(tree_len);
|
||||
#endif
|
||||
|
||||
do
|
||||
{
|
||||
branch_ct[t][0] = branch_ct[t][1] = 0;
|
||||
}
|
||||
while (++t < tree_len);
|
||||
do {
|
||||
branch_ct[t][0] = branch_ct[t][1] = 0;
|
||||
} while (++t < tree_len);
|
||||
|
||||
t = 0;
|
||||
t = 0;
|
||||
|
||||
do
|
||||
{
|
||||
int L = tok[t].Len;
|
||||
const int enc = tok[t].value;
|
||||
const unsigned int ct = num_events[t];
|
||||
do {
|
||||
int L = tok[t].Len;
|
||||
const int enc = tok[t].value;
|
||||
const unsigned int ct = num_events[t];
|
||||
|
||||
vp8_tree_index i = 0;
|
||||
vp8_tree_index i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
const int b = (enc >> --L) & 1;
|
||||
const int j = i >> 1;
|
||||
do {
|
||||
const int b = (enc >> --L) & 1;
|
||||
const int j = i >> 1;
|
||||
#if CONFIG_DEBUG
|
||||
assert(j < tree_len && 0 <= L);
|
||||
assert(j < tree_len && 0 <= L);
|
||||
#endif
|
||||
|
||||
branch_ct [j] [b] += ct;
|
||||
i = tree[ i + b];
|
||||
}
|
||||
while (i > 0);
|
||||
branch_ct[j][b] += ct;
|
||||
i = tree[i + b];
|
||||
} while (i > 0);
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
assert(!L);
|
||||
assert(!L);
|
||||
#endif
|
||||
}
|
||||
while (++t < n);
|
||||
|
||||
} while (++t < n);
|
||||
}
|
||||
|
||||
void vp8_tree_probs_from_distribution(int n, /* n = size of alphabet */
|
||||
vp8_token tok[/* n */], vp8_tree tree,
|
||||
vp8_prob probs[/* n-1 */],
|
||||
unsigned int branch_ct[/* n-1 */][2],
|
||||
const unsigned int num_events[/* n */],
|
||||
unsigned int Pfac, int rd) {
|
||||
const int tree_len = n - 1;
|
||||
int t = 0;
|
||||
|
||||
void vp8_tree_probs_from_distribution(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
vp8_tree tree,
|
||||
vp8_prob probs [ /* n-1 */ ],
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ],
|
||||
unsigned int Pfac,
|
||||
int rd
|
||||
)
|
||||
{
|
||||
const int tree_len = n - 1;
|
||||
int t = 0;
|
||||
branch_counts(n, tok, tree, branch_ct, num_events);
|
||||
|
||||
branch_counts(n, tok, tree, branch_ct, num_events);
|
||||
|
||||
do
|
||||
{
|
||||
const unsigned int *const c = branch_ct[t];
|
||||
const unsigned int tot = c[0] + c[1];
|
||||
do {
|
||||
const unsigned int *const c = branch_ct[t];
|
||||
const unsigned int tot = c[0] + c[1];
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
assert(tot < (1 << 24)); /* no overflow below */
|
||||
assert(tot < (1 << 24)); /* no overflow below */
|
||||
#endif
|
||||
|
||||
if (tot)
|
||||
{
|
||||
const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot;
|
||||
probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */
|
||||
}
|
||||
else
|
||||
probs[t] = vp8_prob_half;
|
||||
}
|
||||
while (++t < tree_len);
|
||||
if (tot) {
|
||||
const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot;
|
||||
probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */
|
||||
} else
|
||||
probs[t] = vp8_prob_half;
|
||||
} while (++t < tree_len);
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_TREECODER_H_
|
||||
#define VP8_COMMON_TREECODER_H_
|
||||
|
||||
@ -18,10 +17,9 @@ extern "C" {
|
||||
|
||||
typedef unsigned char vp8bc_index_t; /* probability index */
|
||||
|
||||
|
||||
typedef unsigned char vp8_prob;
|
||||
|
||||
#define vp8_prob_half ( (vp8_prob) 128)
|
||||
#define vp8_prob_half ((vp8_prob)128)
|
||||
|
||||
typedef signed char vp8_tree_index;
|
||||
struct bool_coder_spec;
|
||||
@ -34,10 +32,7 @@ typedef const bool_coder_spec c_bool_coder_spec;
|
||||
typedef const bool_writer c_bool_writer;
|
||||
typedef const bool_reader c_bool_reader;
|
||||
|
||||
|
||||
|
||||
# define vp8_complement( x) (255 - x)
|
||||
|
||||
#define vp8_complement(x) (255 - x)
|
||||
|
||||
/* We build coding trees compactly in arrays.
|
||||
Each node of the tree is a pair of vp8_tree_indices.
|
||||
@ -48,11 +43,9 @@ typedef const bool_reader c_bool_reader;
|
||||
|
||||
typedef const vp8_tree_index vp8_tree[], *vp8_tree_p;
|
||||
|
||||
|
||||
typedef const struct vp8_token_struct
|
||||
{
|
||||
int value;
|
||||
int Len;
|
||||
typedef const struct vp8_token_struct {
|
||||
int value;
|
||||
int Len;
|
||||
} vp8_token;
|
||||
|
||||
/* Construct encoding array from tree. */
|
||||
@ -61,35 +54,26 @@ void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree);
|
||||
void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree,
|
||||
int offset);
|
||||
|
||||
|
||||
/* Convert array of token occurrence counts into a table of probabilities
|
||||
for the associated binary encoding tree. Also writes count of branches
|
||||
taken for each node on the tree; this facilitates decisions as to
|
||||
probability updates. */
|
||||
|
||||
void vp8_tree_probs_from_distribution(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
vp8_tree tree,
|
||||
vp8_prob probs [ /* n-1 */ ],
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ],
|
||||
unsigned int Pfactor,
|
||||
int Round
|
||||
);
|
||||
void vp8_tree_probs_from_distribution(int n, /* n = size of alphabet */
|
||||
vp8_token tok[/* n */], vp8_tree tree,
|
||||
vp8_prob probs[/* n-1 */],
|
||||
unsigned int branch_ct[/* n-1 */][2],
|
||||
const unsigned int num_events[/* n */],
|
||||
unsigned int Pfactor, int Round);
|
||||
|
||||
/* Variant of above using coder spec rather than hardwired 8-bit probs. */
|
||||
|
||||
void vp8bc_tree_probs_from_distribution(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
vp8_tree tree,
|
||||
vp8_prob probs [ /* n-1 */ ],
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ],
|
||||
c_bool_coder_spec *s
|
||||
);
|
||||
|
||||
void vp8bc_tree_probs_from_distribution(int n, /* n = size of alphabet */
|
||||
vp8_token tok[/* n */], vp8_tree tree,
|
||||
vp8_prob probs[/* n-1 */],
|
||||
unsigned int branch_ct[/* n-1 */][2],
|
||||
const unsigned int num_events[/* n */],
|
||||
c_bool_coder_spec *s);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -17,235 +17,153 @@ extern "C" {
|
||||
|
||||
/*Generated file, included by entropymode.c*/
|
||||
|
||||
|
||||
const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] =
|
||||
{
|
||||
{ 0, 1 },
|
||||
{ 2, 2 },
|
||||
{ 6, 3 },
|
||||
{ 28, 5 },
|
||||
{ 30, 5 },
|
||||
{ 58, 6 },
|
||||
{ 59, 6 },
|
||||
{ 62, 6 },
|
||||
{ 126, 7 },
|
||||
{ 127, 7 }
|
||||
const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] = {
|
||||
{ 0, 1 }, { 2, 2 }, { 6, 3 }, { 28, 5 }, { 30, 5 },
|
||||
{ 58, 6 }, { 59, 6 }, { 62, 6 }, { 126, 7 }, { 127, 7 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] =
|
||||
{
|
||||
{ 0, 1 },
|
||||
{ 4, 3 },
|
||||
{ 5, 3 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 }
|
||||
const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] = {
|
||||
{ 0, 1 }, { 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] =
|
||||
{
|
||||
{ 4, 3 },
|
||||
{ 5, 3 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 },
|
||||
{ 0, 1 }
|
||||
const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] = {
|
||||
{ 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 }, { 0, 1 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] =
|
||||
{
|
||||
{ 0, 1 },
|
||||
{ 2, 2 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 }
|
||||
const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] = {
|
||||
{ 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] =
|
||||
{
|
||||
{ 6, 3 },
|
||||
{ 7, 3 },
|
||||
{ 2, 2 },
|
||||
{ 0, 1 }
|
||||
const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] = {
|
||||
{ 6, 3 }, { 7, 3 }, { 2, 2 }, { 0, 1 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] =
|
||||
{
|
||||
{ 2, 2 },
|
||||
{ 6, 3 },
|
||||
{ 0, 1 },
|
||||
{ 14, 4 },
|
||||
{ 15, 4 }
|
||||
const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] = {
|
||||
{ 2, 2 }, { 6, 3 }, { 0, 1 }, { 14, 4 }, { 15, 4 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] =
|
||||
{
|
||||
{ 0, 1 },
|
||||
{ 2, 2 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 }
|
||||
const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] = {
|
||||
{ 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_small_mvencodings[8] =
|
||||
{
|
||||
{ 0, 3 },
|
||||
{ 1, 3 },
|
||||
{ 2, 3 },
|
||||
{ 3, 3 },
|
||||
{ 4, 3 },
|
||||
{ 5, 3 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 }
|
||||
const struct vp8_token_struct vp8_small_mvencodings[8] = {
|
||||
{ 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }, { 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 }
|
||||
};
|
||||
|
||||
const vp8_prob vp8_ymode_prob[VP8_YMODES-1] =
|
||||
{
|
||||
112, 86, 140, 37
|
||||
};
|
||||
const vp8_prob vp8_ymode_prob[VP8_YMODES - 1] = { 112, 86, 140, 37 };
|
||||
|
||||
const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1] =
|
||||
{
|
||||
145, 156, 163, 128
|
||||
};
|
||||
const vp8_prob vp8_kf_ymode_prob[VP8_YMODES - 1] = { 145, 156, 163, 128 };
|
||||
|
||||
const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES-1] =
|
||||
{
|
||||
162, 101, 204
|
||||
};
|
||||
const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES - 1] = { 162, 101, 204 };
|
||||
|
||||
const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1] =
|
||||
{
|
||||
142, 114, 183
|
||||
};
|
||||
const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES - 1] = { 142, 114, 183 };
|
||||
|
||||
const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES-1] =
|
||||
{
|
||||
120, 90, 79, 133, 87, 85, 80, 111, 151
|
||||
};
|
||||
const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES - 1] = { 120, 90, 79, 133, 87,
|
||||
85, 80, 111, 151 };
|
||||
|
||||
|
||||
|
||||
const vp8_prob vp8_kf_bmode_prob
|
||||
[VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1] =
|
||||
{
|
||||
{
|
||||
{ 231, 120, 48, 89, 115, 113, 120, 152, 112 },
|
||||
{ 152, 179, 64, 126, 170, 118, 46, 70, 95 },
|
||||
{ 175, 69, 143, 80, 85, 82, 72, 155, 103 },
|
||||
{ 56, 58, 10, 171, 218, 189, 17, 13, 152 },
|
||||
{ 144, 71, 10, 38, 171, 213, 144, 34, 26 },
|
||||
{ 114, 26, 17, 163, 44, 195, 21, 10, 173 },
|
||||
{ 121, 24, 80, 195, 26, 62, 44, 64, 85 },
|
||||
{ 170, 46, 55, 19, 136, 160, 33, 206, 71 },
|
||||
{ 63, 20, 8, 114, 114, 208, 12, 9, 226 },
|
||||
{ 81, 40, 11, 96, 182, 84, 29, 16, 36 }
|
||||
},
|
||||
{
|
||||
{ 134, 183, 89, 137, 98, 101, 106, 165, 148 },
|
||||
{ 72, 187, 100, 130, 157, 111, 32, 75, 80 },
|
||||
{ 66, 102, 167, 99, 74, 62, 40, 234, 128 },
|
||||
{ 41, 53, 9, 178, 241, 141, 26, 8, 107 },
|
||||
{ 104, 79, 12, 27, 217, 255, 87, 17, 7 },
|
||||
{ 74, 43, 26, 146, 73, 166, 49, 23, 157 },
|
||||
{ 65, 38, 105, 160, 51, 52, 31, 115, 128 },
|
||||
{ 87, 68, 71, 44, 114, 51, 15, 186, 23 },
|
||||
{ 47, 41, 14, 110, 182, 183, 21, 17, 194 },
|
||||
{ 66, 45, 25, 102, 197, 189, 23, 18, 22 }
|
||||
},
|
||||
{
|
||||
{ 88, 88, 147, 150, 42, 46, 45, 196, 205 },
|
||||
{ 43, 97, 183, 117, 85, 38, 35, 179, 61 },
|
||||
{ 39, 53, 200, 87, 26, 21, 43, 232, 171 },
|
||||
{ 56, 34, 51, 104, 114, 102, 29, 93, 77 },
|
||||
{ 107, 54, 32, 26, 51, 1, 81, 43, 31 },
|
||||
{ 39, 28, 85, 171, 58, 165, 90, 98, 64 },
|
||||
{ 34, 22, 116, 206, 23, 34, 43, 166, 73 },
|
||||
{ 68, 25, 106, 22, 64, 171, 36, 225, 114 },
|
||||
{ 34, 19, 21, 102, 132, 188, 16, 76, 124 },
|
||||
{ 62, 18, 78, 95, 85, 57, 50, 48, 51 }
|
||||
},
|
||||
{
|
||||
{ 193, 101, 35, 159, 215, 111, 89, 46, 111 },
|
||||
{ 60, 148, 31, 172, 219, 228, 21, 18, 111 },
|
||||
{ 112, 113, 77, 85, 179, 255, 38, 120, 114 },
|
||||
{ 40, 42, 1, 196, 245, 209, 10, 25, 109 },
|
||||
{ 100, 80, 8, 43, 154, 1, 51, 26, 71 },
|
||||
{ 88, 43, 29, 140, 166, 213, 37, 43, 154 },
|
||||
{ 61, 63, 30, 155, 67, 45, 68, 1, 209 },
|
||||
{ 142, 78, 78, 16, 255, 128, 34, 197, 171 },
|
||||
{ 41, 40, 5, 102, 211, 183, 4, 1, 221 },
|
||||
{ 51, 50, 17, 168, 209, 192, 23, 25, 82 }
|
||||
},
|
||||
{
|
||||
{ 125, 98, 42, 88, 104, 85, 117, 175, 82 },
|
||||
{ 95, 84, 53, 89, 128, 100, 113, 101, 45 },
|
||||
{ 75, 79, 123, 47, 51, 128, 81, 171, 1 },
|
||||
{ 57, 17, 5, 71, 102, 57, 53, 41, 49 },
|
||||
{ 115, 21, 2, 10, 102, 255, 166, 23, 6 },
|
||||
{ 38, 33, 13, 121, 57, 73, 26, 1, 85 },
|
||||
{ 41, 10, 67, 138, 77, 110, 90, 47, 114 },
|
||||
{ 101, 29, 16, 10, 85, 128, 101, 196, 26 },
|
||||
{ 57, 18, 10, 102, 102, 213, 34, 20, 43 },
|
||||
{ 117, 20, 15, 36, 163, 128, 68, 1, 26 }
|
||||
},
|
||||
{
|
||||
{ 138, 31, 36, 171, 27, 166, 38, 44, 229 },
|
||||
{ 67, 87, 58, 169, 82, 115, 26, 59, 179 },
|
||||
{ 63, 59, 90, 180, 59, 166, 93, 73, 154 },
|
||||
{ 40, 40, 21, 116, 143, 209, 34, 39, 175 },
|
||||
{ 57, 46, 22, 24, 128, 1, 54, 17, 37 },
|
||||
{ 47, 15, 16, 183, 34, 223, 49, 45, 183 },
|
||||
{ 46, 17, 33, 183, 6, 98, 15, 32, 183 },
|
||||
{ 65, 32, 73, 115, 28, 128, 23, 128, 205 },
|
||||
{ 40, 3, 9, 115, 51, 192, 18, 6, 223 },
|
||||
{ 87, 37, 9, 115, 59, 77, 64, 21, 47 }
|
||||
},
|
||||
{
|
||||
{ 104, 55, 44, 218, 9, 54, 53, 130, 226 },
|
||||
{ 64, 90, 70, 205, 40, 41, 23, 26, 57 },
|
||||
{ 54, 57, 112, 184, 5, 41, 38, 166, 213 },
|
||||
{ 30, 34, 26, 133, 152, 116, 10, 32, 134 },
|
||||
{ 75, 32, 12, 51, 192, 255, 160, 43, 51 },
|
||||
{ 39, 19, 53, 221, 26, 114, 32, 73, 255 },
|
||||
{ 31, 9, 65, 234, 2, 15, 1, 118, 73 },
|
||||
{ 88, 31, 35, 67, 102, 85, 55, 186, 85 },
|
||||
{ 56, 21, 23, 111, 59, 205, 45, 37, 192 },
|
||||
{ 55, 38, 70, 124, 73, 102, 1, 34, 98 }
|
||||
},
|
||||
{
|
||||
{ 102, 61, 71, 37, 34, 53, 31, 243, 192 },
|
||||
{ 69, 60, 71, 38, 73, 119, 28, 222, 37 },
|
||||
{ 68, 45, 128, 34, 1, 47, 11, 245, 171 },
|
||||
{ 62, 17, 19, 70, 146, 85, 55, 62, 70 },
|
||||
{ 75, 15, 9, 9, 64, 255, 184, 119, 16 },
|
||||
{ 37, 43, 37, 154, 100, 163, 85, 160, 1 },
|
||||
{ 63, 9, 92, 136, 28, 64, 32, 201, 85 },
|
||||
{ 86, 6, 28, 5, 64, 255, 25, 248, 1 },
|
||||
{ 56, 8, 17, 132, 137, 255, 55, 116, 128 },
|
||||
{ 58, 15, 20, 82, 135, 57, 26, 121, 40 }
|
||||
},
|
||||
{
|
||||
{ 164, 50, 31, 137, 154, 133, 25, 35, 218 },
|
||||
{ 51, 103, 44, 131, 131, 123, 31, 6, 158 },
|
||||
{ 86, 40, 64, 135, 148, 224, 45, 183, 128 },
|
||||
{ 22, 26, 17, 131, 240, 154, 14, 1, 209 },
|
||||
{ 83, 12, 13, 54, 192, 255, 68, 47, 28 },
|
||||
{ 45, 16, 21, 91, 64, 222, 7, 1, 197 },
|
||||
{ 56, 21, 39, 155, 60, 138, 23, 102, 213 },
|
||||
{ 85, 26, 85, 85, 128, 128, 32, 146, 171 },
|
||||
{ 18, 11, 7, 63, 144, 171, 4, 4, 246 },
|
||||
{ 35, 27, 10, 146, 174, 171, 12, 26, 128 }
|
||||
},
|
||||
{
|
||||
{ 190, 80, 35, 99, 180, 80, 126, 54, 45 },
|
||||
{ 85, 126, 47, 87, 176, 51, 41, 20, 32 },
|
||||
{ 101, 75, 128, 139, 118, 146, 116, 128, 85 },
|
||||
{ 56, 41, 15, 176, 236, 85, 37, 9, 62 },
|
||||
{ 146, 36, 19, 30, 171, 255, 97, 27, 20 },
|
||||
{ 71, 30, 17, 119, 118, 255, 17, 18, 138 },
|
||||
{ 101, 38, 60, 138, 55, 70, 43, 26, 142 },
|
||||
{ 138, 45, 61, 62, 219, 1, 81, 188, 64 },
|
||||
{ 32, 41, 20, 117, 151, 142, 20, 21, 163 },
|
||||
{ 112, 19, 12, 61, 195, 128, 48, 4, 24 }
|
||||
}
|
||||
};
|
||||
const vp8_prob
|
||||
vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES - 1] = {
|
||||
{ { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
|
||||
{ 152, 179, 64, 126, 170, 118, 46, 70, 95 },
|
||||
{ 175, 69, 143, 80, 85, 82, 72, 155, 103 },
|
||||
{ 56, 58, 10, 171, 218, 189, 17, 13, 152 },
|
||||
{ 144, 71, 10, 38, 171, 213, 144, 34, 26 },
|
||||
{ 114, 26, 17, 163, 44, 195, 21, 10, 173 },
|
||||
{ 121, 24, 80, 195, 26, 62, 44, 64, 85 },
|
||||
{ 170, 46, 55, 19, 136, 160, 33, 206, 71 },
|
||||
{ 63, 20, 8, 114, 114, 208, 12, 9, 226 },
|
||||
{ 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
|
||||
{ { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
|
||||
{ 72, 187, 100, 130, 157, 111, 32, 75, 80 },
|
||||
{ 66, 102, 167, 99, 74, 62, 40, 234, 128 },
|
||||
{ 41, 53, 9, 178, 241, 141, 26, 8, 107 },
|
||||
{ 104, 79, 12, 27, 217, 255, 87, 17, 7 },
|
||||
{ 74, 43, 26, 146, 73, 166, 49, 23, 157 },
|
||||
{ 65, 38, 105, 160, 51, 52, 31, 115, 128 },
|
||||
{ 87, 68, 71, 44, 114, 51, 15, 186, 23 },
|
||||
{ 47, 41, 14, 110, 182, 183, 21, 17, 194 },
|
||||
{ 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
|
||||
{ { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
|
||||
{ 43, 97, 183, 117, 85, 38, 35, 179, 61 },
|
||||
{ 39, 53, 200, 87, 26, 21, 43, 232, 171 },
|
||||
{ 56, 34, 51, 104, 114, 102, 29, 93, 77 },
|
||||
{ 107, 54, 32, 26, 51, 1, 81, 43, 31 },
|
||||
{ 39, 28, 85, 171, 58, 165, 90, 98, 64 },
|
||||
{ 34, 22, 116, 206, 23, 34, 43, 166, 73 },
|
||||
{ 68, 25, 106, 22, 64, 171, 36, 225, 114 },
|
||||
{ 34, 19, 21, 102, 132, 188, 16, 76, 124 },
|
||||
{ 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
|
||||
{ { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
|
||||
{ 60, 148, 31, 172, 219, 228, 21, 18, 111 },
|
||||
{ 112, 113, 77, 85, 179, 255, 38, 120, 114 },
|
||||
{ 40, 42, 1, 196, 245, 209, 10, 25, 109 },
|
||||
{ 100, 80, 8, 43, 154, 1, 51, 26, 71 },
|
||||
{ 88, 43, 29, 140, 166, 213, 37, 43, 154 },
|
||||
{ 61, 63, 30, 155, 67, 45, 68, 1, 209 },
|
||||
{ 142, 78, 78, 16, 255, 128, 34, 197, 171 },
|
||||
{ 41, 40, 5, 102, 211, 183, 4, 1, 221 },
|
||||
{ 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
|
||||
{ { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
|
||||
{ 95, 84, 53, 89, 128, 100, 113, 101, 45 },
|
||||
{ 75, 79, 123, 47, 51, 128, 81, 171, 1 },
|
||||
{ 57, 17, 5, 71, 102, 57, 53, 41, 49 },
|
||||
{ 115, 21, 2, 10, 102, 255, 166, 23, 6 },
|
||||
{ 38, 33, 13, 121, 57, 73, 26, 1, 85 },
|
||||
{ 41, 10, 67, 138, 77, 110, 90, 47, 114 },
|
||||
{ 101, 29, 16, 10, 85, 128, 101, 196, 26 },
|
||||
{ 57, 18, 10, 102, 102, 213, 34, 20, 43 },
|
||||
{ 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
|
||||
{ { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
|
||||
{ 67, 87, 58, 169, 82, 115, 26, 59, 179 },
|
||||
{ 63, 59, 90, 180, 59, 166, 93, 73, 154 },
|
||||
{ 40, 40, 21, 116, 143, 209, 34, 39, 175 },
|
||||
{ 57, 46, 22, 24, 128, 1, 54, 17, 37 },
|
||||
{ 47, 15, 16, 183, 34, 223, 49, 45, 183 },
|
||||
{ 46, 17, 33, 183, 6, 98, 15, 32, 183 },
|
||||
{ 65, 32, 73, 115, 28, 128, 23, 128, 205 },
|
||||
{ 40, 3, 9, 115, 51, 192, 18, 6, 223 },
|
||||
{ 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
|
||||
{ { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
|
||||
{ 64, 90, 70, 205, 40, 41, 23, 26, 57 },
|
||||
{ 54, 57, 112, 184, 5, 41, 38, 166, 213 },
|
||||
{ 30, 34, 26, 133, 152, 116, 10, 32, 134 },
|
||||
{ 75, 32, 12, 51, 192, 255, 160, 43, 51 },
|
||||
{ 39, 19, 53, 221, 26, 114, 32, 73, 255 },
|
||||
{ 31, 9, 65, 234, 2, 15, 1, 118, 73 },
|
||||
{ 88, 31, 35, 67, 102, 85, 55, 186, 85 },
|
||||
{ 56, 21, 23, 111, 59, 205, 45, 37, 192 },
|
||||
{ 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
|
||||
{ { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
|
||||
{ 69, 60, 71, 38, 73, 119, 28, 222, 37 },
|
||||
{ 68, 45, 128, 34, 1, 47, 11, 245, 171 },
|
||||
{ 62, 17, 19, 70, 146, 85, 55, 62, 70 },
|
||||
{ 75, 15, 9, 9, 64, 255, 184, 119, 16 },
|
||||
{ 37, 43, 37, 154, 100, 163, 85, 160, 1 },
|
||||
{ 63, 9, 92, 136, 28, 64, 32, 201, 85 },
|
||||
{ 86, 6, 28, 5, 64, 255, 25, 248, 1 },
|
||||
{ 56, 8, 17, 132, 137, 255, 55, 116, 128 },
|
||||
{ 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
|
||||
{ { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
|
||||
{ 51, 103, 44, 131, 131, 123, 31, 6, 158 },
|
||||
{ 86, 40, 64, 135, 148, 224, 45, 183, 128 },
|
||||
{ 22, 26, 17, 131, 240, 154, 14, 1, 209 },
|
||||
{ 83, 12, 13, 54, 192, 255, 68, 47, 28 },
|
||||
{ 45, 16, 21, 91, 64, 222, 7, 1, 197 },
|
||||
{ 56, 21, 39, 155, 60, 138, 23, 102, 213 },
|
||||
{ 85, 26, 85, 85, 128, 128, 32, 146, 171 },
|
||||
{ 18, 11, 7, 63, 144, 171, 4, 4, 246 },
|
||||
{ 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
|
||||
{ { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
|
||||
{ 85, 126, 47, 87, 176, 51, 41, 20, 32 },
|
||||
{ 101, 75, 128, 139, 118, 146, 116, 128, 85 },
|
||||
{ 56, 41, 15, 176, 236, 85, 37, 9, 62 },
|
||||
{ 146, 36, 19, 30, 171, 255, 97, 27, 20 },
|
||||
{ 71, 30, 17, 119, 118, 255, 17, 18, 138 },
|
||||
{ 101, 38, 60, 138, 55, 70, 43, 26, 142 },
|
||||
{ 138, 45, 61, 62, 219, 1, 81, 188, 64 },
|
||||
{ 32, 41, 20, 117, 151, 142, 20, 21, 163 },
|
||||
{ 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -10,26 +10,20 @@
|
||||
|
||||
#include "vp8/common/x86/filter_x86.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
|
||||
{
|
||||
{ 128, 128, 128, 128, 0, 0, 0, 0 },
|
||||
{ 112, 112, 112, 112, 16, 16, 16, 16 },
|
||||
{ 96, 96, 96, 96, 32, 32, 32, 32 },
|
||||
{ 80, 80, 80, 80, 48, 48, 48, 48 },
|
||||
{ 64, 64, 64, 64, 64, 64, 64, 64 },
|
||||
{ 48, 48, 48, 48, 80, 80, 80, 80 },
|
||||
{ 32, 32, 32, 32, 96, 96, 96, 96 },
|
||||
{ 16, 16, 16, 16, 112, 112, 112, 112 }
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) = {
|
||||
{ 128, 128, 128, 128, 0, 0, 0, 0 }, { 112, 112, 112, 112, 16, 16, 16, 16 },
|
||||
{ 96, 96, 96, 96, 32, 32, 32, 32 }, { 80, 80, 80, 80, 48, 48, 48, 48 },
|
||||
{ 64, 64, 64, 64, 64, 64, 64, 64 }, { 48, 48, 48, 48, 80, 80, 80, 80 },
|
||||
{ 32, 32, 32, 32, 96, 96, 96, 96 }, { 16, 16, 16, 16, 112, 112, 112, 112 }
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) =
|
||||
{
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
|
||||
{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
|
||||
{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
|
||||
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
|
||||
{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
|
||||
{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
|
||||
{ 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) = {
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
|
||||
{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
|
||||
{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
|
||||
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
|
||||
{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
|
||||
{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
|
||||
{ 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
|
||||
};
|
||||
|
@ -15,114 +15,97 @@
|
||||
|
||||
extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
|
||||
|
||||
void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC)
|
||||
{
|
||||
short *sq = (short *) d->qcoeff;
|
||||
short *dq = (short *) d->dqcoeff;
|
||||
void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC) {
|
||||
short *sq = (short *)d->qcoeff;
|
||||
short *dq = (short *)d->dqcoeff;
|
||||
|
||||
vp8_dequantize_b_impl_mmx(sq, dq, DQC);
|
||||
vp8_dequantize_b_impl_mmx(sq, dq, DQC);
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_y_block_mmx
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
void vp8_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *dst,
|
||||
int stride, char *eobs) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx (q, dq, dst, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+16, dq, dst+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride,
|
||||
dst+4, stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[2] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+32, dq, dst+8, stride);
|
||||
else if (eobs[2] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride,
|
||||
dst+8, stride);
|
||||
memset(q + 32, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[3] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+48, dq, dst+12, stride);
|
||||
else if (eobs[3] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride,
|
||||
dst+12, stride);
|
||||
memset(q + 48, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 64;
|
||||
dst += 4*stride;
|
||||
eobs += 4;
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx(q, dq, dst, stride);
|
||||
else if (eobs[0] == 1) {
|
||||
vp8_dc_only_idct_add_mmx(q[0] * dq[0], dst, stride, dst, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx(q + 16, dq, dst + 4, stride);
|
||||
else if (eobs[1] == 1) {
|
||||
vp8_dc_only_idct_add_mmx(q[16] * dq[0], dst + 4, stride, dst + 4, stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[2] > 1)
|
||||
vp8_dequant_idct_add_mmx(q + 32, dq, dst + 8, stride);
|
||||
else if (eobs[2] == 1) {
|
||||
vp8_dc_only_idct_add_mmx(q[32] * dq[0], dst + 8, stride, dst + 8, stride);
|
||||
memset(q + 32, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[3] > 1)
|
||||
vp8_dequant_idct_add_mmx(q + 48, dq, dst + 12, stride);
|
||||
else if (eobs[3] == 1) {
|
||||
vp8_dc_only_idct_add_mmx(q[48] * dq[0], dst + 12, stride, dst + 12,
|
||||
stride);
|
||||
memset(q + 48, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 64;
|
||||
dst += 4 * stride;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_mmx
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
void vp8_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *dstu,
|
||||
unsigned char *dstv, int stride,
|
||||
char *eobs) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx (q, dq, dstu, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+16, dq, dstu+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride,
|
||||
dstu+4, stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4*stride;
|
||||
eobs += 2;
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx(q, dq, dstu, stride);
|
||||
else if (eobs[0] == 1) {
|
||||
vp8_dc_only_idct_add_mmx(q[0] * dq[0], dstu, stride, dstu, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx (q, dq, dstv, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+16, dq, dstv+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride,
|
||||
dstv+4, stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4*stride;
|
||||
eobs += 2;
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx(q + 16, dq, dstu + 4, stride);
|
||||
else if (eobs[1] == 1) {
|
||||
vp8_dc_only_idct_add_mmx(q[16] * dq[0], dstu + 4, stride, dstu + 4,
|
||||
stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4 * stride;
|
||||
eobs += 2;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx(q, dq, dstv, stride);
|
||||
else if (eobs[0] == 1) {
|
||||
vp8_dc_only_idct_add_mmx(q[0] * dq[0], dstv, stride, dstv, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx(q + 16, dq, dstv + 4, stride);
|
||||
else if (eobs[1] == 1) {
|
||||
vp8_dc_only_idct_add_mmx(q[16] * dq[0], dstv + 4, stride, dstv + 4,
|
||||
stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4 * stride;
|
||||
eobs += 2;
|
||||
}
|
||||
}
|
||||
|
@ -11,79 +11,68 @@
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
void vp8_idct_dequant_0_2x_sse2
|
||||
(short *q, short *dq ,
|
||||
unsigned char *dst, int dst_stride);
|
||||
void vp8_idct_dequant_full_2x_sse2
|
||||
(short *q, short *dq ,
|
||||
unsigned char *dst, int dst_stride);
|
||||
void vp8_idct_dequant_0_2x_sse2(short *q, short *dq, unsigned char *dst,
|
||||
int dst_stride);
|
||||
void vp8_idct_dequant_full_2x_sse2(short *q, short *dq, unsigned char *dst,
|
||||
int dst_stride);
|
||||
|
||||
void vp8_dequant_idct_add_y_block_sse2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst,
|
||||
int stride, char *eobs) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)(eobs))[0] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dst, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dst, stride);
|
||||
}
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)(eobs))[1] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q+32, dq, dst+8, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q+32, dq, dst+8, stride);
|
||||
}
|
||||
q += 64;
|
||||
dst += stride*4;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_sse2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)(eobs))[0] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride);
|
||||
}
|
||||
q += 32;
|
||||
dstu += stride*4;
|
||||
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)(eobs))[1] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride);
|
||||
}
|
||||
q += 32;
|
||||
|
||||
if (((short *)(eobs))[2])
|
||||
{
|
||||
if (((short *)(eobs))[2] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride);
|
||||
}
|
||||
q += 32;
|
||||
dstv += stride*4;
|
||||
|
||||
if (((short *)(eobs))[3])
|
||||
{
|
||||
if (((short *)(eobs))[3] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride);
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (((short *)(eobs))[0]) {
|
||||
if (((short *)(eobs))[0] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2(q, dq, dst, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride);
|
||||
vp8_idct_dequant_0_2x_sse2(q, dq, dst, stride);
|
||||
}
|
||||
if (((short *)(eobs))[1]) {
|
||||
if (((short *)(eobs))[1] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2(q + 32, dq, dst + 8, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2(q + 32, dq, dst + 8, stride);
|
||||
}
|
||||
q += 64;
|
||||
dst += stride * 4;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq,
|
||||
unsigned char *dstu,
|
||||
unsigned char *dstv, int stride,
|
||||
char *eobs) {
|
||||
if (((short *)(eobs))[0]) {
|
||||
if (((short *)(eobs))[0] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2(q, dq, dstu, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2(q, dq, dstu, stride);
|
||||
}
|
||||
q += 32;
|
||||
dstu += stride * 4;
|
||||
|
||||
if (((short *)(eobs))[1]) {
|
||||
if (((short *)(eobs))[1] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2(q, dq, dstu, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2(q, dq, dstu, stride);
|
||||
}
|
||||
q += 32;
|
||||
|
||||
if (((short *)(eobs))[2]) {
|
||||
if (((short *)(eobs))[2] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2(q, dq, dstv, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2(q, dq, dstv, stride);
|
||||
}
|
||||
q += 32;
|
||||
dstv += stride * 4;
|
||||
|
||||
if (((short *)(eobs))[3]) {
|
||||
if (((short *)(eobs))[3] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2(q, dq, dstv, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2(q, dq, dstv, stride);
|
||||
}
|
||||
}
|
||||
|
@ -8,20 +8,19 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit, \
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
|
||||
#define prototype_loopfilter_nc(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh)
|
||||
#define prototype_loopfilter_nc(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit, \
|
||||
const unsigned char *limit, const unsigned char *thresh)
|
||||
|
||||
#define prototype_simple_loopfilter(sym) \
|
||||
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
||||
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
||||
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
|
||||
@ -47,152 +46,178 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
|
||||
|
||||
#if HAVE_MMX
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim,
|
||||
lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim,
|
||||
lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride,
|
||||
blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride,
|
||||
blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride,
|
||||
blimit);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
|
||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
#if HAVE_SSE2
|
||||
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
#if ARCH_X86_64
|
||||
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr,
|
||||
2);
|
||||
#else
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
#endif
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride,
|
||||
lfi->blim, lfi->lim, lfi->hev_thr,
|
||||
v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride,
|
||||
blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride,
|
||||
blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride,
|
||||
blimit);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
#if ARCH_X86_64
|
||||
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr,
|
||||
2);
|
||||
#else
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
#endif
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim,
|
||||
lfi->lim, lfi->hev_thr, v_ptr + 4);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
|
||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8,70 +8,61 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "dboolhuff.h"
|
||||
#include "vp8/common/common.h"
|
||||
#include "vpx_dsp/vpx_dsp_common.h"
|
||||
|
||||
int vp8dx_start_decode(BOOL_DECODER *br,
|
||||
const unsigned char *source,
|
||||
unsigned int source_sz,
|
||||
vpx_decrypt_cb decrypt_cb,
|
||||
void *decrypt_state)
|
||||
{
|
||||
br->user_buffer_end = source+source_sz;
|
||||
br->user_buffer = source;
|
||||
br->value = 0;
|
||||
br->count = -8;
|
||||
br->range = 255;
|
||||
br->decrypt_cb = decrypt_cb;
|
||||
br->decrypt_state = decrypt_state;
|
||||
int vp8dx_start_decode(BOOL_DECODER *br, const unsigned char *source,
|
||||
unsigned int source_sz, vpx_decrypt_cb decrypt_cb,
|
||||
void *decrypt_state) {
|
||||
br->user_buffer_end = source + source_sz;
|
||||
br->user_buffer = source;
|
||||
br->value = 0;
|
||||
br->count = -8;
|
||||
br->range = 255;
|
||||
br->decrypt_cb = decrypt_cb;
|
||||
br->decrypt_state = decrypt_state;
|
||||
|
||||
if (source_sz && !source)
|
||||
return 1;
|
||||
if (source_sz && !source) return 1;
|
||||
|
||||
/* Populate the buffer */
|
||||
vp8dx_bool_decoder_fill(br);
|
||||
/* Populate the buffer */
|
||||
vp8dx_bool_decoder_fill(br);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vp8dx_bool_decoder_fill(BOOL_DECODER *br)
|
||||
{
|
||||
const unsigned char *bufptr = br->user_buffer;
|
||||
VP8_BD_VALUE value = br->value;
|
||||
int count = br->count;
|
||||
int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT);
|
||||
size_t bytes_left = br->user_buffer_end - bufptr;
|
||||
size_t bits_left = bytes_left * CHAR_BIT;
|
||||
int x = shift + CHAR_BIT - (int)bits_left;
|
||||
int loop_end = 0;
|
||||
unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1];
|
||||
void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
|
||||
const unsigned char *bufptr = br->user_buffer;
|
||||
VP8_BD_VALUE value = br->value;
|
||||
int count = br->count;
|
||||
int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT);
|
||||
size_t bytes_left = br->user_buffer_end - bufptr;
|
||||
size_t bits_left = bytes_left * CHAR_BIT;
|
||||
int x = shift + CHAR_BIT - (int)bits_left;
|
||||
int loop_end = 0;
|
||||
unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1];
|
||||
|
||||
if (br->decrypt_cb) {
|
||||
size_t n = VPXMIN(sizeof(decrypted), bytes_left);
|
||||
br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n);
|
||||
bufptr = decrypted;
|
||||
if (br->decrypt_cb) {
|
||||
size_t n = VPXMIN(sizeof(decrypted), bytes_left);
|
||||
br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n);
|
||||
bufptr = decrypted;
|
||||
}
|
||||
|
||||
if (x >= 0) {
|
||||
count += VP8_LOTS_OF_BITS;
|
||||
loop_end = x;
|
||||
}
|
||||
|
||||
if (x < 0 || bits_left) {
|
||||
while (shift >= loop_end) {
|
||||
count += CHAR_BIT;
|
||||
value |= (VP8_BD_VALUE)*bufptr << shift;
|
||||
++bufptr;
|
||||
++br->user_buffer;
|
||||
shift -= CHAR_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
if(x >= 0)
|
||||
{
|
||||
count += VP8_LOTS_OF_BITS;
|
||||
loop_end = x;
|
||||
}
|
||||
|
||||
if (x < 0 || bits_left)
|
||||
{
|
||||
while(shift >= loop_end)
|
||||
{
|
||||
count += CHAR_BIT;
|
||||
value |= (VP8_BD_VALUE)*bufptr << shift;
|
||||
++bufptr;
|
||||
++br->user_buffer;
|
||||
shift -= CHAR_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
br->value = value;
|
||||
br->count = count;
|
||||
br->value = value;
|
||||
br->count = count;
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user