Merge "Remove pair quantization"
This commit is contained in:
commit
c4b3625393
@ -457,21 +457,6 @@ add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
|
|||||||
specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
|
specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
|
||||||
$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
|
$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
|
||||||
|
|
||||||
add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
|
|
||||||
# no asm yet
|
|
||||||
|
|
||||||
add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
|
|
||||||
specialize qw/vp8_fast_quantize_b_pair neon/;
|
|
||||||
|
|
||||||
add_proto qw/void vp8_quantize_mb/, "struct macroblock *";
|
|
||||||
specialize qw/vp8_quantize_mb neon/;
|
|
||||||
|
|
||||||
add_proto qw/void vp8_quantize_mby/, "struct macroblock *";
|
|
||||||
specialize qw/vp8_quantize_mby neon/;
|
|
||||||
|
|
||||||
add_proto qw/void vp8_quantize_mbuv/, "struct macroblock *";
|
|
||||||
specialize qw/vp8_quantize_mbuv neon/;
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Block subtraction
|
# Block subtraction
|
||||||
#
|
#
|
||||||
|
@ -10,13 +10,12 @@
|
|||||||
|
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#include "vp8/encoder/block.h"
|
#include "vp8/encoder/block.h"
|
||||||
#include "vpx_mem/vpx_mem.h"
|
|
||||||
|
|
||||||
static const uint16_t inv_zig_zag[16] = {
|
static const uint16_t inv_zig_zag[16] = {
|
||||||
0x0001, 0x0002, 0x0006, 0x0007,
|
1, 2, 6, 7,
|
||||||
0x0003, 0x0005, 0x0008, 0x000d,
|
3, 5, 8, 13,
|
||||||
0x0004, 0x0009, 0x000c, 0x000e,
|
4, 9, 12, 14,
|
||||||
0x000a, 0x000b, 0x000f, 0x0010
|
10, 11, 15, 16
|
||||||
};
|
};
|
||||||
|
|
||||||
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
|
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
|
||||||
@ -88,118 +87,3 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
|
|||||||
|
|
||||||
vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
|
vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_fast_quantize_b_pair_neon(BLOCK *b0, BLOCK *b1,
|
|
||||||
BLOCKD *d0, BLOCKD *d1) {
|
|
||||||
const int16x8_t one_q = vdupq_n_s16(0xff),
|
|
||||||
b0_z0 = vld1q_s16(b0->coeff),
|
|
||||||
b0_z1 = vld1q_s16(b0->coeff + 8),
|
|
||||||
b0_round0 = vld1q_s16(b0->round),
|
|
||||||
b0_round1 = vld1q_s16(b0->round + 8),
|
|
||||||
b0_quant0 = vld1q_s16(b0->quant_fast),
|
|
||||||
b0_quant1 = vld1q_s16(b0->quant_fast + 8),
|
|
||||||
d0_dequant0 = vld1q_s16(d0->dequant),
|
|
||||||
d0_dequant1 = vld1q_s16(d0->dequant + 8),
|
|
||||||
b1_z0 = vld1q_s16(b1->coeff),
|
|
||||||
b1_z1 = vld1q_s16(b1->coeff + 8),
|
|
||||||
b1_round0 = vld1q_s16(b1->round),
|
|
||||||
b1_round1 = vld1q_s16(b1->round + 8),
|
|
||||||
b1_quant0 = vld1q_s16(b1->quant_fast),
|
|
||||||
b1_quant1 = vld1q_s16(b1->quant_fast + 8),
|
|
||||||
d1_dequant0 = vld1q_s16(d1->dequant),
|
|
||||||
d1_dequant1 = vld1q_s16(d1->dequant + 8);
|
|
||||||
const uint16x8_t zig_zag0 = vld1q_u16(inv_zig_zag),
|
|
||||||
zig_zag1 = vld1q_u16(inv_zig_zag + 8);
|
|
||||||
int16x8_t b0_x0, b0_x1, b0_sz0, b0_sz1, b0_y0, b0_y1,
|
|
||||||
b1_x0, b1_x1, b1_sz0, b1_sz1, b1_y0, b1_y1;
|
|
||||||
uint16x8_t b0_eob0, b0_eob1,
|
|
||||||
b1_eob0, b1_eob1;
|
|
||||||
uint16x4_t b0_eob_d16, b1_eob_d16;
|
|
||||||
uint32x2_t b0_eob_d32, b1_eob_d32;
|
|
||||||
uint32x4_t b0_eob_q32, b1_eob_q32;
|
|
||||||
|
|
||||||
/* sign of z: z >> 15 */
|
|
||||||
b0_sz0 = vshrq_n_s16(b0_z0, 15);
|
|
||||||
b0_sz1 = vshrq_n_s16(b0_z1, 15);
|
|
||||||
b1_sz0 = vshrq_n_s16(b1_z0, 15);
|
|
||||||
b1_sz1 = vshrq_n_s16(b1_z1, 15);
|
|
||||||
|
|
||||||
/* x = abs(z) */
|
|
||||||
b0_x0 = vabsq_s16(b0_z0);
|
|
||||||
b0_x1 = vabsq_s16(b0_z1);
|
|
||||||
b1_x0 = vabsq_s16(b1_z0);
|
|
||||||
b1_x1 = vabsq_s16(b1_z1);
|
|
||||||
|
|
||||||
/* x += round */
|
|
||||||
b0_x0 = vaddq_s16(b0_x0, b0_round0);
|
|
||||||
b0_x1 = vaddq_s16(b0_x1, b0_round1);
|
|
||||||
b1_x0 = vaddq_s16(b1_x0, b1_round0);
|
|
||||||
b1_x1 = vaddq_s16(b1_x1, b1_round1);
|
|
||||||
|
|
||||||
/* y = 2 * (x * quant) >> 16 */
|
|
||||||
b0_y0 = vqdmulhq_s16(b0_x0, b0_quant0);
|
|
||||||
b0_y1 = vqdmulhq_s16(b0_x1, b0_quant1);
|
|
||||||
b1_y0 = vqdmulhq_s16(b1_x0, b1_quant0);
|
|
||||||
b1_y1 = vqdmulhq_s16(b1_x1, b1_quant1);
|
|
||||||
|
|
||||||
/* Compensate for doubling in vqdmulhq */
|
|
||||||
b0_y0 = vshrq_n_s16(b0_y0, 1);
|
|
||||||
b0_y1 = vshrq_n_s16(b0_y1, 1);
|
|
||||||
b1_y0 = vshrq_n_s16(b1_y0, 1);
|
|
||||||
b1_y1 = vshrq_n_s16(b1_y1, 1);
|
|
||||||
|
|
||||||
/* Restore sign bit */
|
|
||||||
b0_y0 = veorq_s16(b0_y0, b0_sz0);
|
|
||||||
b0_y1 = veorq_s16(b0_y1, b0_sz1);
|
|
||||||
b0_x0 = vsubq_s16(b0_y0, b0_sz0);
|
|
||||||
b0_x1 = vsubq_s16(b0_y1, b0_sz1);
|
|
||||||
b1_y0 = veorq_s16(b1_y0, b1_sz0);
|
|
||||||
b1_y1 = veorq_s16(b1_y1, b1_sz1);
|
|
||||||
b1_x0 = vsubq_s16(b1_y0, b1_sz0);
|
|
||||||
b1_x1 = vsubq_s16(b1_y1, b1_sz1);
|
|
||||||
|
|
||||||
/* find non-zero elements */
|
|
||||||
b0_eob0 = vtstq_s16(b0_x0, one_q);
|
|
||||||
b0_eob1 = vtstq_s16(b0_x1, one_q);
|
|
||||||
b1_eob0 = vtstq_s16(b1_x0, one_q);
|
|
||||||
b1_eob1 = vtstq_s16(b1_x1, one_q);
|
|
||||||
|
|
||||||
/* mask zig zag */
|
|
||||||
b0_eob0 = vandq_u16(b0_eob0, zig_zag0);
|
|
||||||
b0_eob1 = vandq_u16(b0_eob1, zig_zag1);
|
|
||||||
b1_eob0 = vandq_u16(b1_eob0, zig_zag0);
|
|
||||||
b1_eob1 = vandq_u16(b1_eob1, zig_zag1);
|
|
||||||
|
|
||||||
/* select the largest value */
|
|
||||||
b0_eob0 = vmaxq_u16(b0_eob0, b0_eob1);
|
|
||||||
b0_eob_d16 = vmax_u16(vget_low_u16(b0_eob0),
|
|
||||||
vget_high_u16(b0_eob0));
|
|
||||||
b0_eob_q32 = vmovl_u16(b0_eob_d16);
|
|
||||||
b0_eob_d32 = vmax_u32(vget_low_u32(b0_eob_q32),
|
|
||||||
vget_high_u32(b0_eob_q32));
|
|
||||||
b0_eob_d32 = vpmax_u32(b0_eob_d32, b0_eob_d32);
|
|
||||||
|
|
||||||
b1_eob0 = vmaxq_u16(b1_eob0, b1_eob1);
|
|
||||||
b1_eob_d16 = vmax_u16(vget_low_u16(b1_eob0),
|
|
||||||
vget_high_u16(b1_eob0));
|
|
||||||
b1_eob_q32 = vmovl_u16(b1_eob_d16);
|
|
||||||
b1_eob_d32 = vmax_u32(vget_low_u32(b1_eob_q32),
|
|
||||||
vget_high_u32(b1_eob_q32));
|
|
||||||
b1_eob_d32 = vpmax_u32(b1_eob_d32, b1_eob_d32);
|
|
||||||
|
|
||||||
/* qcoeff = x */
|
|
||||||
vst1q_s16(d0->qcoeff, b0_x0);
|
|
||||||
vst1q_s16(d0->qcoeff + 8, b0_x1);
|
|
||||||
vst1q_s16(d1->qcoeff, b1_x0);
|
|
||||||
vst1q_s16(d1->qcoeff + 8, b1_x1);
|
|
||||||
|
|
||||||
/* dqcoeff = x * dequant */
|
|
||||||
vst1q_s16(d0->dqcoeff, vmulq_s16(d0_dequant0, b0_x0));
|
|
||||||
vst1q_s16(d0->dqcoeff + 8, vmulq_s16(d0_dequant1, b0_x1));
|
|
||||||
vst1q_s16(d1->dqcoeff, vmulq_s16(d1_dequant0, b1_x0));
|
|
||||||
vst1q_s16(d1->dqcoeff + 8, vmulq_s16(d1_dequant1, b1_x1));
|
|
||||||
|
|
||||||
vst1_lane_s8((int8_t *)d0->eob, vreinterpret_s8_u32(b0_eob_d32), 0);
|
|
||||||
vst1_lane_s8((int8_t *)d1->eob, vreinterpret_s8_u32(b1_eob_d32), 0);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
@ -1,64 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Use of this source code is governed by a BSD-style license
|
|
||||||
* that can be found in the LICENSE file in the root of the source
|
|
||||||
* tree. An additional intellectual property rights grant can be found
|
|
||||||
* in the file PATENTS. All contributing project authors may
|
|
||||||
* be found in the AUTHORS file in the root of the source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "vpx_config.h"
|
|
||||||
#include "vp8_rtcd.h"
|
|
||||||
#include "vp8/encoder/block.h"
|
|
||||||
#include <math.h>
|
|
||||||
#include "vpx_mem/vpx_mem.h"
|
|
||||||
#include "vp8/encoder/quantize.h"
|
|
||||||
#include "vp8/common/entropy.h"
|
|
||||||
|
|
||||||
|
|
||||||
#if HAVE_NEON
|
|
||||||
|
|
||||||
/* vp8_quantize_mbX functions here differs from corresponding ones in
|
|
||||||
* quantize.c only by using quantize_b_pair function pointer instead of
|
|
||||||
* the regular quantize_b function pointer */
|
|
||||||
void vp8_quantize_mby_neon(MACROBLOCK *x)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
|
||||||
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
|
|
||||||
|
|
||||||
for (i = 0; i < 16; i+=2)
|
|
||||||
x->quantize_b_pair(&x->block[i], &x->block[i+1],
|
|
||||||
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
|
|
||||||
|
|
||||||
if(has_2nd_order)
|
|
||||||
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
|
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_quantize_mb_neon(MACROBLOCK *x)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
|
||||||
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
|
|
||||||
|
|
||||||
for (i = 0; i < 24; i+=2)
|
|
||||||
x->quantize_b_pair(&x->block[i], &x->block[i+1],
|
|
||||||
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
|
|
||||||
|
|
||||||
if (has_2nd_order)
|
|
||||||
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_quantize_mbuv_neon(MACROBLOCK *x)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 16; i < 24; i+=2)
|
|
||||||
x->quantize_b_pair(&x->block[i], &x->block[i+1],
|
|
||||||
&x->e_mbd.block[i], &x->e_mbd.block[i+1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* HAVE_NEON */
|
|
@ -160,7 +160,6 @@ typedef struct macroblock
|
|||||||
void (*short_fdct8x4)(short *input, short *output, int pitch);
|
void (*short_fdct8x4)(short *input, short *output, int pitch);
|
||||||
void (*short_walsh4x4)(short *input, short *output, int pitch);
|
void (*short_walsh4x4)(short *input, short *output, int pitch);
|
||||||
void (*quantize_b)(BLOCK *b, BLOCKD *d);
|
void (*quantize_b)(BLOCK *b, BLOCKD *d);
|
||||||
void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
|
|
||||||
|
|
||||||
} MACROBLOCK;
|
} MACROBLOCK;
|
||||||
|
|
||||||
|
@ -1252,7 +1252,6 @@ int vp8cx_encode_inter_macroblock
|
|||||||
if(cpi->sf.use_fastquant_for_pick)
|
if(cpi->sf.use_fastquant_for_pick)
|
||||||
{
|
{
|
||||||
x->quantize_b = vp8_fast_quantize_b;
|
x->quantize_b = vp8_fast_quantize_b;
|
||||||
x->quantize_b_pair = vp8_fast_quantize_b_pair;
|
|
||||||
|
|
||||||
/* the fast quantizer does not use zbin_extra, so
|
/* the fast quantizer does not use zbin_extra, so
|
||||||
* do not recalculate */
|
* do not recalculate */
|
||||||
@ -1265,7 +1264,6 @@ int vp8cx_encode_inter_macroblock
|
|||||||
if (cpi->sf.improved_quant)
|
if (cpi->sf.improved_quant)
|
||||||
{
|
{
|
||||||
x->quantize_b = vp8_regular_quantize_b;
|
x->quantize_b = vp8_regular_quantize_b;
|
||||||
x->quantize_b_pair = vp8_regular_quantize_b_pair;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* restore cpi->zbin_mode_boost_enabled */
|
/* restore cpi->zbin_mode_boost_enabled */
|
||||||
|
@ -346,7 +346,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
|||||||
z->short_fdct8x4 = x->short_fdct8x4;
|
z->short_fdct8x4 = x->short_fdct8x4;
|
||||||
z->short_walsh4x4 = x->short_walsh4x4;
|
z->short_walsh4x4 = x->short_walsh4x4;
|
||||||
z->quantize_b = x->quantize_b;
|
z->quantize_b = x->quantize_b;
|
||||||
z->quantize_b_pair = x->quantize_b_pair;
|
|
||||||
z->optimize = x->optimize;
|
z->optimize = x->optimize;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1083,12 +1083,10 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
|||||||
if (cpi->sf.improved_quant)
|
if (cpi->sf.improved_quant)
|
||||||
{
|
{
|
||||||
cpi->mb.quantize_b = vp8_regular_quantize_b;
|
cpi->mb.quantize_b = vp8_regular_quantize_b;
|
||||||
cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cpi->mb.quantize_b = vp8_fast_quantize_b;
|
cpi->mb.quantize_b = vp8_fast_quantize_b;
|
||||||
cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair;
|
|
||||||
}
|
}
|
||||||
if (cpi->sf.improved_quant != last_improved_quant)
|
if (cpi->sf.improved_quant != last_improved_quant)
|
||||||
vp8cx_init_quantizer(cpi);
|
vp8cx_init_quantizer(cpi);
|
||||||
|
@ -101,7 +101,7 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
|
|||||||
*d->eob = (char)(eob + 1);
|
*d->eob = (char)(eob + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_quantize_mby_c(MACROBLOCK *x)
|
void vp8_quantize_mby(MACROBLOCK *x)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
||||||
@ -114,7 +114,7 @@ void vp8_quantize_mby_c(MACROBLOCK *x)
|
|||||||
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
|
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_quantize_mb_c(MACROBLOCK *x)
|
void vp8_quantize_mb(MACROBLOCK *x)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
|
||||||
@ -125,7 +125,7 @@ void vp8_quantize_mb_c(MACROBLOCK *x)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_quantize_mbuv_c(MACROBLOCK *x)
|
void vp8_quantize_mbuv(MACROBLOCK *x)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -133,23 +133,6 @@ void vp8_quantize_mbuv_c(MACROBLOCK *x)
|
|||||||
x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
|
x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
|
|
||||||
* these two C functions if corresponding optimized routine is not available.
|
|
||||||
* NEON optimized version implements currently the fast quantization for pair
|
|
||||||
* of blocks. */
|
|
||||||
void vp8_regular_quantize_b_pair(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
|
|
||||||
{
|
|
||||||
vp8_regular_quantize_b(b1, d1);
|
|
||||||
vp8_regular_quantize_b(b2, d2);
|
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_fast_quantize_b_pair_c(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
|
|
||||||
{
|
|
||||||
vp8_fast_quantize_b_c(b1, d1);
|
|
||||||
vp8_fast_quantize_b_c(b2, d2);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static const int qrounding_factors[129] =
|
static const int qrounding_factors[129] =
|
||||||
{
|
{
|
||||||
48, 48, 48, 48, 48, 48, 48, 48,
|
48, 48, 48, 48, 48, 48, 48, 48,
|
||||||
|
@ -18,6 +18,9 @@ extern "C" {
|
|||||||
|
|
||||||
struct VP8_COMP;
|
struct VP8_COMP;
|
||||||
struct macroblock;
|
struct macroblock;
|
||||||
|
extern void vp8_quantize_mb(struct macroblock *x);
|
||||||
|
extern void vp8_quantize_mby(struct macroblock *x);
|
||||||
|
extern void vp8_quantize_mbuv(struct macroblock *x);
|
||||||
extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q);
|
extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q);
|
||||||
extern void vp8cx_frame_init_quantizer(struct VP8_COMP *cpi);
|
extern void vp8cx_frame_init_quantizer(struct VP8_COMP *cpi);
|
||||||
extern void vp8_update_zbin_extra(struct VP8_COMP *cpi, struct macroblock *x);
|
extern void vp8_update_zbin_extra(struct VP8_COMP *cpi, struct macroblock *x);
|
||||||
|
@ -14,7 +14,6 @@ VP8_CX_SRCS-$(ARCH_ARM) += vp8cx_arm.mk
|
|||||||
#File list for arm
|
#File list for arm
|
||||||
# encoder
|
# encoder
|
||||||
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c
|
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c
|
||||||
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/quantize_arm.c
|
|
||||||
|
|
||||||
#File list for edsp
|
#File list for edsp
|
||||||
# encoder
|
# encoder
|
||||||
|
Loading…
x
Reference in New Issue
Block a user