Merge "Using stride (# of elements) instead of pitch (bytes) in fdct4x4."
This commit is contained in:
commit
9f09618bd4
@ -31,15 +31,15 @@ void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
|||||||
}
|
}
|
||||||
void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||||
int stride, int /*tx_type*/) {
|
int stride, int /*tx_type*/) {
|
||||||
vp9_idct4x4_16_add_c(out, dst, stride >> 1);
|
vp9_idct4x4_16_add_c(out, dst, stride);
|
||||||
}
|
}
|
||||||
void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||||
int stride, int tx_type) {
|
int stride, int tx_type) {
|
||||||
vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);
|
vp9_short_fht4x4_c(in, out, stride, tx_type);
|
||||||
}
|
}
|
||||||
void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||||
int stride, int tx_type) {
|
int stride, int tx_type) {
|
||||||
vp9_iht4x4_16_add_c(out, dst, stride >> 1, tx_type);
|
vp9_iht4x4_16_add_c(out, dst, stride, tx_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
|
class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
|
||||||
@ -78,7 +78,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
|
|||||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
|
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
|
||||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
|
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
|
||||||
const int pitch = 8;
|
const int pitch = 4;
|
||||||
int count_sign_block[16][2];
|
int count_sign_block[16][2];
|
||||||
const int count_test_block = 1000000;
|
const int count_test_block = 1000000;
|
||||||
|
|
||||||
@ -152,7 +152,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
|||||||
for (int j = 0; j < 16; ++j)
|
for (int j = 0; j < 16; ++j)
|
||||||
test_input_block[j] = src[j] - dst[j];
|
test_input_block[j] = src[j] - dst[j];
|
||||||
|
|
||||||
const int pitch = 8;
|
const int pitch = 4;
|
||||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||||
|
|
||||||
for (int j = 0; j < 16; ++j) {
|
for (int j = 0; j < 16; ++j) {
|
||||||
|
@ -698,7 +698,7 @@ specialize vp9_short_fht16x16 sse2
|
|||||||
prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int stride"
|
prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int stride"
|
||||||
specialize vp9_short_fdct8x8 sse2
|
specialize vp9_short_fdct8x8 sse2
|
||||||
|
|
||||||
prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
|
prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int stride"
|
||||||
specialize vp9_short_fdct4x4 sse2
|
specialize vp9_short_fdct4x4 sse2
|
||||||
|
|
||||||
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int stride"
|
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int stride"
|
||||||
|
@ -36,14 +36,13 @@ static void fdct4(const int16_t *input, int16_t *output) {
|
|||||||
output[3] = dct_const_round_shift(temp2);
|
output[3] = dct_const_round_shift(temp2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
|
void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int stride) {
|
||||||
// The 2D transform is done with two passes which are actually pretty
|
// The 2D transform is done with two passes which are actually pretty
|
||||||
// similar. In the first one, we transform the columns and transpose
|
// similar. In the first one, we transform the columns and transpose
|
||||||
// the results. In the second one, we transform the rows. To achieve that,
|
// the results. In the second one, we transform the rows. To achieve that,
|
||||||
// as the first pass results are transposed, we transpose the columns (that
|
// as the first pass results are transposed, we transpose the columns (that
|
||||||
// is the transposed rows) and transpose the results (so that it goes back
|
// is the transposed rows) and transpose the results (so that it goes back
|
||||||
// in normal/row positions).
|
// in normal/row positions).
|
||||||
const int stride = pitch >> 1;
|
|
||||||
int pass;
|
int pass;
|
||||||
// We need an intermediate buffer between passes.
|
// We need an intermediate buffer between passes.
|
||||||
int16_t intermediate[4 * 4];
|
int16_t intermediate[4 * 4];
|
||||||
@ -586,18 +585,17 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output,
|
|||||||
|
|
||||||
/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
|
/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
|
||||||
pixel. */
|
pixel. */
|
||||||
void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int pitch) {
|
void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int stride) {
|
||||||
int i;
|
int i;
|
||||||
int a1, b1, c1, d1, e1;
|
int a1, b1, c1, d1, e1;
|
||||||
int16_t *ip = input;
|
int16_t *ip = input;
|
||||||
int16_t *op = output;
|
int16_t *op = output;
|
||||||
int pitch_short = pitch >> 1;
|
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
a1 = ip[0 * pitch_short];
|
a1 = ip[0 * stride];
|
||||||
b1 = ip[1 * pitch_short];
|
b1 = ip[1 * stride];
|
||||||
c1 = ip[2 * pitch_short];
|
c1 = ip[2 * stride];
|
||||||
d1 = ip[3 * pitch_short];
|
d1 = ip[3 * stride];
|
||||||
|
|
||||||
a1 += b1;
|
a1 += b1;
|
||||||
d1 = d1 - c1;
|
d1 = d1 - c1;
|
||||||
|
@ -402,7 +402,7 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
|
|||||||
xoff = 4 * (block & twmask);
|
xoff = 4 * (block & twmask);
|
||||||
yoff = 4 * (block >> twl);
|
yoff = 4 * (block >> twl);
|
||||||
src_diff = p->src_diff + 4 * bw * yoff + xoff;
|
src_diff = p->src_diff + 4 * bw * yoff + xoff;
|
||||||
x->fwd_txm4x4(src_diff, coeff, bw * 8);
|
x->fwd_txm4x4(src_diff, coeff, bw * 4);
|
||||||
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
|
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
|
||||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||||
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
||||||
@ -612,7 +612,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
|||||||
if (tx_type != DCT_DCT)
|
if (tx_type != DCT_DCT)
|
||||||
vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
|
vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
|
||||||
else
|
else
|
||||||
x->fwd_txm4x4(src_diff, coeff, bw * 8);
|
x->fwd_txm4x4(src_diff, coeff, bw * 4);
|
||||||
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
|
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
|
||||||
p->quant_shift, qcoeff, dqcoeff,
|
p->quant_shift, qcoeff, dqcoeff,
|
||||||
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
||||||
|
@ -1089,7 +1089,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
|||||||
vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
|
vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
|
||||||
x->quantize_b_4x4(x, block, tx_type, 16);
|
x->quantize_b_4x4(x, block, tx_type, 16);
|
||||||
} else {
|
} else {
|
||||||
x->fwd_txm4x4(src_diff, coeff, 16);
|
x->fwd_txm4x4(src_diff, coeff, 8);
|
||||||
x->quantize_b_4x4(x, block, tx_type, 16);
|
x->quantize_b_4x4(x, block, tx_type, 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1563,7 +1563,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
|
|||||||
k += (idy * 2 + idx);
|
k += (idy * 2 + idx);
|
||||||
coeff = BLOCK_OFFSET(p->coeff, k);
|
coeff = BLOCK_OFFSET(p->coeff, k);
|
||||||
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
|
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
|
||||||
coeff, 16);
|
coeff, 8);
|
||||||
x->quantize_b_4x4(x, k, DCT_DCT, 16);
|
x->quantize_b_4x4(x, k, DCT_DCT, 16);
|
||||||
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
|
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
|
||||||
16, &ssz);
|
16, &ssz);
|
||||||
|
@ -12,14 +12,13 @@
|
|||||||
#include "vp9/common/vp9_idct.h" // for cospi constants
|
#include "vp9/common/vp9_idct.h" // for cospi constants
|
||||||
#include "vpx_ports/mem.h"
|
#include "vpx_ports/mem.h"
|
||||||
|
|
||||||
void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
|
void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int stride) {
|
||||||
// The 2D transform is done with two passes which are actually pretty
|
// The 2D transform is done with two passes which are actually pretty
|
||||||
// similar. In the first one, we transform the columns and transpose
|
// similar. In the first one, we transform the columns and transpose
|
||||||
// the results. In the second one, we transform the rows. To achieve that,
|
// the results. In the second one, we transform the rows. To achieve that,
|
||||||
// as the first pass results are transposed, we transpose the columns (that
|
// as the first pass results are transposed, we transpose the columns (that
|
||||||
// is the transposed rows) and transpose the results (so that it goes back
|
// is the transposed rows) and transpose the results (so that it goes back
|
||||||
// in normal/row positions).
|
// in normal/row positions).
|
||||||
const int stride = pitch >> 1;
|
|
||||||
int pass;
|
int pass;
|
||||||
// Constants
|
// Constants
|
||||||
// When we use them, in one case, they are all the same. In all others
|
// When we use them, in one case, they are all the same. In all others
|
||||||
|
Loading…
x
Reference in New Issue
Block a user