Merge "WIP: 4x4 idct/recon merge" into experimental

This commit is contained in:
Scott LaVarnway
2013-05-21 10:45:53 -07:00
committed by Gerrit Code Review
15 changed files with 124 additions and 246 deletions

View File

@@ -1006,14 +1006,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
pc->uv_dc_delta_q == 0 &&
pc->uv_ac_delta_q == 0;
if (xd->lossless) {
xd->inv_txm4x4_1 = vp9_short_iwalsh4x4_1;
xd->inv_txm4x4 = vp9_short_iwalsh4x4;
xd->itxm_add = vp9_idct_add_lossless_c;
xd->itxm_add_y_block = vp9_idct_add_y_block_lossless_c;
xd->itxm_add_uv_block = vp9_idct_add_uv_block_lossless_c;
} else {
xd->inv_txm4x4_1 = vp9_short_idct4x4_1;
xd->inv_txm4x4 = vp9_short_idct4x4;
xd->itxm_add = vp9_idct_add;
xd->itxm_add_y_block = vp9_idct_add_y_block;
xd->itxm_add_uv_block = vp9_idct_add_uv_block;

View File

@@ -84,23 +84,6 @@ void vp9_idct_add_uv_block_lossless_c(int16_t *q, uint8_t *dst, int stride,
}
}
// Adds a width x height block of 16-bit residuals to the 8-bit pixels at
// `dest`, storing clip_pixel() of every sum back into `dest`.
//   diff   - residuals, stored row after row with `width` values per row
//   dest   - pixels updated in place; consecutive rows are `stride` apart
//   stride - distance, in bytes, between the starts of two rows of `dest`
static void add_residual(const int16_t *diff, uint8_t *dest, int stride,
int width, int height) {
int row;
for (row = 0; row < height; ++row) {
const int16_t *const residual_row = diff + row * width;
uint8_t *const pixel_row = dest + row * stride;
int col;
for (col = 0; col < width; ++col) {
pixel_row[col] = clip_pixel(residual_row[col] + pixel_row[col]);
}
}
}
// C version of the 4x4 residual add: forwards to add_residual() with both
// block dimensions fixed at 4.
void vp9_add_residual_4x4_c(const int16_t *diff, uint8_t *dest, int stride) {
enum { kBlockDim = 4 };
add_residual(diff, dest, stride, kBlockDim, kBlockDim);
}
static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride,
int width, int height) {
int r, c;
@@ -133,11 +116,8 @@ void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
if (tx_type == DCT_DCT) {
vp9_idct_add(input, dest, stride, eob);
} else {
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
vp9_short_iht4x4(input, output, 4, tx_type);
vp9_short_iht4x4_add(input, dest, stride, tx_type);
vpx_memset(input, 0, 32);
vp9_add_residual_4x4(output, dest, stride);
}
}
@@ -154,13 +134,9 @@ void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
}
void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) {
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
if (eob > 1) {
// the idct halves ( >> 1) the pitch
vp9_short_idct4x4(input, output, 4 << 1);
vp9_short_idct4x4_add(input, dest, stride);
vpx_memset(input, 0, 32);
vp9_add_residual_4x4(output, dest, stride);
} else {
vp9_dc_only_idct_add(input[0], dest, dest, stride, stride);
((int *)input)[0] = 0;
@@ -168,38 +144,27 @@ void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) {
}
// Stores `dc` in input[0], runs the 4x4 inverse DCT calls listed below against
// `dest`, and zeroes 32 bytes of `input` (sixteen int16_t values).
//
// NOTE(review): this listing looks like a commit diff whose +/- markers were
// stripped. Two alternative reconstruction paths appear side by side:
//   - vp9_short_idct4x4() into the local `output` buffer, followed by
//     vp9_add_residual_4x4(output, dest, stride);
//   - vp9_short_idct4x4_add(input, dest, stride), which takes `dest` directly.
// Presumably only one of them exists in the real file - confirm against the
// repository before relying on this text.
void vp9_dc_idct_add_c(int16_t *input, uint8_t *dest, int stride, int dc) {
// 16-element int16_t scratch buffer; the 16 is presumably the alignment.
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
// Install the caller-supplied DC coefficient before transforming.
input[0] = dc;
// the idct halves ( >> 1) the pitch
vp9_short_idct4x4(input, output, 4 << 1);
vp9_short_idct4x4_add(input, dest, stride);
// Clear the coefficient block in place.
vpx_memset(input, 0, 32);
vp9_add_residual_4x4(output, dest, stride);
}
// Lossless (inverse Walsh) counterpart of the 4x4 idct-add: when eob > 1 the
// full 4x4 inverse Walsh calls are used and 32 bytes of `input` are zeroed;
// otherwise the DC-only calls are used and only the first int-sized chunk of
// `input` is zeroed.
//
// NOTE(review): this listing looks like a commit diff whose +/- markers were
// stripped, so each branch shows two alternative paths at once:
//   - transform into `output` and then vp9_add_residual_4x4(), or
//     vp9_dc_only_inv_walsh_add();
//   - the *_add variants (vp9_short_iwalsh4x4_add,
//     vp9_short_iwalsh4x4_1_add_c) that take `dest` directly.
// Presumably only one path per branch exists in the real file - confirm
// against the repository.
void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, int stride,
int eob) {
// 16-element int16_t scratch buffer; the 16 is presumably the alignment.
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
if (eob > 1) {
vp9_short_iwalsh4x4_c(input, output, 4 << 1);
vp9_short_iwalsh4x4_add(input, dest, stride);
// Clear the coefficient block in place (sixteen int16_t values).
vpx_memset(input, 0, 32);
vp9_add_residual_4x4(output, dest, stride);
} else {
vp9_dc_only_inv_walsh_add(input[0], dest, dest, stride, stride);
vp9_short_iwalsh4x4_1_add_c(input, dest, stride);
// Zero the first sizeof(int) bytes of `input` with a single store.
((int *)input)[0] = 0;
}
}
// Stores `dc` in input[0], runs the 4x4 inverse Walsh calls listed below
// against `dest`, and zeroes 32 bytes of `input` (sixteen int16_t values).
//
// NOTE(review): this listing looks like a commit diff whose +/- markers were
// stripped. Two alternative reconstruction paths appear side by side:
//   - vp9_short_iwalsh4x4_c() into the local `output` buffer, followed by
//     vp9_add_residual_4x4(output, dest, stride);
//   - vp9_short_iwalsh4x4_add(input, dest, stride), which takes `dest`
//     directly.
// Presumably only one of them exists in the real file - confirm against the
// repository before relying on this text.
void vp9_dc_idct_add_lossless_c(int16_t *input, uint8_t *dest,
int stride, int dc) {
// 16-element int16_t scratch buffer; the 16 is presumably the alignment.
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
// Install the caller-supplied DC coefficient before transforming.
input[0] = dc;
vp9_short_iwalsh4x4_c(input, output, 4 << 1);
vp9_short_iwalsh4x4_add(input, dest, stride);
// Clear the coefficient block in place.
vpx_memset(input, 0, 32);
vp9_add_residual_4x4(output, dest, stride);
}
void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {

View File

@@ -15,49 +15,6 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_idct.h"
// SSE2 version of the 4x4 residual add.
//   diff   - sixteen 16-bit residuals, four per row, rows stored back to back
//   dest   - 4x4 block of 8-bit pixels, updated in place
//   stride - distance, in bytes, between the starts of two rows of `dest`
// Each output pixel is pixel + residual, saturated to 0..255 by the pack.
void vp9_add_residual_4x4_sse2(const int16_t *diff, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
uint8_t *const row0 = dest;
uint8_t *const row1 = dest + 1 * stride;
uint8_t *const row2 = dest + 2 * stride;
uint8_t *const row3 = dest + 3 * stride;
// Per row: fetch four pixels as one 32-bit word, widen them to 16 bits and
// add the row's four residuals (one 64-bit load from `diff`).
const __m128i sum0 = _mm_add_epi16(
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int *)row0), zero),
_mm_loadl_epi64((const __m128i *)(diff + 0 * 4)));
const __m128i sum1 = _mm_add_epi16(
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int *)row1), zero),
_mm_loadl_epi64((const __m128i *)(diff + 1 * 4)));
const __m128i sum2 = _mm_add_epi16(
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int *)row2), zero),
_mm_loadl_epi64((const __m128i *)(diff + 2 * 4)));
const __m128i sum3 = _mm_add_epi16(
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int *)row3), zero),
_mm_loadl_epi64((const __m128i *)(diff + 3 * 4)));
// Saturate back to 8 bits. The first operand lands in bytes 0..7 and the
// second in bytes 8..15, so each register carries two rows.
const __m128i rows01 = _mm_packus_epi16(sum0, sum1);
const __m128i rows23 = _mm_packus_epi16(sum2, sum3);
// The odd rows sit in the upper 64 bits; shift them down before storing.
*(int *)row0 = _mm_cvtsi128_si32(rows01);
*(int *)row1 = _mm_cvtsi128_si32(_mm_srli_si128(rows01, 8));
*(int *)row2 = _mm_cvtsi128_si32(rows23);
*(int *)row3 = _mm_cvtsi128_si32(_mm_srli_si128(rows23, 8));
}
void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest,
int stride) {
uint8_t abs_diff;