Merge "WIP: 4x4 idct/recon merge" into experimental
This commit is contained in:
@@ -1006,14 +1006,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
|
||||
pc->uv_dc_delta_q == 0 &&
|
||||
pc->uv_ac_delta_q == 0;
|
||||
if (xd->lossless) {
|
||||
xd->inv_txm4x4_1 = vp9_short_iwalsh4x4_1;
|
||||
xd->inv_txm4x4 = vp9_short_iwalsh4x4;
|
||||
xd->itxm_add = vp9_idct_add_lossless_c;
|
||||
xd->itxm_add_y_block = vp9_idct_add_y_block_lossless_c;
|
||||
xd->itxm_add_uv_block = vp9_idct_add_uv_block_lossless_c;
|
||||
} else {
|
||||
xd->inv_txm4x4_1 = vp9_short_idct4x4_1;
|
||||
xd->inv_txm4x4 = vp9_short_idct4x4;
|
||||
xd->itxm_add = vp9_idct_add;
|
||||
xd->itxm_add_y_block = vp9_idct_add_y_block;
|
||||
xd->itxm_add_uv_block = vp9_idct_add_uv_block;
|
||||
|
||||
@@ -84,23 +84,6 @@ void vp9_idct_add_uv_block_lossless_c(int16_t *q, uint8_t *dst, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void add_residual(const int16_t *diff, uint8_t *dest, int stride,
|
||||
int width, int height) {
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < height; r++) {
|
||||
for (c = 0; c < width; c++)
|
||||
dest[c] = clip_pixel(diff[c] + dest[c]);
|
||||
|
||||
dest += stride;
|
||||
diff += width;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_add_residual_4x4_c(const int16_t *diff, uint8_t *dest, int stride) {
|
||||
add_residual(diff, dest, stride, 4, 4);
|
||||
}
|
||||
|
||||
static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride,
|
||||
int width, int height) {
|
||||
int r, c;
|
||||
@@ -133,11 +116,8 @@ void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_idct_add(input, dest, stride, eob);
|
||||
} else {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
|
||||
|
||||
vp9_short_iht4x4(input, output, 4, tx_type);
|
||||
vp9_short_iht4x4_add(input, dest, stride, tx_type);
|
||||
vpx_memset(input, 0, 32);
|
||||
vp9_add_residual_4x4(output, dest, stride);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -154,13 +134,9 @@ void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
|
||||
}
|
||||
|
||||
void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
|
||||
|
||||
if (eob > 1) {
|
||||
// the idct halves ( >> 1) the pitch
|
||||
vp9_short_idct4x4(input, output, 4 << 1);
|
||||
vp9_short_idct4x4_add(input, dest, stride);
|
||||
vpx_memset(input, 0, 32);
|
||||
vp9_add_residual_4x4(output, dest, stride);
|
||||
} else {
|
||||
vp9_dc_only_idct_add(input[0], dest, dest, stride, stride);
|
||||
((int *)input)[0] = 0;
|
||||
@@ -168,38 +144,27 @@ void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
}
|
||||
|
||||
void vp9_dc_idct_add_c(int16_t *input, uint8_t *dest, int stride, int dc) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
|
||||
|
||||
input[0] = dc;
|
||||
|
||||
// the idct halves ( >> 1) the pitch
|
||||
vp9_short_idct4x4(input, output, 4 << 1);
|
||||
vp9_short_idct4x4_add(input, dest, stride);
|
||||
vpx_memset(input, 0, 32);
|
||||
vp9_add_residual_4x4(output, dest, stride);
|
||||
}
|
||||
|
||||
void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, int stride,
|
||||
int eob) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
|
||||
|
||||
if (eob > 1) {
|
||||
vp9_short_iwalsh4x4_c(input, output, 4 << 1);
|
||||
vp9_short_iwalsh4x4_add(input, dest, stride);
|
||||
vpx_memset(input, 0, 32);
|
||||
vp9_add_residual_4x4(output, dest, stride);
|
||||
} else {
|
||||
vp9_dc_only_inv_walsh_add(input[0], dest, dest, stride, stride);
|
||||
vp9_short_iwalsh4x4_1_add_c(input, dest, stride);
|
||||
((int *)input)[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_dc_idct_add_lossless_c(int16_t *input, uint8_t *dest,
|
||||
int stride, int dc) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
|
||||
|
||||
input[0] = dc;
|
||||
vp9_short_iwalsh4x4_c(input, output, 4 << 1);
|
||||
vp9_short_iwalsh4x4_add(input, dest, stride);
|
||||
vpx_memset(input, 0, 32);
|
||||
vp9_add_residual_4x4(output, dest, stride);
|
||||
}
|
||||
|
||||
void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
|
||||
@@ -15,49 +15,6 @@
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
|
||||
void vp9_add_residual_4x4_sse2(const int16_t *diff, uint8_t *dest, int stride) {
|
||||
const int width = 4;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Diff data
|
||||
const __m128i d0 = _mm_loadl_epi64((const __m128i *)(diff + 0 * width));
|
||||
const __m128i d1 = _mm_loadl_epi64((const __m128i *)(diff + 1 * width));
|
||||
const __m128i d2 = _mm_loadl_epi64((const __m128i *)(diff + 2 * width));
|
||||
const __m128i d3 = _mm_loadl_epi64((const __m128i *)(diff + 3 * width));
|
||||
|
||||
// Prediction data.
|
||||
__m128i p0 = _mm_cvtsi32_si128(*(const int *)(dest + 0 * stride));
|
||||
__m128i p1 = _mm_cvtsi32_si128(*(const int *)(dest + 1 * stride));
|
||||
__m128i p2 = _mm_cvtsi32_si128(*(const int *)(dest + 2 * stride));
|
||||
__m128i p3 = _mm_cvtsi32_si128(*(const int *)(dest + 3 * stride));
|
||||
|
||||
p0 = _mm_unpacklo_epi8(p0, zero);
|
||||
p1 = _mm_unpacklo_epi8(p1, zero);
|
||||
p2 = _mm_unpacklo_epi8(p2, zero);
|
||||
p3 = _mm_unpacklo_epi8(p3, zero);
|
||||
|
||||
p0 = _mm_add_epi16(p0, d0);
|
||||
p1 = _mm_add_epi16(p1, d1);
|
||||
p2 = _mm_add_epi16(p2, d2);
|
||||
p3 = _mm_add_epi16(p3, d3);
|
||||
|
||||
p0 = _mm_packus_epi16(p0, p1);
|
||||
p2 = _mm_packus_epi16(p2, p3);
|
||||
|
||||
*(int *)dest = _mm_cvtsi128_si32(p0);
|
||||
dest += stride;
|
||||
|
||||
p0 = _mm_srli_si128(p0, 8);
|
||||
*(int *)dest = _mm_cvtsi128_si32(p0);
|
||||
dest += stride;
|
||||
|
||||
*(int *)dest = _mm_cvtsi128_si32(p2);
|
||||
dest += stride;
|
||||
|
||||
p2 = _mm_srli_si128(p2, 8);
|
||||
*(int *)dest = _mm_cvtsi128_si32(p2);
|
||||
}
|
||||
|
||||
void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest,
|
||||
int stride) {
|
||||
uint8_t abs_diff;
|
||||
|
||||
Reference in New Issue
Block a user