Fix warnings reported by -Wshadow: Part2: av1 directory

While we are at it:
- Rename some variables to more meaningful names
- Reuse some common consts from a header instead of redefining them.

Cherry-picked from aomedia/master: 863b0499

Change-Id: Ida5de713156dc0126a27f90fdd36d29a398a3c88
commit 454280dabf
parent 03f6fdcfca
Author: Urvang Joshi
Date:   2016-10-14 16:51:44 -07:00
20 changed files with 440 additions and 458 deletions
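
For context, -Wshadow fires when a declaration in an inner scope hides a variable of the same name in an outer scope. A minimal sketch of the bug class it catches (hypothetical code, not from this commit; compile with gcc -Wshadow):

#include <stdio.h>

static int init_decoder(void) { return 0; }

int main(void) {
  int res = -1; /* outer status variable */
  {
    /* -Wshadow: declaration of 'res' shadows a previous local */
    const int res = init_decoder();
    if (res != 0) return res;
  }
  /* Still -1: the block above initialized its own 'res', not this one. */
  printf("res = %d\n", res);
  return 0;
}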

View File

@@ -580,7 +580,7 @@ static aom_codec_err_t decoder_decode(aom_codec_alg_priv_t *ctx,
   // Initialize the decoder workers on the first frame.
   if (ctx->frame_workers == NULL) {
-    const aom_codec_err_t res = init_decoder(ctx);
+    res = init_decoder(ctx);
     if (res != AOM_CODEC_OK) return res;
   }
@@ -646,7 +646,6 @@ static aom_codec_err_t decoder_decode(aom_codec_alg_priv_t *ctx,
     for (i = 0; i < frame_count; ++i) {
       const uint8_t *data_start_copy = data_start;
       const uint32_t frame_size = frame_sizes[i];
-      aom_codec_err_t res;
       if (data_start < data ||
           frame_size > (uint32_t)(data_end - data_start)) {
         set_error_detail(ctx, "Invalid frame size in index");
@@ -662,8 +661,7 @@ static aom_codec_err_t decoder_decode(aom_codec_alg_priv_t *ctx,
   } else {
     while (data_start < data_end) {
       const uint32_t frame_size = (uint32_t)(data_end - data_start);
-      const aom_codec_err_t res =
-          decode_one(ctx, &data_start, frame_size, user_priv, deadline);
+      res = decode_one(ctx, &data_start, frame_size, user_priv, deadline);
       if (res != AOM_CODEC_OK) return res;

       // Account for suboptimal termination by the encoder.
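
The fix used in decoder_decode above is the simplest one: declare the status variable once in the enclosing scope and assign to it in each inner block. A minimal sketch, with stand-in function names:

static int step_one(void) { return 0; }
static int step_two(void) { return 0; }

static int decode_all(void) {
  int res; /* single declaration, visible to every block below */
  res = step_one();
  if (res != 0) return res;
  res = step_two(); /* reassigned, not redeclared: nothing to shadow */
  return res;
}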

View File

@@ -12,18 +12,11 @@
 #include <arm_neon.h>
 #include <assert.h>

-#include "./av1_rtcd.h"
 #include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_dsp/txfm_common.h"
 #include "av1/common/common.h"

-static int16_t sinpi_1_9 = 0x14a3;
-static int16_t sinpi_2_9 = 0x26c9;
-static int16_t sinpi_3_9 = 0x3441;
-static int16_t sinpi_4_9 = 0x3b6c;
-static int16_t cospi_8_64 = 0x3b21;
-static int16_t cospi_16_64 = 0x2d41;
-static int16_t cospi_24_64 = 0x187e;
-
 static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
   int32x4_t q8s32, q9s32;
   int16x4x2_t d0x2s16, d1x2s16;
@@ -43,18 +36,18 @@ static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
 static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
                                              int16x4_t *d2s16) {
-  *d0s16 = vdup_n_s16(cospi_8_64);
-  *d1s16 = vdup_n_s16(cospi_16_64);
-  *d2s16 = vdup_n_s16(cospi_24_64);
+  *d0s16 = vdup_n_s16((int16_t)cospi_8_64);
+  *d1s16 = vdup_n_s16((int16_t)cospi_16_64);
+  *d2s16 = vdup_n_s16((int16_t)cospi_24_64);
   return;
 }

 static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
                                            int16x4_t *d5s16, int16x8_t *q3s16) {
-  *d3s16 = vdup_n_s16(sinpi_1_9);
-  *d4s16 = vdup_n_s16(sinpi_2_9);
-  *q3s16 = vdupq_n_s16(sinpi_3_9);
-  *d5s16 = vdup_n_s16(sinpi_4_9);
+  *d3s16 = vdup_n_s16((int16_t)sinpi_1_9);
+  *d4s16 = vdup_n_s16((int16_t)sinpi_2_9);
+  *q3s16 = vdupq_n_s16((int16_t)sinpi_3_9);
+  *d5s16 = vdup_n_s16((int16_t)sinpi_4_9);
   return;
 }
@@ -121,7 +114,7 @@ static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
   q10s32 = vaddq_s32(q10s32, q13s32);
   q10s32 = vaddq_s32(q10s32, q8s32);
   q11s32 = vsubq_s32(q11s32, q14s32);
-  q8s32 = vdupq_n_s32(sinpi_3_9);
+  q8s32 = vdupq_n_s32((int32_t)sinpi_3_9);
   q11s32 = vsubq_s32(q11s32, q9s32);
   q15s32 = vmulq_s32(q15s32, q8s32);
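
The deleted file-local constants duplicated values that aom_dsp/txfm_common.h already provides, which is the "reuse some common consts from a header" item in the commit message. The shared definitions are wider than int16_t, hence the new cast at every use. A hedged sketch of the pattern; the tran_high_t stand-in below is local to the example:

#include <arm_neon.h>
#include <stdint.h>

/* Stand-in for the value exported by the shared header; in libaom it is
 * wider than int16_t, so the NEON code narrows it at the point of use. */
typedef int32_t tran_high_t;
static const tran_high_t cospi_8_64 = 15137;

static inline int16x4_t dup_cospi_8_64(void) {
  return vdup_n_s16((int16_t)cospi_8_64); /* narrow to the NEON lane type */
}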

View File

@@ -12,26 +12,11 @@
 #include <arm_neon.h>
 #include <assert.h>

-#include "./av1_rtcd.h"
 #include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_dsp/txfm_common.h"
 #include "av1/common/common.h"

-static int16_t cospi_2_64 = 16305;
-static int16_t cospi_4_64 = 16069;
-static int16_t cospi_6_64 = 15679;
-static int16_t cospi_8_64 = 15137;
-static int16_t cospi_10_64 = 14449;
-static int16_t cospi_12_64 = 13623;
-static int16_t cospi_14_64 = 12665;
-static int16_t cospi_16_64 = 11585;
-static int16_t cospi_18_64 = 10394;
-static int16_t cospi_20_64 = 9102;
-static int16_t cospi_22_64 = 7723;
-static int16_t cospi_24_64 = 6270;
-static int16_t cospi_26_64 = 4756;
-static int16_t cospi_28_64 = 3196;
-static int16_t cospi_30_64 = 1606;
-
 static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16,
                                 int16x8_t *q10s16, int16x8_t *q11s16,
                                 int16x8_t *q12s16, int16x8_t *q13s16,
@@ -108,10 +93,10 @@ static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
   int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
   int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;

-  d0s16 = vdup_n_s16(cospi_28_64);
-  d1s16 = vdup_n_s16(cospi_4_64);
-  d2s16 = vdup_n_s16(cospi_12_64);
-  d3s16 = vdup_n_s16(cospi_20_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_4_64);
+  d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+  d3s16 = vdup_n_s16((int16_t)cospi_20_64);

   d16s16 = vget_low_s16(*q8s16);
   d17s16 = vget_high_s16(*q8s16);
@@ -164,7 +149,7 @@ static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
   q6s16 = vcombine_s16(d12s16, d13s16);
   q7s16 = vcombine_s16(d14s16, d15s16);

-  d0s16 = vdup_n_s16(cospi_16_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_16_64);

   q2s32 = vmull_s16(d16s16, d0s16);
   q3s32 = vmull_s16(d17s16, d0s16);
@@ -176,8 +161,8 @@ static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
   q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
   q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);

-  d0s16 = vdup_n_s16(cospi_24_64);
-  d1s16 = vdup_n_s16(cospi_8_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_24_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_8_64);

   d18s16 = vqrshrn_n_s32(q2s32, 14);
   d19s16 = vqrshrn_n_s32(q3s32, 14);
@@ -217,7 +202,7 @@ static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
   d28s16 = vget_low_s16(*q14s16);
   d29s16 = vget_high_s16(*q14s16);

-  d16s16 = vdup_n_s16(cospi_16_64);
+  d16s16 = vdup_n_s16((int16_t)cospi_16_64);

   q9s32 = vmull_s16(d28s16, d16s16);
   q10s32 = vmull_s16(d29s16, d16s16);
@@ -276,16 +261,16 @@ static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
   d30s16 = vget_low_s16(*q15s16);
   d31s16 = vget_high_s16(*q15s16);

-  d14s16 = vdup_n_s16(cospi_2_64);
-  d15s16 = vdup_n_s16(cospi_30_64);
+  d14s16 = vdup_n_s16((int16_t)cospi_2_64);
+  d15s16 = vdup_n_s16((int16_t)cospi_30_64);

   q1s32 = vmull_s16(d30s16, d14s16);
   q2s32 = vmull_s16(d31s16, d14s16);
   q3s32 = vmull_s16(d30s16, d15s16);
   q4s32 = vmull_s16(d31s16, d15s16);

-  d30s16 = vdup_n_s16(cospi_18_64);
-  d31s16 = vdup_n_s16(cospi_14_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_18_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_14_64);

   q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
   q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
@@ -324,15 +309,15 @@ static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
   d7s16 = vqrshrn_n_s32(q4s32, 14);
   *q12s16 = vcombine_s16(d24s16, d25s16);

-  d0s16 = vdup_n_s16(cospi_10_64);
-  d1s16 = vdup_n_s16(cospi_22_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_10_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_22_64);

   q4s32 = vmull_s16(d26s16, d0s16);
   q5s32 = vmull_s16(d27s16, d0s16);
   q2s32 = vmull_s16(d26s16, d1s16);
   q6s32 = vmull_s16(d27s16, d1s16);

-  d30s16 = vdup_n_s16(cospi_26_64);
-  d31s16 = vdup_n_s16(cospi_6_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_26_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_6_64);

   q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
   q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
@@ -367,8 +352,8 @@ static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
   q4s32 = vsubq_s32(q4s32, q0s32);
   q5s32 = vsubq_s32(q5s32, q13s32);

-  d30s16 = vdup_n_s16(cospi_8_64);
-  d31s16 = vdup_n_s16(cospi_24_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_24_64);

   d18s16 = vqrshrn_n_s32(q9s32, 14);
   d19s16 = vqrshrn_n_s32(q10s32, 14);
@@ -423,7 +408,7 @@ static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
   d15s16 = vqrshrn_n_s32(q0s32, 14);
   *q14s16 = vcombine_s16(d28s16, d29s16);

-  d30s16 = vdup_n_s16(cospi_16_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_16_64);

   d22s16 = vget_low_s16(*q11s16);
   d23s16 = vget_high_s16(*q11s16);

View File

@@ -9,8 +9,9 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

-#include "./av1_rtcd.h"
 #include "av1/common/av1_fwd_txfm.h"
+#include <assert.h>
+#include "./av1_rtcd.h"

 void av1_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
@@ -22,36 +23,37 @@ void av1_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
   int pass;
   // We need an intermediate buffer between passes.
   tran_low_t intermediate[4 * 4];
-  const int16_t *in_pass0 = input;
-  const tran_low_t *in = NULL;
+  const tran_low_t *in_low = NULL;
   tran_low_t *out = intermediate;
   // Do the two transform/transpose passes
   for (pass = 0; pass < 2; ++pass) {
-    tran_high_t input[4];      // canbe16
+    tran_high_t in_high[4];    // canbe16
     tran_high_t step[4];       // canbe16
     tran_high_t temp1, temp2;  // needs32
     int i;
     for (i = 0; i < 4; ++i) {
       // Load inputs.
       if (0 == pass) {
-        input[0] = in_pass0[0 * stride] * 16;
-        input[1] = in_pass0[1 * stride] * 16;
-        input[2] = in_pass0[2 * stride] * 16;
-        input[3] = in_pass0[3 * stride] * 16;
-        if (i == 0 && input[0]) {
-          input[0] += 1;
+        in_high[0] = input[0 * stride] * 16;
+        in_high[1] = input[1 * stride] * 16;
+        in_high[2] = input[2 * stride] * 16;
+        in_high[3] = input[3 * stride] * 16;
+        if (i == 0 && in_high[0]) {
+          in_high[0] += 1;
         }
       } else {
-        input[0] = in[0 * 4];
-        input[1] = in[1 * 4];
-        input[2] = in[2 * 4];
-        input[3] = in[3 * 4];
+        assert(in_low != NULL);
+        in_high[0] = in_low[0 * 4];
+        in_high[1] = in_low[1 * 4];
+        in_high[2] = in_low[2 * 4];
+        in_high[3] = in_low[3 * 4];
+        in_low++;
       }
       // Transform.
-      step[0] = input[0] + input[3];
-      step[1] = input[1] + input[2];
-      step[2] = input[1] - input[2];
-      step[3] = input[0] - input[3];
+      step[0] = in_high[0] + in_high[3];
+      step[1] = in_high[1] + in_high[2];
+      step[2] = in_high[1] - in_high[2];
+      step[3] = in_high[0] - in_high[3];
       temp1 = (step[0] + step[1]) * cospi_16_64;
       temp2 = (step[0] - step[1]) * cospi_16_64;
       out[0] = (tran_low_t)fdct_round_shift(temp1);
@@ -61,12 +63,11 @@ void av1_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
       out[1] = (tran_low_t)fdct_round_shift(temp1);
       out[3] = (tran_low_t)fdct_round_shift(temp2);
       // Do next column (which is a transposed row in second/horizontal pass)
-      in_pass0++;
-      in++;
+      input++;
       out += 4;
     }
-    // Setup in/out for next pass.
-    in = intermediate;
+    // Setup in_low/out for next pass.
+    in_low = intermediate;
     out = output;
   }
@@ -101,7 +102,6 @@ void av1_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
     tran_high_t t0, t1, t2, t3;  // needs32
     tran_high_t x0, x1, x2, x3;  // canbe16
-    int i;

     for (i = 0; i < 8; i++) {
       // stage 1
       if (pass == 0) {
@@ -193,56 +193,57 @@ void av1_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
   int pass;
   // We need an intermediate buffer between passes.
   tran_low_t intermediate[256];
-  const int16_t *in_pass0 = input;
-  const tran_low_t *in = NULL;
+  const tran_low_t *in_low = NULL;
   tran_low_t *out = intermediate;
   // Do the two transform/transpose passes
   for (pass = 0; pass < 2; ++pass) {
     tran_high_t step1[8];      // canbe16
     tran_high_t step2[8];      // canbe16
     tran_high_t step3[8];      // canbe16
-    tran_high_t input[8];      // canbe16
+    tran_high_t in_high[8];    // canbe16
     tran_high_t temp1, temp2;  // needs32
     int i;
     for (i = 0; i < 16; i++) {
       if (0 == pass) {
         // Calculate input for the first 8 results.
-        input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
-        input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
-        input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
-        input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
-        input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
-        input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
-        input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4;
-        input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4;
+        in_high[0] = (input[0 * stride] + input[15 * stride]) * 4;
+        in_high[1] = (input[1 * stride] + input[14 * stride]) * 4;
+        in_high[2] = (input[2 * stride] + input[13 * stride]) * 4;
+        in_high[3] = (input[3 * stride] + input[12 * stride]) * 4;
+        in_high[4] = (input[4 * stride] + input[11 * stride]) * 4;
+        in_high[5] = (input[5 * stride] + input[10 * stride]) * 4;
+        in_high[6] = (input[6 * stride] + input[9 * stride]) * 4;
+        in_high[7] = (input[7 * stride] + input[8 * stride]) * 4;
         // Calculate input for the next 8 results.
-        step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4;
-        step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4;
-        step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
-        step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
-        step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
-        step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
-        step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
-        step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+        step1[0] = (input[7 * stride] - input[8 * stride]) * 4;
+        step1[1] = (input[6 * stride] - input[9 * stride]) * 4;
+        step1[2] = (input[5 * stride] - input[10 * stride]) * 4;
+        step1[3] = (input[4 * stride] - input[11 * stride]) * 4;
+        step1[4] = (input[3 * stride] - input[12 * stride]) * 4;
+        step1[5] = (input[2 * stride] - input[13 * stride]) * 4;
+        step1[6] = (input[1 * stride] - input[14 * stride]) * 4;
+        step1[7] = (input[0 * stride] - input[15 * stride]) * 4;
       } else {
         // Calculate input for the first 8 results.
-        input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
-        input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
-        input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
-        input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
-        input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
-        input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
-        input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2);
-        input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2);
+        assert(in_low != NULL);
+        in_high[0] = ((in_low[0 * 16] + 1) >> 2) + ((in_low[15 * 16] + 1) >> 2);
+        in_high[1] = ((in_low[1 * 16] + 1) >> 2) + ((in_low[14 * 16] + 1) >> 2);
+        in_high[2] = ((in_low[2 * 16] + 1) >> 2) + ((in_low[13 * 16] + 1) >> 2);
+        in_high[3] = ((in_low[3 * 16] + 1) >> 2) + ((in_low[12 * 16] + 1) >> 2);
+        in_high[4] = ((in_low[4 * 16] + 1) >> 2) + ((in_low[11 * 16] + 1) >> 2);
+        in_high[5] = ((in_low[5 * 16] + 1) >> 2) + ((in_low[10 * 16] + 1) >> 2);
+        in_high[6] = ((in_low[6 * 16] + 1) >> 2) + ((in_low[9 * 16] + 1) >> 2);
+        in_high[7] = ((in_low[7 * 16] + 1) >> 2) + ((in_low[8 * 16] + 1) >> 2);
         // Calculate input for the next 8 results.
-        step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2);
-        step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2);
-        step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
-        step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
-        step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
-        step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
-        step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
-        step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+        step1[0] = ((in_low[7 * 16] + 1) >> 2) - ((in_low[8 * 16] + 1) >> 2);
+        step1[1] = ((in_low[6 * 16] + 1) >> 2) - ((in_low[9 * 16] + 1) >> 2);
+        step1[2] = ((in_low[5 * 16] + 1) >> 2) - ((in_low[10 * 16] + 1) >> 2);
+        step1[3] = ((in_low[4 * 16] + 1) >> 2) - ((in_low[11 * 16] + 1) >> 2);
+        step1[4] = ((in_low[3 * 16] + 1) >> 2) - ((in_low[12 * 16] + 1) >> 2);
+        step1[5] = ((in_low[2 * 16] + 1) >> 2) - ((in_low[13 * 16] + 1) >> 2);
+        step1[6] = ((in_low[1 * 16] + 1) >> 2) - ((in_low[14 * 16] + 1) >> 2);
+        step1[7] = ((in_low[0 * 16] + 1) >> 2) - ((in_low[15 * 16] + 1) >> 2);
+        in_low++;
       }
       // Work on the first eight values; fdct8(input, even_results);
       {
@@ -251,14 +252,14 @@ void av1_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
         tran_high_t x0, x1, x2, x3;      // canbe16

         // stage 1
-        s0 = input[0] + input[7];
-        s1 = input[1] + input[6];
-        s2 = input[2] + input[5];
-        s3 = input[3] + input[4];
-        s4 = input[3] - input[4];
-        s5 = input[2] - input[5];
-        s6 = input[1] - input[6];
-        s7 = input[0] - input[7];
+        s0 = in_high[0] + in_high[7];
+        s1 = in_high[1] + in_high[6];
+        s2 = in_high[2] + in_high[5];
+        s3 = in_high[3] + in_high[4];
+        s4 = in_high[3] - in_high[4];
+        s5 = in_high[2] - in_high[5];
+        s6 = in_high[1] - in_high[6];
+        s7 = in_high[0] - in_high[7];

         // fdct4(step, step);
         x0 = s0 + s3;
@@ -353,12 +354,11 @@ void av1_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
         out[15] = (tran_low_t)fdct_round_shift(temp2);
       }
       // Do next column (which is a transposed row in second/horizontal pass)
-      in++;
-      in_pass0++;
+      input++;
       out += 16;
     }
     // Setup in/out for next pass.
-    in = intermediate;
+    in_low = intermediate;
     out = output;
   }
 }
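
The renames in av1_fdct4x4_c and av1_fdct16x16_c fix a subtler shadow: each pass declared a local array named input, hiding the function parameter of the same name, which forced the extra in_pass0 alias. With the local renamed to in_high, the parameter is usable directly, and the added assert documents that in_low is only dereferenced once pass 1 has pointed it at the intermediate buffer. A hypothetical reduction of the same shape:

#include <assert.h>
#include <stdint.h>

void two_pass_transform(const int16_t *input, int32_t *output, int stride) {
  int32_t intermediate[4];
  const int32_t *in_low = NULL; /* set at the end of pass 0 */
  for (int pass = 0; pass < 2; ++pass) {
    int32_t in_high[4]; /* was `input[4]`, which -Wshadow flagged */
    for (int i = 0; i < 4; ++i) {
      if (pass == 0) {
        in_high[i] = input[i * stride] * 16; /* parameter reachable again */
      } else {
        assert(in_low != NULL);
        in_high[i] = in_low[i];
      }
    }
    for (int i = 0; i < 4; ++i) {
      intermediate[i] = in_high[i]; /* stand-in for the real butterflies */
      output[i] = in_high[i];
    }
    in_low = intermediate; /* pass 1 reads pass 0's output */
  }
}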

View File

@@ -889,42 +889,44 @@ void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
       break;
     default:
       for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
-        const int shift_y = shift_32_y[idx_32];
-        const int shift_uv = shift_32_uv[idx_32];
+        const int shift_y_32 = shift_32_y[idx_32];
+        const int shift_uv_32 = shift_32_uv[idx_32];
         const int mi_32_col_offset = ((idx_32 & 1) << 2);
         const int mi_32_row_offset = ((idx_32 >> 1) << 2);
         if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
           continue;
         switch (mip[0]->mbmi.sb_type) {
           case BLOCK_32X32:
-            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+            build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
             break;
-          case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+          case BLOCK_32X16:
+            build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
 #if CONFIG_SUPERTX
             if (supertx_enabled(&mip[0]->mbmi)) break;
 #endif
             if (mi_32_row_offset + 2 >= max_rows) continue;
             mip2 = mip + mode_info_stride * 2;
-            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
+            build_masks(lfi_n, mip2[0], shift_y_32 + 16, shift_uv_32 + 4, lfm);
             break;
-          case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+          case BLOCK_16X32:
+            build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
 #if CONFIG_SUPERTX
             if (supertx_enabled(&mip[0]->mbmi)) break;
 #endif
             if (mi_32_col_offset + 2 >= max_cols) continue;
             mip2 = mip + 2;
-            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
+            build_masks(lfi_n, mip2[0], shift_y_32 + 2, shift_uv_32 + 1, lfm);
             break;
           default:
 #if CONFIG_SUPERTX
             if (mip[0]->mbmi.tx_size == TX_32X32) {
-              build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+              build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
               break;
             }
 #endif
             for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
-              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
-              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
+              const int shift_y_32_16 = shift_y_32 + shift_16_y[idx_16];
+              const int shift_uv_32_16 = shift_uv_32 + shift_16_uv[idx_16];
               const int mi_16_col_offset =
                   mi_32_col_offset + ((idx_16 & 1) << 1);
               const int mi_16_row_offset =
@@ -935,16 +937,18 @@ void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
               switch (mip[0]->mbmi.sb_type) {
                 case BLOCK_16X16:
-                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+                              lfm);
                   break;
                 case BLOCK_16X8:
 #if CONFIG_SUPERTX
                   if (supertx_enabled(&mip[0]->mbmi)) break;
 #endif
-                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+                              lfm);
                   if (mi_16_row_offset + 1 >= max_rows) continue;
                   mip2 = mip + mode_info_stride;
-                  build_y_mask(lfi_n, mip2[0], shift_y + 8,
+                  build_y_mask(lfi_n, mip2[0], shift_y_32_16 + 8,
 #if CONFIG_SUPERTX
                                0,
 #endif
@@ -954,29 +958,31 @@ void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
 #if CONFIG_SUPERTX
                   if (supertx_enabled(&mip[0]->mbmi)) break;
 #endif
-                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+                              lfm);
                   if (mi_16_col_offset + 1 >= max_cols) continue;
                   mip2 = mip + 1;
-                  build_y_mask(lfi_n, mip2[0], shift_y + 1,
+                  build_y_mask(lfi_n, mip2[0], shift_y_32_16 + 1,
 #if CONFIG_SUPERTX
                                0,
 #endif
                                lfm);
                   break;
                 default: {
-                  const int shift_y =
-                      shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0];
+                  const int shift_y_32_16_8_zero = shift_y_32_16 + shift_8_y[0];
 #if CONFIG_SUPERTX
                   if (mip[0]->mbmi.tx_size == TX_16X16) {
-                    build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                    build_masks(lfi_n, mip[0], shift_y_32_16_8_zero,
+                                shift_uv_32_16, lfm);
                     break;
                   }
 #endif
-                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y_32_16_8_zero,
+                              shift_uv_32_16, lfm);
                   mip += offset[0];
                   for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
-                    const int shift_y = shift_32_y[idx_32] +
-                                        shift_16_y[idx_16] + shift_8_y[idx_8];
+                    const int shift_y_32_16_8 =
+                        shift_y_32_16 + shift_8_y[idx_8];
                     const int mi_8_col_offset =
                         mi_16_col_offset + ((idx_8 & 1));
                     const int mi_8_row_offset =
@@ -985,7 +991,7 @@ void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
                     if (mi_8_col_offset >= max_cols ||
                         mi_8_row_offset >= max_rows)
                       continue;
-                    build_y_mask(lfi_n, mip[0], shift_y,
+                    build_y_mask(lfi_n, mip[0], shift_y_32_16_8,
 #if CONFIG_SUPERTX
                                  supertx_enabled(&mip[0]->mbmi),
 #endif
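
In av1_setup_mask, every nesting level used to redeclare shift_y and shift_uv, hiding the outer copies. Encoding the nesting depth into the name (shift_y_32, shift_y_32_16, shift_y_32_16_8) both silences -Wshadow and lets each inner level build on the outer sum instead of re-deriving it from the tables. A minimal sketch of the scheme with stand-in tables:

static int total_shift(const int shift_32_y[4], const int shift_16_y[4],
                       const int shift_8_y[4]) {
  int acc = 0;
  for (int idx_32 = 0; idx_32 < 4; ++idx_32) {
    const int shift_y_32 = shift_32_y[idx_32];
    for (int idx_16 = 0; idx_16 < 4; ++idx_16) {
      /* builds on shift_y_32 rather than re-adding shift_32_y[idx_32] */
      const int shift_y_32_16 = shift_y_32 + shift_16_y[idx_16];
      for (int idx_8 = 0; idx_8 < 4; ++idx_8) {
        const int shift_y_32_16_8 = shift_y_32_16 + shift_8_y[idx_8];
        acc += shift_y_32_16_8;
      }
    }
  }
  return acc;
}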

View File

@@ -2388,7 +2388,7 @@ void av1_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
 #define IDCT32_34                                                 \
   /* Stage1 */                                                    \
   {                                                               \
-    const __m128i zero = _mm_setzero_si128();                     \
     const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero);      \
     const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero);      \
                                                                   \
@@ -2413,7 +2412,6 @@ void av1_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
                                                                   \
   /* Stage2 */                                                    \
   {                                                               \
-    const __m128i zero = _mm_setzero_si128();                     \
     const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero);      \
     const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero);      \
                                                                   \
@@ -2440,7 +2438,6 @@ void av1_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
                                                                   \
   /* Stage3 */                                                    \
   {                                                               \
-    const __m128i zero = _mm_setzero_si128();                     \
     const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero);      \
     const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero);      \
                                                                   \
@@ -2481,7 +2478,6 @@ void av1_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
                                                                   \
   /* Stage4 */                                                    \
   {                                                               \
-    const __m128i zero = _mm_setzero_si128();                     \
     const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero);      \
     const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero);      \
                                                                   \
@@ -3018,6 +3014,7 @@ void av1_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
 // Only upper-left 8x8 has non-zero coeff
 void av1_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
                                int stride) {
+  const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -3123,7 +3120,6 @@ void av1_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
   col[31] = _mm_sub_epi16(stp1_0, stp1_31);
   for (i = 0; i < 4; i++) {
     int j;
-    const __m128i zero = _mm_setzero_si128();
     // Transpose 32x8 block to 8x32 block
     array_transpose_8x8(col + i * 8, in);
     IDCT32_34
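
Here the IDCT32_34 macro defined its own zero in every stage block; once expanded inside a loop body that also defined zero, each block shadowed the other. The fix hoists a single definition into the expanding function and lets the macro pick it up from the surrounding scope. A hypothetical sketch of that contract:

#include <emmintrin.h>

/* The macro references a `zero` that the expanding function must provide,
 * mirroring how IDCT32_34 now uses the hoisted definition. */
#define UNPACK_LO_WITH_ZERO(v) _mm_unpacklo_epi16((v), zero)

void widen_block(const __m128i *in, __m128i *out, int n) {
  const __m128i zero = _mm_setzero_si128(); /* defined once, not per stage */
  for (int i = 0; i < n; ++i) out[i] = UNPACK_LO_WITH_ZERO(in[i]);
}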

View File

@@ -3465,8 +3465,6 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
   setup_segmentation(cm, rb);

-  {
-    int i;
   for (i = 0; i < MAX_SEGMENTS; ++i) {
     const int qindex = cm->seg.enabled
                            ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
@@ -3475,7 +3473,6 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
                          cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
     xd->qindex[i] = qindex;
   }
-  }

   setup_segmentation_dequant(cm);
   cm->tx_mode =

View File

@@ -1492,7 +1492,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
     mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
   } else {
-    int ref;
     int_mv ref_mv[2];
     ref_mv[0] = nearestmv[0];
     ref_mv[1] = nearestmv[1];

View File

@@ -664,91 +664,92 @@ static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp,
 #endif

   while (p < stop && p->token != EOSB_TOKEN) {
-    const int t = p->token;
+    const int token = p->token;
+    aom_tree_index index = 0;
 #if !CONFIG_ANS
-    const struct av1_token *const a = &av1_coef_encodings[t];
-    int v = a->value;
-    int n = a->len;
+    const struct av1_token *const coef_encoding = &av1_coef_encodings[token];
+    int coef_value = coef_encoding->value;
+    int coef_length = coef_encoding->len;
 #endif  // !CONFIG_ANS
 #if CONFIG_AOM_HIGHBITDEPTH
-    const av1_extra_bit *b;
-    if (bit_depth == AOM_BITS_12)
-      b = &av1_extra_bits_high12[t];
-    else if (bit_depth == AOM_BITS_10)
-      b = &av1_extra_bits_high10[t];
-    else
-      b = &av1_extra_bits[t];
+    const av1_extra_bit *const extra_bits_av1 =
+        (bit_depth == AOM_BITS_12)
+            ? &av1_extra_bits_high12[token]
+            : (bit_depth == AOM_BITS_10) ? &av1_extra_bits_high10[token]
+                                         : &av1_extra_bits[token];
 #else
-    const av1_extra_bit *const b = &av1_extra_bits[t];
+    const av1_extra_bit *const extra_bits_av1 = &av1_extra_bits[token];
     (void)bit_depth;
 #endif  // CONFIG_AOM_HIGHBITDEPTH

 #if CONFIG_ANS
     /* skip one or two nodes */
-    if (!p->skip_eob_node) aom_write(w, t != EOB_TOKEN, p->context_tree[0]);
+    if (!p->skip_eob_node) aom_write(w, token != EOB_TOKEN, p->context_tree[0]);

-    if (t != EOB_TOKEN) {
-      aom_write(w, t != ZERO_TOKEN, p->context_tree[1]);
+    if (token != EOB_TOKEN) {
+      aom_write(w, token != ZERO_TOKEN, p->context_tree[1]);

-      if (t != ZERO_TOKEN) {
-        aom_write_symbol(w, t - ONE_TOKEN, *p->token_cdf,
+      if (token != ZERO_TOKEN) {
+        aom_write_symbol(w, token - ONE_TOKEN, *p->token_cdf,
                          CATEGORY6_TOKEN - ONE_TOKEN + 1);
       }
     }
 #else
     /* skip one or two nodes */
     if (p->skip_eob_node)
-      n -= p->skip_eob_node;
+      coef_length -= p->skip_eob_node;
     else
-      aom_write(w, t != EOB_TOKEN, p->context_tree[0]);
+      aom_write(w, token != EOB_TOKEN, p->context_tree[0]);

-    if (t != EOB_TOKEN) {
-      aom_write(w, t != ZERO_TOKEN, p->context_tree[1]);
+    if (token != EOB_TOKEN) {
+      aom_write(w, token != ZERO_TOKEN, p->context_tree[1]);

-      if (t != ZERO_TOKEN) {
-        aom_write(w, t != ONE_TOKEN, p->context_tree[2]);
+      if (token != ZERO_TOKEN) {
+        aom_write(w, token != ONE_TOKEN, p->context_tree[2]);

-        if (t != ONE_TOKEN) {
-          int len = UNCONSTRAINED_NODES - p->skip_eob_node;
+        if (token != ONE_TOKEN) {
+          const int unconstrained_len = UNCONSTRAINED_NODES - p->skip_eob_node;
           aom_write_tree(w, av1_coef_con_tree,
-                         av1_pareto8_full[p->context_tree[PIVOT_NODE] - 1], v,
-                         n - len, 0);
+                         av1_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
+                         coef_value, coef_length - unconstrained_len, 0);
         }
       }
     }
 #endif  // CONFIG_ANS

-    if (b->base_val) {
-      const int e = p->extra, l = b->len;
-      int skip_bits = (b->base_val == CAT6_MIN_VAL)
+    if (extra_bits_av1->base_val) {
+      const int extra_bits = p->extra;
+      const int extra_bits_av1_length = extra_bits_av1->len;
+      int skip_bits = (extra_bits_av1->base_val == CAT6_MIN_VAL)
                           ? TX_SIZES - 1 - txsize_sqr_up_map[tx]
                           : 0;

-      if (l) {
-        const unsigned char *pb = b->prob;
-        int v = e >> 1;
-        int n = l; /* number of bits in v, assumed nonzero */
-        int i = 0;
+      if (extra_bits_av1_length) {
+        const unsigned char *pb = extra_bits_av1->prob;
+        const int value = extra_bits >> 1;
+        int num_bits = extra_bits_av1_length;  // number of bits in value
+        assert(num_bits > 0);
+        index = 0;

         do {
-          const int bb = (v >> --n) & 1;
+          const int bb = (value >> --num_bits) & 1;
           if (skip_bits) {
-            skip_bits--;
+            --skip_bits;
             assert(!bb);
           } else {
-            aom_write(w, bb, pb[i >> 1]);
+            aom_write(w, bb, pb[index >> 1]);
           }
-          i = b->tree[i + bb];
-        } while (n);
+          index = extra_bits_av1->tree[index + bb];
+        } while (num_bits);
       }

-      aom_write_bit(w, e & 1);
+      aom_write_bit(w, extra_bits & 1);
     }

     ++p;

 #if CONFIG_VAR_TX
     ++count;
-    if (t == EOB_TOKEN || count == seg_eob) break;
+    if (token == EOB_TOKEN || count == seg_eob) break;
 #endif
   }
@@ -2091,7 +2092,6 @@ static void update_coef_probs_common(aom_writer *const bc, AV1_COMP *cpi,
         for (t = 0; t < entropy_nodes_update; ++t) {
           aom_prob newp = new_coef_probs[i][j][k][l][t];
           aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
-          const aom_prob upd = DIFF_UPDATE_PROB;
           int s;
           int u = 0;

           if (t == PIVOT_NODE)
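
The extra-bits loop in pack_mb_tokens walks the magnitude most-significant-bit first, pre-decrementing the bit count until it reaches zero; the commit makes the count's nonzero precondition an explicit assert. A reduced, hypothetical sketch of just that walk, with putchar standing in for the entropy coder's aom_write:

#include <assert.h>
#include <stdio.h>

/* Emit num_bits bits of value, most significant first; `--num_bits`
 * selects the next-most-significant bit on each trip through the loop. */
static void write_bits_msb_first(int value, int num_bits) {
  assert(num_bits > 0); /* the coder's loop assumes at least one bit */
  do {
    const int bb = (value >> --num_bits) & 1;
    putchar('0' + bb);
  } while (num_bits);
  putchar('\n');
}

/* write_bits_msb_first(5, 3) prints "101". */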

View File

@@ -26,7 +26,7 @@ typedef struct {
   unsigned int sse;
   int sum;
   unsigned int var;
-} diff;
+} DIFF;

 typedef struct macroblock_plane {
   DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
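
A sketch of why the lower-case typedef was worth renaming: locals named diff are common in this codebase, and with a file-scope type named diff in scope each such local shadows the type name, which some -Wshadow configurations report. The upper-case name sidesteps the clash entirely (hypothetical usage below):

typedef struct {
  unsigned int sse;
  int sum;
  unsigned int var;
} DIFF;

static unsigned int sum_var(const DIFF *blocks, int n) {
  unsigned int diff = 0; /* fine now: no type named `diff` to shadow */
  for (int i = 0; i < n; ++i) diff += blocks[i].var;
  return diff;
}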

View File

@@ -1498,7 +1498,6 @@ void av1_fdct8x8_quant_c(const int16_t *input, int stride,
     tran_high_t t0, t1, t2, t3;  // needs32
     tran_high_t x0, x1, x2, x3;  // canbe16
-    int i;

     for (i = 0; i < 8; i++) {
       // stage 1
       s0 = (input[0 * stride] + input[7 * stride]) * 4;

View File

@@ -1177,8 +1177,8 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
 #if CONFIG_DUAL_FILTER
       update_filter_type_count(td->counts, xd, mbmi);
 #else
-      const int ctx = av1_get_pred_context_switchable_interp(xd);
-      ++td->counts->switchable_interp[ctx][mbmi->interp_filter];
+      const int switchable_ctx = av1_get_pred_context_switchable_interp(xd);
+      ++td->counts->switchable_interp[switchable_ctx][mbmi->interp_filter];
 #endif
     }
   }
@@ -2436,7 +2436,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
   int splits_below = 0;
   BLOCK_SIZE bs_type = mib[0]->mbmi.sb_type;
   int do_partition_search = 1;
-  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
 #if CONFIG_SUPERTX
   int last_part_rate_nocoef = INT_MAX;
   int none_rate_nocoef = INT_MAX;
@@ -2495,7 +2495,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
 #if CONFIG_EXT_PARTITION_TYPES
                      PARTITION_NONE,
 #endif
-                     bsize, ctx, INT64_MAX);
+                     bsize, ctx_none, INT64_MAX);

     if (none_rdc.rate < INT_MAX) {
       none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
@@ -2522,7 +2522,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
 #if CONFIG_EXT_PARTITION_TYPES
                        PARTITION_NONE,
 #endif
-                       bsize, ctx, INT64_MAX);
+                       bsize, ctx_none, INT64_MAX);
       break;
     case PARTITION_HORZ:
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
@@ -2539,11 +2539,11 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
 #if CONFIG_SUPERTX
         int rt_nocoef = 0;
 #endif
-        PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+        PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
         av1_rd_cost_init(&tmp_rdc);
-        update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+        update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
         encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
-                          ctx, NULL);
+                          ctx_h, NULL);
         rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
 #if CONFIG_SUPERTX
                          &rt_nocoef,
@@ -2582,11 +2582,11 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
 #if CONFIG_SUPERTX
         int rt_nocoef = 0;
 #endif
-        PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
+        PICK_MODE_CONTEXT *ctx_v = &pc_tree->vertical[0];
         av1_rd_cost_init(&tmp_rdc);
-        update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+        update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
         encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
-                          ctx, NULL);
+                          ctx_v, NULL);
         rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
 #if CONFIG_SUPERTX
                          &rt_nocoef,
@@ -3295,7 +3295,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
   const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
   RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
   const TOKENEXTRA *const tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
   const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   const int *partition_cost = cpi->partition_cost[pl];
   int tmp_partition_cost[PARTITION_TYPES];
@@ -3484,7 +3484,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
 #if CONFIG_EXT_PARTITION_TYPES
                      PARTITION_NONE,
 #endif
-                     bsize, ctx, best_rdc.rdcost);
+                     bsize, ctx_none, best_rdc.rdcost);
     if (this_rdc.rate != INT_MAX) {
       if (bsize_at_least_8x8) {
        this_rdc.rate += partition_cost[PARTITION_NONE];
@@ -3518,7 +3518,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
       // The dist & rate thresholds are set to 0 at speed 0 to disable the
       // early termination at that speed.
       if (!x->e_mbd.lossless[xd->mi[0]->mbmi.segment_id] &&
-          (ctx->skippable && best_rdc.dist < dist_breakout_thr &&
+          (ctx_none->skippable && best_rdc.dist < dist_breakout_thr &&
            best_rdc.rate < rate_breakout_thr)) {
         do_square_split = 0;
         do_rectangular_split = 0;
@@ -3576,7 +3576,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
     }

     // store estimated motion vector
-    if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx);
+    if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);

   // PARTITION_SPLIT
   // TODO(jingning): use the motion vectors given by the above search as
@@ -3588,11 +3588,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
 #if CONFIG_DUAL_FILTER
       if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
         pc_tree->leaf_split[0]->pred_interp_filter =
-            ctx->mic.mbmi.interp_filter[0];
+            ctx_none->mic.mbmi.interp_filter[0];
 #else
       if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
         pc_tree->leaf_split[0]->pred_interp_filter =
-            ctx->mic.mbmi.interp_filter;
+            ctx_none->mic.mbmi.interp_filter;
 #endif
 #if CONFIG_SUPERTX
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
@@ -3669,7 +3669,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
         if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
           continue;

-        if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+        if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);

         pc_tree->split[idx]->index = idx;
 #if CONFIG_SUPERTX
@@ -3769,16 +3769,17 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
   if (partition_horz_allowed &&
       (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
-    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
 #if CONFIG_DUAL_FILTER
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
       pc_tree->horizontal[0].pred_interp_filter =
-          ctx->mic.mbmi.interp_filter[0];
+          ctx_none->mic.mbmi.interp_filter[0];
 #else
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+      pc_tree->horizontal[0].pred_interp_filter =
+          ctx_none->mic.mbmi.interp_filter;
 #endif
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
 #if CONFIG_SUPERTX
@@ -3797,22 +3798,23 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
     if (sum_rdc.rdcost < best_rdc.rdcost &&
 #endif  // CONFIG_SUPERTX
         !force_horz_split && bsize > BLOCK_8X8) {
-      PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
-      update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+      PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
+      update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
       encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
-                        ctx, NULL);
+                        ctx_h, NULL);

-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);

 #if CONFIG_DUAL_FILTER
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
         pc_tree->horizontal[1].pred_interp_filter =
-            ctx->mic.mbmi.interp_filter[0];
+            ctx_h->mic.mbmi.interp_filter[0];
 #else
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+        pc_tree->horizontal[1].pred_interp_filter =
+            ctx_none->mic.mbmi.interp_filter;
 #endif
 #if CONFIG_SUPERTX
       rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
@@ -3908,16 +3910,18 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
       (do_rectangular_split || av1_active_v_edge(cpi, mi_col, mi_step))) {
     subsize = get_subsize(bsize, PARTITION_VERT);

-    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);

 #if CONFIG_DUAL_FILTER
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter[0];
+      pc_tree->vertical[0].pred_interp_filter =
+          ctx_none->mic.mbmi.interp_filter[0];
 #else
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+      pc_tree->vertical[0].pred_interp_filter =
+          ctx_none->mic.mbmi.interp_filter;
 #endif
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
 #if CONFIG_SUPERTX
@@ -3939,17 +3943,18 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
       encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
                         &pc_tree->vertical[0], NULL);

-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);

 #if CONFIG_DUAL_FILTER
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
         pc_tree->vertical[1].pred_interp_filter =
-            ctx->mic.mbmi.interp_filter[0];
+            ctx_none->mic.mbmi.interp_filter[0];
 #else
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+        pc_tree->vertical[1].pred_interp_filter =
+            ctx_none->mic.mbmi.interp_filter;
 #endif
 #if CONFIG_SUPERTX
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
@@ -4045,7 +4050,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
       partition_none_allowed) {
     subsize = get_subsize(bsize, PARTITION_HORZ_A);
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->horizontala, ctx, mi_row, mi_col, bsize,
+                       pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_HORZ_A,
 #if CONFIG_SUPERTX
                        best_rd, &best_rate_nocoef, &x_ctx,
@@ -4059,7 +4064,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
       partition_none_allowed) {
     subsize = get_subsize(bsize, PARTITION_HORZ_B);
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->horizontalb, ctx, mi_row, mi_col, bsize,
+                       pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_HORZ_B,
 #if CONFIG_SUPERTX
                        best_rd, &best_rate_nocoef, &x_ctx,
@@ -4073,7 +4078,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
       partition_none_allowed) {
     subsize = get_subsize(bsize, PARTITION_VERT_A);
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->verticala, ctx, mi_row, mi_col, bsize,
+                       pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_VERT_A,
 #if CONFIG_SUPERTX
                        best_rd, &best_rate_nocoef, &x_ctx,
@@ -4087,7 +4092,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
       partition_none_allowed) {
     subsize = get_subsize(bsize, PARTITION_VERT_B);
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->verticalb, ctx, mi_row, mi_col, bsize,
+                       pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_VERT_B,
 #if CONFIG_SUPERTX
                        best_rd, &best_rate_nocoef, &x_ctx,
@@ -5185,7 +5190,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
 #endif
     ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
   } else {
-    int x, y;
+    int i, j;
     TX_SIZE tx_size;
     // The new intra coding scheme requires no change of transform size
     if (is_inter_block(&mi->mbmi)) {
@@ -5202,10 +5207,10 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
       tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
     }

-    for (y = 0; y < mi_height; y++)
-      for (x = 0; x < mi_width; x++)
-        if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
-          mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
+    for (j = 0; j < mi_height; j++)
+      for (i = 0; i < mi_width; i++)
+        if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows)
+          mi_8x8[mis * j + i]->mbmi.tx_size = tx_size;
   }
   ++td->counts->tx_size_totals[txsize_sqr_map[mbmi->tx_size]];
   ++td->counts

View File

@@ -111,8 +111,7 @@ static void build_nmv_component_cost_table(int *mvcost,
       if (c == MV_CLASS_0) {
         cost += class0_cost[d];
       } else {
-        int i, b;
-        b = c + CLASS0_BITS - 1; /* number of bits */
+        const int b = c + CLASS0_BITS - 1; /* number of bits */
        for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
       }
       if (c == MV_CLASS_0) {
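
A reduced sketch of the cost loop in build_nmv_component_cost_table after the cleanup: the bit count is computed once as a const (the old `int i, b;` pair shadowed an outer `i`), then each bit of d indexes a per-position cost table. Names and table contents below are hypothetical:

static int component_bits_cost(int d, int num_bits, const int bits_cost[][2]) {
  int cost = 0;
  for (int i = 0; i < num_bits; ++i) cost += bits_cost[i][(d >> i) & 1];
  return cost;
}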

View File

@@ -2284,7 +2284,8 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
   av1_set_speed_features_framesize_dependent(cpi);

   // Allocate memory to store variances for a frame.
-  CHECK_MEM_ERROR(cm, cpi->source_diff_var, aom_calloc(cm->MBs, sizeof(diff)));
+  CHECK_MEM_ERROR(cm, cpi->source_diff_var,
+                  aom_calloc(cm->MBs, sizeof(*cpi->source_diff_var)));
   cpi->source_var_thresh = 0;
   cpi->frames_till_next_var_check = 0;
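
The allocation now sizes elements from the pointer itself rather than naming the type, so it stayed correct when the element type was renamed from diff to DIFF and will stay correct across any future rename. A small sketch of the idiom, with plain calloc standing in for aom_calloc/CHECK_MEM_ERROR:

#include <stdlib.h>

typedef struct { unsigned int sse; int sum; unsigned int var; } DIFF;

static DIFF *alloc_source_diff_var(size_t mbs) {
  DIFF *p = calloc(mbs, sizeof(*p)); /* sizeof(*p) follows p's type */
  return p;
}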

View File

@@ -519,7 +519,7 @@ typedef struct AV1_COMP {
   // scaled.

   // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
-  diff *source_diff_var;
+  DIFF *source_diff_var;
   // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
   unsigned int source_var_thresh;
   int frames_till_next_var_check;

View File

@@ -2712,7 +2712,6 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
   // If this is an arf frame then we dont want to read the stats file or
   // advance the input pointer as we already have what we need.
   if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
-    int target_rate;
     configure_buffer_updates(cpi);
     target_rate = gf_group->bit_allocation[gf_group->index];
     target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);

View File

@@ -861,36 +861,35 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x,
   const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
   const int br = best_mv->row;
   const int bc = best_mv->col;
-  MV this_mv;
   int i;
   unsigned int sse;
+  const MV this_mv = { br, bc };
-  this_mv.row = br;
-  this_mv.col = bc;
   cost_list[0] =
       fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
                  in_what->stride, &sse) +
       mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
   if (check_bounds(x, br, bc, 1)) {
     for (i = 0; i < 4; i++) {
-      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
       cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
-                                    get_buf_from_mv(in_what, &this_mv),
+                                    get_buf_from_mv(in_what, &neighbor_mv),
                                     in_what->stride, &sse) +
-                         mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
+                         mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost,
                                      x->mvcost, x->errorperbit);
     }
   } else {
     for (i = 0; i < 4; i++) {
-      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-      if (!is_mv_in(x, &this_mv))
+      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+      if (!is_mv_in(x, &neighbor_mv))
         cost_list[i + 1] = INT_MAX;
       else
-        cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
-                                      get_buf_from_mv(in_what, &this_mv),
-                                      in_what->stride, &sse) +
-                           mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
-                                       x->mvcost, x->errorperbit);
+        cost_list[i + 1] =
+            fn_ptr->vf(what->buf, what->stride,
+                       get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
+                       &sse) +
+            mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
+                        x->errorperbit);
     }
   }
 }
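The rewrite gives each scope its own name: the center vector becomes a const this_mv initialized once, and the per-neighbor vector is neighbor_mv, so the inner declaration no longer hides the outer one. A simplified sketch (MV reduced to two ints):

    typedef struct { int row, col; } MV; /* simplified */

    static void expand_neighbors(const MV *center, const MV offsets[4],
                                 MV out[4]) {
      const MV this_mv = *center; /* const init replaces two field stores */
      int i;
      for (i = 0; i < 4; i++) {
        const MV neighbor_mv = { this_mv.row + offsets[i].row,
                                 this_mv.col + offsets[i].col };
        out[i] = neighbor_mv; /* distinct name: nothing shadowed */
      }
    }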
@@ -1187,12 +1186,13 @@ static int pattern_search(MACROBLOCK *x, MV *start_mv, int search_param,
   // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel
   // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel
   if (cost_list) {
-    const MV best_mv = { br, bc };
+    const MV best_int_mv = { br, bc };
     if (last_is_4) {
-      calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_mv, cost_list,
+      calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
                         use_mvcost, bestsad);
     } else {
-      calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_mv, cost_list);
+      calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
+                         cost_list);
     }
   }
   x->best_mv.as_mv.row = br;
@@ -1692,7 +1692,7 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
   const int ref_stride = xd->plane[0].pre[0].stride;
   uint8_t const *ref_buf, *src_buf;
   MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
-  unsigned int best_sad, tmp_sad, this_sad[4];
+  unsigned int best_sad, tmp_sad, sad_arr[4];
   MV this_mv;
   const int norm_factor = 3 + (bw >> 5);
   const YV12_BUFFER_CONFIG *scaled_ref_frame =
@@ -1762,23 +1762,23 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
       ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
     };
-    cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
+    cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, sad_arr);
   }
   for (idx = 0; idx < 4; ++idx) {
-    if (this_sad[idx] < best_sad) {
-      best_sad = this_sad[idx];
+    if (sad_arr[idx] < best_sad) {
+      best_sad = sad_arr[idx];
       tmp_mv->row = search_pos[idx].row + this_mv.row;
       tmp_mv->col = search_pos[idx].col + this_mv.col;
     }
   }
-  if (this_sad[0] < this_sad[3])
+  if (sad_arr[0] < sad_arr[3])
     this_mv.row -= 1;
   else
     this_mv.row += 1;
-  if (this_sad[1] < this_sad[2])
+  if (sad_arr[1] < sad_arr[2])
     this_mv.col -= 1;
   else
     this_mv.col += 1;

View File

@@ -776,20 +776,20 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi,
   if (frame_is_intra_only(cm)) {
     if (oxcf->rc_mode == AOM_Q) {
-      int qindex = cq_level;
-      double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
-      int delta_qindex = av1_compute_qdelta(rc, q, q * 0.25, cm->bit_depth);
+      const int qindex = cq_level;
+      const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex =
+          av1_compute_qdelta(rc, q_val, q_val * 0.25, cm->bit_depth);
       active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
     } else if (rc->this_key_frame_forced) {
-      int qindex = rc->last_boosted_qindex;
-      double last_boosted_q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
-      int delta_qindex = av1_compute_qdelta(
+      const int qindex = rc->last_boosted_qindex;
+      const double last_boosted_q =
+          av1_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex = av1_compute_qdelta(
           rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth);
       active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-    } else {
-      // not first frame of one pass and kf_boost is set
+    } else {  // not first frame of one pass and kf_boost is set
       double q_adj_factor = 1.0;
-      double q_val;
       active_best_quality = get_kf_active_quality(
           rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth);
@@ -799,60 +799,56 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi,
         q_adj_factor -= 0.25;
       }
-      // Convert the adjustment factor to a qindex delta
-      // on active_best_quality.
-      q_val = av1_convert_qindex_to_q(active_best_quality, cm->bit_depth);
-      active_best_quality +=
-          av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+      // Convert the adjustment factor to a qindex delta on active_best_quality.
+      {
+        const double q_val =
+            av1_convert_qindex_to_q(active_best_quality, cm->bit_depth);
+        active_best_quality +=
+            av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+      }
     }
   } else if (!rc->is_src_frame_alt_ref &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
     // Use the lower of active_worst_quality and recent
     // average Q as basis for GF/ARF best Q limit unless last frame was
     // a key frame.
-    if (rc->frames_since_key > 1 &&
-        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
-      q = rc->avg_frame_qindex[INTER_FRAME];
-    } else {
-      q = rc->avg_frame_qindex[KEY_FRAME];
-    }
+    q = (rc->frames_since_key > 1 &&
+         rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
+            ? rc->avg_frame_qindex[INTER_FRAME]
+            : rc->avg_frame_qindex[KEY_FRAME];
     // For constrained quality don't allow Q less than the cq level
     if (oxcf->rc_mode == AOM_CQ) {
       if (q < cq_level) q = cq_level;
       active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
       // Constrained quality use slightly lower active best.
       active_best_quality = active_best_quality * 15 / 16;
     } else if (oxcf->rc_mode == AOM_Q) {
-      int qindex = cq_level;
-      double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
-      int delta_qindex;
-      if (cpi->refresh_alt_ref_frame)
-        delta_qindex = av1_compute_qdelta(rc, q, q * 0.40, cm->bit_depth);
-      else
-        delta_qindex = av1_compute_qdelta(rc, q, q * 0.50, cm->bit_depth);
+      const int qindex = cq_level;
+      const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex =
+          (cpi->refresh_alt_ref_frame)
+              ? av1_compute_qdelta(rc, q_val, q_val * 0.40, cm->bit_depth)
+              : av1_compute_qdelta(rc, q_val, q_val * 0.50, cm->bit_depth);
       active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
     } else {
       active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
     }
   } else {
     if (oxcf->rc_mode == AOM_Q) {
-      int qindex = cq_level;
-      double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
-      double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
-                                               0.70, 1.0, 0.85, 1.0 };
-      int delta_qindex = av1_compute_qdelta(
-          rc, q, q * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
-          cm->bit_depth);
+      const int qindex = cq_level;
+      const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+      const double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
+                                                     0.70, 1.0, 0.85, 1.0 };
+      const int delta_qindex = av1_compute_qdelta(
+          rc, q_val,
+          q_val * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
+          cm->bit_depth);
       active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
     } else {
       // Use the lower of active_worst_quality and recent/average Q.
-      if (cm->current_video_frame > 1)
-        active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
-      else
-        active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+      active_best_quality = (cm->current_video_frame > 1)
+                                ? inter_minq[rc->avg_frame_qindex[INTER_FRAME]]
+                                : inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
       if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) {
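Most of this hunk is one refactor applied repeatedly: fold an if/else assignment into a conditional expression so the variable can be initialized once (and, where it is never reassigned, declared const) instead of declared mutable and filled in later, which is what invited the shadowed redeclarations. A sketch of the pattern:

    /* One initializing expression instead of a mutable q plus if/else. */
    static int pick_base_q(int frames_since_key, int avg_inter_q,
                           int avg_key_q, int active_worst) {
      const int q = (frames_since_key > 1 && avg_inter_q < active_worst)
                        ? avg_inter_q
                        : avg_key_q;
      return q;
    }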

View File

@@ -1130,11 +1130,11 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
   if (args->exit_early) return;
   if (!is_inter_block(mbmi)) {
-    struct encode_b_args intra_arg = {
+    struct encode_b_args b_args = {
       x, NULL, &mbmi->skip, args->t_above, args->t_left, 1
     };
     av1_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                           &intra_arg);
+                           &b_args);
     if (args->cpi->sf.use_transform_domain_distortion) {
       dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size, &dist,
@@ -2829,7 +2829,6 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
   if (ref_best_rd < 0) is_cost_valid = 0;
   if (is_inter_block(mbmi) && is_cost_valid) {
-    int plane;
     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
       av1_subtract_plane(x, bsize, plane);
   }
@@ -4922,8 +4921,8 @@ static int64_t rd_pick_best_sub8x8_mode(
     int_mv mode_mv[MB_MODE_COUNT][2];
     int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
     PREDICTION_MODE mode_selected = ZEROMV;
-    int64_t best_rd = INT64_MAX;
-    const int i = idy * 2 + idx;
+    int64_t new_best_rd = INT64_MAX;
+    const int index = idy * 2 + idx;
     int ref;
 #if CONFIG_REF_MV
     CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
@@ -4938,7 +4937,7 @@ static int64_t rd_pick_best_sub8x8_mode(
       const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
 #if CONFIG_EXT_INTER
       int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
-      av1_update_mv_context(xd, mi, frame, mv_ref_list, i, mi_row, mi_col,
+      av1_update_mv_context(xd, mi, frame, mv_ref_list, index, mi_row, mi_col,
                             NULL);
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_GLOBAL_MOTION
@@ -4947,7 +4946,7 @@ static int64_t rd_pick_best_sub8x8_mode(
 #else   // CONFIG_GLOBAL_MOTION
       frame_mv[ZEROMV][frame].as_int = 0;
 #endif  // CONFIG_GLOBAL_MOTION
-      av1_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
+      av1_append_sub8x8_mvs_for_idx(cm, xd, index, ref, mi_row, mi_col,
 #if CONFIG_REF_MV
                                     ref_mv_stack[ref], &ref_mv_count[ref],
 #endif
@@ -5022,7 +5021,7 @@ static int64_t rd_pick_best_sub8x8_mode(
         for (ref = 0; ref < 1 + has_second_rf; ++ref)
           bsi->ref_mv[ref]->as_int = ref_mvs_sub8x8[mv_idx][ref].as_int;
 #endif  // CONFIG_EXT_INTER
-        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
+        bsi->rdstat[index][mode_idx].brdcost = INT64_MAX;
         if (!(inter_mode_mask & (1 << this_mode))) continue;
 #if CONFIG_REF_MV
@@ -5030,16 +5029,16 @@ static int64_t rd_pick_best_sub8x8_mode(
 #if !CONFIG_EXT_INTER
         if (filter_idx > 0 && this_mode == NEWMV) {
           BEST_SEG_INFO *ref_bsi = bsi_buf;
-          SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[i][mode_idx];
+          SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[index][mode_idx];
           if (has_second_rf) {
-            if (seg_mvs[i][mbmi->ref_frame[0]].as_int ==
+            if (seg_mvs[index][mbmi->ref_frame[0]].as_int ==
                     ref_rdstat->mvs[0].as_int &&
                 ref_rdstat->mvs[0].as_int != INVALID_MV)
               if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
                 --run_mv_search;
-            if (seg_mvs[i][mbmi->ref_frame[1]].as_int ==
+            if (seg_mvs[index][mbmi->ref_frame[1]].as_int ==
                     ref_rdstat->mvs[1].as_int &&
                 ref_rdstat->mvs[1].as_int != INVALID_MV)
               if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
@@ -5048,23 +5047,24 @@ static int64_t rd_pick_best_sub8x8_mode(
             if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
                 ref_rdstat->mvs[0].as_int != INVALID_MV) {
               run_mv_search = 0;
-              seg_mvs[i][mbmi->ref_frame[0]].as_int = ref_rdstat->mvs[0].as_int;
+              seg_mvs[index][mbmi->ref_frame[0]].as_int =
+                  ref_rdstat->mvs[0].as_int;
             }
           }
           if (run_mv_search != 0 && filter_idx > 1) {
             ref_bsi = bsi_buf + 1;
-            ref_rdstat = &ref_bsi->rdstat[i][mode_idx];
+            ref_rdstat = &ref_bsi->rdstat[index][mode_idx];
             run_mv_search = 2;
             if (has_second_rf) {
-              if (seg_mvs[i][mbmi->ref_frame[0]].as_int ==
+              if (seg_mvs[index][mbmi->ref_frame[0]].as_int ==
                       ref_rdstat->mvs[0].as_int &&
                   ref_rdstat->mvs[0].as_int != INVALID_MV)
                 if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
                   --run_mv_search;
-              if (seg_mvs[i][mbmi->ref_frame[1]].as_int ==
+              if (seg_mvs[index][mbmi->ref_frame[1]].as_int ==
                       ref_rdstat->mvs[1].as_int &&
                   ref_rdstat->mvs[1].as_int != INVALID_MV)
                 if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
@@ -5073,7 +5073,7 @@ static int64_t rd_pick_best_sub8x8_mode(
               if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
                   ref_rdstat->mvs[0].as_int != INVALID_MV) {
                 run_mv_search = 0;
-                seg_mvs[i][mbmi->ref_frame[0]].as_int =
+                seg_mvs[index][mbmi->ref_frame[0]].as_int =
                     ref_rdstat->mvs[0].as_int;
               }
             }
@@ -5093,24 +5093,24 @@ static int64_t rd_pick_best_sub8x8_mode(
                                   mbmi_ext->compound_mode_context,
 #endif  // CONFIG_REF_MV && CONFIG_EXT_INTER
                                   frame_mv, this_mode, mbmi->ref_frame, bsize,
-                                  i))
+                                  index))
           continue;
         memcpy(orig_pre, pd->pre, sizeof(orig_pre));
-        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
-               sizeof(bsi->rdstat[i][mode_idx].ta));
-        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
-               sizeof(bsi->rdstat[i][mode_idx].tl));
+        memcpy(bsi->rdstat[index][mode_idx].ta, t_above,
+               sizeof(bsi->rdstat[index][mode_idx].ta));
+        memcpy(bsi->rdstat[index][mode_idx].tl, t_left,
+               sizeof(bsi->rdstat[index][mode_idx].tl));
         // motion search for newmv (single predictor case only)
         if (!has_second_rf &&
 #if CONFIG_EXT_INTER
             have_newmv_in_inter_mode(this_mode) &&
-            (seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+            (seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
              av1_use_mv_hp(&bsi->ref_mv[0]->as_mv) == 0)
 #else
             this_mode == NEWMV &&
-            (seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+            (seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV ||
              run_mv_search)
 #endif  // CONFIG_EXT_INTER
             ) {
@@ -5127,7 +5127,7 @@ static int64_t rd_pick_best_sub8x8_mode(
           /* Is the best so far sufficiently good that we can't justify doing
            * a new motion search. */
-          if (best_rd < label_mv_thresh) break;
+          if (new_best_rd < label_mv_thresh) break;
           if (cpi->oxcf.mode != BEST) {
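Renaming the block-local best cost to new_best_rd separates it from the best_rd that the enclosing function presumably tracks; the same rename runs through the rest of rd_pick_best_sub8x8_mode. A sketch of keeping a per-block minimum under its own name:

    #include <stdint.h>

    /* Per-block running minimum, deliberately not named best_rd. */
    static int64_t best_mode_cost(const int64_t *rd_costs, int n_modes) {
      int64_t new_best_rd = INT64_MAX;
      int m;
      for (m = 0; m < n_modes; ++m)
        if (rd_costs[m] < new_best_rd) new_best_rd = rd_costs[m];
      return new_best_rd;
    }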
#if CONFIG_EXT_INTER #if CONFIG_EXT_INTER
@@ -5135,18 +5135,18 @@ static int64_t rd_pick_best_sub8x8_mode(
 #else
 // use previous block's result as next block's MV predictor.
 #if !CONFIG_REF_MV
-            if (i > 0) {
-              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
-              if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
+            if (index > 0) {
+              bsi->mvp.as_int = mi->bmi[index - 1].as_mv[0].as_int;
+              if (index == 2)
+                bsi->mvp.as_int = mi->bmi[index - 2].as_mv[0].as_int;
             }
 #endif
 #endif  // CONFIG_EXT_INTER
           }
-          if (i == 0)
-            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
-          else
-            max_mv =
-                AOMMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
+          max_mv = (index == 0) ? (int)x->max_mv_context[mbmi->ref_frame[0]]
+                                : AOMMAX(abs(bsi->mvp.as_mv.row),
+                                         abs(bsi->mvp.as_mv.col)) >>
+                                      3;
           if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
             // Take wtd average of the step_params based on the last frame's
@@ -5173,7 +5173,7 @@ static int64_t rd_pick_best_sub8x8_mode(
           }
           // adjust src pointer for this block
-          mi_buf_shift(x, i);
+          mi_buf_shift(x, index);
           av1_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
@@ -5202,7 +5202,6 @@ static int64_t rd_pick_best_sub8x8_mode(
             const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
             const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
             // Use up-sampled reference frames.
-            struct macroblockd_plane *const pd = &xd->plane[0];
             struct buf_2d backup_pred = pd->pre[0];
             const YV12_BUFFER_CONFIG *upsampled_ref =
                 get_upsampled_ref(cpi, mbmi->ref_frame[0]);
@@ -5216,7 +5215,7 @@ static int64_t rd_pick_best_sub8x8_mode(
             // adjust pred pointer for this block
             pd->pre[0].buf =
-                &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, i,
+                &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, index,
                                                          pd->pre[0].stride))
                                 << 3];
@@ -5270,9 +5269,9 @@ static int64_t rd_pick_best_sub8x8_mode(
           // save motion search result for use in compound prediction
 #if CONFIG_EXT_INTER
-          seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
+          seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
 #else
-          seg_mvs[i][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
+          seg_mvs[index][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
 #endif  // CONFIG_EXT_INTER
         }
@@ -5291,11 +5290,11 @@ static int64_t rd_pick_best_sub8x8_mode(
         if (has_second_rf) {
 #if CONFIG_EXT_INTER
-          if (seg_mvs[i][mv_idx][mbmi->ref_frame[1]].as_int == INVALID_MV ||
-              seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
+          if (seg_mvs[index][mv_idx][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+              seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
 #else
-          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
-              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
+          if (seg_mvs[index][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+              seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV)
 #endif  // CONFIG_EXT_INTER
             continue;
         }
@@ -5317,26 +5316,26 @@ static int64_t rd_pick_best_sub8x8_mode(
 #endif
         {
           // adjust src pointers
-          mi_buf_shift(x, i);
+          mi_buf_shift(x, index);
           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
             int rate_mv;
             joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
                                 mi_col,
 #if CONFIG_EXT_INTER
-                                bsi->ref_mv, seg_mvs[i][mv_idx],
+                                bsi->ref_mv, seg_mvs[index][mv_idx],
 #else
-                                seg_mvs[i],
+                                seg_mvs[index],
 #endif  // CONFIG_EXT_INTER
-                                &rate_mv, i);
+                                &rate_mv, index);
 #if CONFIG_EXT_INTER
-            compound_seg_newmvs[i][0].as_int =
+            compound_seg_newmvs[index][0].as_int =
                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
-            compound_seg_newmvs[i][1].as_int =
+            compound_seg_newmvs[index][1].as_int =
                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
 #else
-            seg_mvs[i][mbmi->ref_frame[0]].as_int =
+            seg_mvs[index][mbmi->ref_frame[0]].as_int =
                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
-            seg_mvs[i][mbmi->ref_frame[1]].as_int =
+            seg_mvs[index][mbmi->ref_frame[1]].as_int =
                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
 #endif  // CONFIG_EXT_INTER
           }
@@ -5344,42 +5343,42 @@ static int64_t rd_pick_best_sub8x8_mode(
           mi_buf_restore(x, orig_src, orig_pre);
         }
-        bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs(
-            cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv,
+        bsi->rdstat[index][mode_idx].brate = set_and_cost_bmi_mvs(
+            cpi, x, xd, index, this_mode, mode_mv[this_mode], frame_mv,
 #if CONFIG_EXT_INTER
-            seg_mvs[i][mv_idx], compound_seg_newmvs[i],
+            seg_mvs[index][mv_idx], compound_seg_newmvs[index],
 #else
-            seg_mvs[i],
+            seg_mvs[index],
 #endif  // CONFIG_EXT_INTER
             bsi->ref_mv, x->nmvjointcost, x->mvcost);
         for (ref = 0; ref < 1 + has_second_rf; ++ref) {
-          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
+          bsi->rdstat[index][mode_idx].mvs[ref].as_int =
               mode_mv[this_mode][ref].as_int;
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
+            bsi->rdstat[index + 1][mode_idx].mvs[ref].as_int =
                 mode_mv[this_mode][ref].as_int;
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
+            bsi->rdstat[index + 2][mode_idx].mvs[ref].as_int =
                 mode_mv[this_mode][ref].as_int;
 #if CONFIG_REF_MV
-          bsi->rdstat[i][mode_idx].pred_mv[ref].as_int =
-              mi->bmi[i].pred_mv[ref].as_int;
+          bsi->rdstat[index][mode_idx].pred_mv[ref].as_int =
+              mi->bmi[index].pred_mv[ref].as_int;
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].pred_mv[ref].as_int =
-                mi->bmi[i].pred_mv[ref].as_int;
+            bsi->rdstat[index + 1][mode_idx].pred_mv[ref].as_int =
+                mi->bmi[index].pred_mv[ref].as_int;
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].pred_mv[ref].as_int =
-                mi->bmi[i].pred_mv[ref].as_int;
+            bsi->rdstat[index + 2][mode_idx].pred_mv[ref].as_int =
+                mi->bmi[index].pred_mv[ref].as_int;
 #endif
 #if CONFIG_EXT_INTER
-          bsi->rdstat[i][mode_idx].ref_mv[ref].as_int =
+          bsi->rdstat[index][mode_idx].ref_mv[ref].as_int =
               bsi->ref_mv[ref]->as_int;
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].ref_mv[ref].as_int =
+            bsi->rdstat[index + 1][mode_idx].ref_mv[ref].as_int =
                 bsi->ref_mv[ref]->as_int;
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].ref_mv[ref].as_int =
+            bsi->rdstat[index + 2][mode_idx].ref_mv[ref].as_int =
                 bsi->ref_mv[ref]->as_int;
 #endif  // CONFIG_EXT_INTER
         }
@@ -5398,17 +5397,18 @@ static int64_t rd_pick_best_sub8x8_mode(
             subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
 #if CONFIG_EXT_INTER
             if (have_newmv_in_inter_mode(this_mode))
-              have_ref &= ((mode_mv[this_mode][ref].as_int ==
-                            ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
+              have_ref &=
+                  ((mode_mv[this_mode][ref].as_int ==
+                    ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) &&
                    (bsi->ref_mv[ref]->as_int ==
-                    ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+                    ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int));
             else
 #endif  // CONFIG_EXT_INTER
               have_ref &= mode_mv[this_mode][ref].as_int ==
-                          ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
+                          ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int;
           }
-          have_ref &= ref_bsi->rdstat[i][mode_idx].brate > 0;
+          have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0;
           if (filter_idx > 1 && !subpelmv && !have_ref) {
             ref_bsi = bsi_buf + 1;
@@ -5416,118 +5416,126 @@ static int64_t rd_pick_best_sub8x8_mode(
             for (ref = 0; ref < 1 + has_second_rf; ++ref)
 #if CONFIG_EXT_INTER
               if (have_newmv_in_inter_mode(this_mode))
-                have_ref &= ((mode_mv[this_mode][ref].as_int ==
-                              ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
+                have_ref &=
+                    ((mode_mv[this_mode][ref].as_int ==
+                      ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) &&
                      (bsi->ref_mv[ref]->as_int ==
-                      ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+                      ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int));
               else
 #endif  // CONFIG_EXT_INTER
                 have_ref &= mode_mv[this_mode][ref].as_int ==
-                            ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
-            have_ref &= ref_bsi->rdstat[i][mode_idx].brate > 0;
+                            ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int;
+            have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0;
           }
           if (!subpelmv && have_ref &&
-              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
+              ref_bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) {
 #if CONFIG_REF_MV
-            bsi->rdstat[i][mode_idx].byrate =
-                ref_bsi->rdstat[i][mode_idx].byrate;
-            bsi->rdstat[i][mode_idx].bdist = ref_bsi->rdstat[i][mode_idx].bdist;
-            bsi->rdstat[i][mode_idx].bsse = ref_bsi->rdstat[i][mode_idx].bsse;
-            bsi->rdstat[i][mode_idx].brate +=
-                ref_bsi->rdstat[i][mode_idx].byrate;
-            bsi->rdstat[i][mode_idx].eobs = ref_bsi->rdstat[i][mode_idx].eobs;
+            bsi->rdstat[index][mode_idx].byrate =
+                ref_bsi->rdstat[index][mode_idx].byrate;
+            bsi->rdstat[index][mode_idx].bdist =
+                ref_bsi->rdstat[index][mode_idx].bdist;
+            bsi->rdstat[index][mode_idx].bsse =
+                ref_bsi->rdstat[index][mode_idx].bsse;
+            bsi->rdstat[index][mode_idx].brate +=
+                ref_bsi->rdstat[index][mode_idx].byrate;
+            bsi->rdstat[index][mode_idx].eobs =
+                ref_bsi->rdstat[index][mode_idx].eobs;
-            bsi->rdstat[i][mode_idx].brdcost =
-                RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate,
-                       bsi->rdstat[i][mode_idx].bdist);
+            bsi->rdstat[index][mode_idx].brdcost =
+                RDCOST(x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate,
+                       bsi->rdstat[index][mode_idx].bdist);
-            memcpy(bsi->rdstat[i][mode_idx].ta, ref_bsi->rdstat[i][mode_idx].ta,
-                   sizeof(bsi->rdstat[i][mode_idx].ta));
-            memcpy(bsi->rdstat[i][mode_idx].tl, ref_bsi->rdstat[i][mode_idx].tl,
-                   sizeof(bsi->rdstat[i][mode_idx].tl));
+            memcpy(bsi->rdstat[index][mode_idx].ta,
+                   ref_bsi->rdstat[index][mode_idx].ta,
+                   sizeof(bsi->rdstat[index][mode_idx].ta));
+            memcpy(bsi->rdstat[index][mode_idx].tl,
+                   ref_bsi->rdstat[index][mode_idx].tl,
+                   sizeof(bsi->rdstat[index][mode_idx].tl));
 #else
-            memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
-                   sizeof(SEG_RDSTAT));
+            memcpy(&bsi->rdstat[index][mode_idx],
+                   &ref_bsi->rdstat[index][mode_idx], sizeof(SEG_RDSTAT));
 #endif
             if (num_4x4_blocks_wide > 1)
-              bsi->rdstat[i + 1][mode_idx].eobs =
-                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
+              bsi->rdstat[index + 1][mode_idx].eobs =
+                  ref_bsi->rdstat[index + 1][mode_idx].eobs;
             if (num_4x4_blocks_high > 1)
-              bsi->rdstat[i + 2][mode_idx].eobs =
-                  ref_bsi->rdstat[i + 2][mode_idx].eobs;
+              bsi->rdstat[index + 2][mode_idx].eobs =
+                  ref_bsi->rdstat[index + 2][mode_idx].eobs;
-            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
+            if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) {
 #if CONFIG_REF_MV
               // If the NEWMV mode is using the same motion vector as the
               // NEARESTMV mode, skip the rest rate-distortion calculations
               // and use the inferred motion vector modes.
               if (this_mode == NEWMV) {
                 if (has_second_rf) {
-                  if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+                  if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
                           bsi->ref_mv[0]->as_int &&
-                      bsi->rdstat[i][mode_idx].mvs[1].as_int ==
+                      bsi->rdstat[index][mode_idx].mvs[1].as_int ==
                           bsi->ref_mv[1]->as_int)
                     continue;
                 } else {
-                  if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+                  if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
                           bsi->ref_mv[0]->as_int)
                     continue;
                 }
               }
 #endif
               mode_selected = this_mode;
-              best_rd = bsi->rdstat[i][mode_idx].brdcost;
+              new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
             }
             continue;
           }
         }
-        bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment(
-            cpi, x, bsi->segment_rd - this_segment_rd, i,
-            &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist,
-            &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta,
-            bsi->rdstat[i][mode_idx].tl, idy, idx, mi_row, mi_col);
+        bsi->rdstat[index][mode_idx].brdcost = encode_inter_mb_segment(
+            cpi, x, bsi->segment_rd - this_segment_rd, index,
+            &bsi->rdstat[index][mode_idx].byrate,
+            &bsi->rdstat[index][mode_idx].bdist,
+            &bsi->rdstat[index][mode_idx].bsse, bsi->rdstat[index][mode_idx].ta,
+            bsi->rdstat[index][mode_idx].tl, idy, idx, mi_row, mi_col);
-        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
-          bsi->rdstat[i][mode_idx].brdcost +=
-              RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
-          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
-          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
+        if (bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) {
+          bsi->rdstat[index][mode_idx].brdcost += RDCOST(
+              x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate, 0);
+          bsi->rdstat[index][mode_idx].brate +=
+              bsi->rdstat[index][mode_idx].byrate;
+          bsi->rdstat[index][mode_idx].eobs = p->eobs[index];
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
+            bsi->rdstat[index + 1][mode_idx].eobs = p->eobs[index + 1];
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
+            bsi->rdstat[index + 2][mode_idx].eobs = p->eobs[index + 2];
         }
-        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
+        if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) {
 #if CONFIG_REF_MV
           // If the NEWMV mode is using the same motion vector as the
           // NEARESTMV mode, skip the rest rate-distortion calculations
           // and use the inferred motion vector modes.
           if (this_mode == NEWMV) {
             if (has_second_rf) {
-              if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+              if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
                       bsi->ref_mv[0]->as_int &&
-                  bsi->rdstat[i][mode_idx].mvs[1].as_int ==
+                  bsi->rdstat[index][mode_idx].mvs[1].as_int ==
                       bsi->ref_mv[1]->as_int)
                 continue;
             } else {
-              if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+              if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
                       bsi->ref_mv[0]->as_int)
                 continue;
             }
           }
 #endif
           mode_selected = this_mode;
-          best_rd = bsi->rdstat[i][mode_idx].brdcost;
+          new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
         }
       } /*for each 4x4 mode*/
-      if (best_rd == INT64_MAX) {
+      if (new_best_rd == INT64_MAX) {
         int iy, midx;
-        for (iy = i + 1; iy < 4; ++iy)
+        for (iy = index + 1; iy < 4; ++iy)
 #if CONFIG_EXT_INTER
           for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
 #else
@@ -5539,33 +5547,33 @@ static int64_t rd_pick_best_sub8x8_mode(
     }
     mode_idx = INTER_OFFSET(mode_selected);
-    memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
-    memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
+    memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above));
+    memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left));
 #if CONFIG_EXT_INTER
     mv_idx = (mode_selected == NEWFROMNEARMV) ? 1 : 0;
-    bsi->ref_mv[0]->as_int = bsi->rdstat[i][mode_idx].ref_mv[0].as_int;
+    bsi->ref_mv[0]->as_int = bsi->rdstat[index][mode_idx].ref_mv[0].as_int;
     if (has_second_rf)
-      bsi->ref_mv[1]->as_int = bsi->rdstat[i][mode_idx].ref_mv[1].as_int;
+      bsi->ref_mv[1]->as_int = bsi->rdstat[index][mode_idx].ref_mv[1].as_int;
 #endif  // CONFIG_EXT_INTER
-    set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
-                         frame_mv,
+    set_and_cost_bmi_mvs(cpi, x, xd, index, mode_selected,
+                         mode_mv[mode_selected], frame_mv,
 #if CONFIG_EXT_INTER
-                         seg_mvs[i][mv_idx], compound_seg_newmvs[i],
+                         seg_mvs[index][mv_idx], compound_seg_newmvs[index],
 #else
-                         seg_mvs[i],
+                         seg_mvs[index],
 #endif  // CONFIG_EXT_INTER
                          bsi->ref_mv, x->nmvjointcost, x->mvcost);
-    br += bsi->rdstat[i][mode_idx].brate;
-    bd += bsi->rdstat[i][mode_idx].bdist;
-    block_sse += bsi->rdstat[i][mode_idx].bsse;
-    segmentyrate += bsi->rdstat[i][mode_idx].byrate;
-    this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
+    br += bsi->rdstat[index][mode_idx].brate;
+    bd += bsi->rdstat[index][mode_idx].bdist;
+    block_sse += bsi->rdstat[index][mode_idx].bsse;
+    segmentyrate += bsi->rdstat[index][mode_idx].byrate;
+    this_segment_rd += bsi->rdstat[index][mode_idx].brdcost;
     if (this_segment_rd > bsi->segment_rd) {
       int iy, midx;
-      for (iy = i + 1; iy < 4; ++iy)
+      for (iy = index + 1; iy < 4; ++iy)
 #if CONFIG_EXT_INTER
         for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
 #else
@@ -5913,9 +5921,9 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
     x->best_mv.as_int = INVALID_MV;
     if (scaled_ref_frame) {
-      int i;
-      for (i = 0; i < MAX_MB_PLANE; ++i)
-        xd->plane[i].pre[ref_idx] = backup_yv12[i];
+      int j;
+      for (j = 0; j < MAX_MB_PLANE; ++j)
+        xd->plane[j].pre[ref_idx] = backup_yv12[j];
     }
     return;
   }
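single_motion_search already uses i elsewhere, so the restore loop gets its own counter j; when a cleanup loop lives inside a function whose main counter is spoken for, a fresh name is cheaper than a shadow. A sketch under assumed types:

    #define MAX_MB_PLANE 3
    struct buf_2d { const unsigned char *buf; int stride; }; /* assumed shape */

    static void restore_planes(struct buf_2d planes[MAX_MB_PLANE],
                               const struct buf_2d backup[MAX_MB_PLANE]) {
      int j; /* not 'i': the caller's scope already owns that name */
      for (j = 0; j < MAX_MB_PLANE; ++j) planes[j] = backup[j];
    }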
@@ -8528,6 +8536,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
     int rate2 = 0, rate_y = 0, rate_uv = 0;
     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
     int skippable = 0;
+    int i;
     int this_skip2 = 0;
     int64_t total_sse = INT64_MAX;
 #if CONFIG_REF_MV

View File

@@ -336,7 +336,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
         ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * AOM_INTERP_EXTEND);
     for (mb_col = 0; mb_col < mb_cols; mb_col++) {
-      int i, j, k;
+      int j, k;
       int stride;
       memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));