From 96e41cb461893e240e24950242879a538aee7728 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Thu, 14 Mar 2013 09:49:38 -0700 Subject: [PATCH 01/19] Removed shadow warnings : bitstream.c encodeframe.c onyx_if.c ratectrl.c and quantize.c Adding -Wshadow to CFLAGS generated a bunch of warnings. This patch removes these warnings. Change-Id: I8c8faa9fde57c1c49662d332a90bc8d9a0f4a2ce --- vp8/encoder/bitstream.c | 22 +++++++++++----------- vp8/encoder/encodeframe.c | 3 +-- vp8/encoder/onyx_if.c | 2 +- vp8/encoder/quantize.c | 6 +++--- vp8/encoder/ratectrl.c | 4 ++-- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 8f94171e6..5c41ec8a1 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -90,17 +90,17 @@ static void update_mode( if (new_b + (n << 8) < old_b) { - int i = 0; + int j = 0; vp8_write_bit(w, 1); do { - const vp8_prob p = Pnew[i]; + const vp8_prob p = Pnew[j]; - vp8_write_literal(w, Pcur[i] = p ? p : 1, 8); + vp8_write_literal(w, Pcur[j] = p ? 
p : 1, 8); } - while (++i < n); + while (++j < n); } else vp8_write_bit(w, 0); @@ -245,15 +245,15 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) if (L) { - const unsigned char *pp = b->prob; - int v = e >> 1; - int n = L; /* number of bits in v, assumed nonzero */ - int i = 0; + const unsigned char *proba = b->prob; + const int v2 = e >> 1; + int n2 = L; /* number of bits in v2, assumed nonzero */ + i = 0; do { - const int bb = (v >> --n) & 1; - split = 1 + (((range - 1) * pp[i>>1]) >> 8); + const int bb = (v2 >> --n2) & 1; + split = 1 + (((range - 1) * proba[i>>1]) >> 8); i = b->tree[i+bb]; if (bb) @@ -301,7 +301,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) lowvalue <<= shift; } - while (n); + while (n2); } diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index d1b647be9..07bc33ced 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -852,11 +852,10 @@ void vp8_encode_frame(VP8_COMP *cpi) if (xd->segmentation_enabled) { - int i, j; + int j; if (xd->segmentation_enabled) { - for (i = 0; i < cpi->encoding_thread_count; i++) { for (j = 0; j < 4; j++) diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 555a9e4bc..60dd856f4 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -826,7 +826,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) { unsigned int sum = 0; unsigned int total_mbs = cm->MBs; - int i, thresh; + int thresh; unsigned int total_skip; int min = 2000; diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 4e2fef793..fda997ff6 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -184,17 +184,17 @@ void vp8_strict_quantize_b_c(BLOCK *b, BLOCKD *d) for (i = 0; i < 16; i++) { int dq; - int round; + int rounding; /*TODO: These arrays should be stored in zig-zag order.*/ rc = vp8_default_zig_zag1d[i]; z = coeff_ptr[rc]; dq = dequant_ptr[rc]; - round = dq >> 1; + rounding = dq >> 1; /* Sign of z. 
*/ sz = -(z < 0); x = (z + sz) ^ sz; - x += round; + x += rounding; if (x >= dq) { /* Quantize x. */ diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 65fd0c5be..8e3c01d93 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -614,7 +614,6 @@ static void calc_gf_params(VP8_COMP *cpi) static void calc_pframe_target_size(VP8_COMP *cpi) { int min_frame_target; - int Adjustment; int old_per_frame_bandwidth = cpi->per_frame_bandwidth; if ( cpi->current_layer > 0) @@ -658,6 +657,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) /* 1 pass */ else { + int Adjustment; /* Make rate adjustment to recover bits spent in key frame * Test to see if the key frame inter data rate correction * should still be in force @@ -688,7 +688,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) */ if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target)) { - int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; + Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; if (Adjustment > (cpi->this_frame_target - min_frame_target)) Adjustment = (cpi->this_frame_target - min_frame_target); From 5d79720d57a4356e5dacfdb66b4afce17dcd34f5 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Thu, 14 Mar 2013 14:23:13 -0700 Subject: [PATCH 02/19] Removed shadow warnings : mcomp.c rdopt.c Adding -Wshadow to CFLAGS generated a bunch of warnings. This patch removes these warnings. 
Change-Id: Ib498de4b8652051d257cf86dcb40d2968a5013ae --- vp8/encoder/mcomp.c | 12 ++++++------ vp8/encoder/rdopt.c | 19 +++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index a34af6428..038213007 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -233,7 +233,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #if ARCH_X86 || ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; - unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; int buf_r1, buf_r2, buf_c1; @@ -244,7 +244,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, y_stride = 32; /* Copy to intermediate buffer before searching. */ - vfp->copymem(y0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2); + vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2); y = xd->y_buf + y_stride*buf_r1 +buf_c1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; @@ -375,12 +375,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #if ARCH_X86 || ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; - unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; y_stride = 32; /* Copy 18 rows x 32 cols area to intermediate buffer before searching. 
*/ - vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); + vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); y = xd->y_buf + y_stride + 1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; @@ -686,12 +686,12 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #if ARCH_X86 || ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; - unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; y_stride = 32; /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ - vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); + vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); y = xd->y_buf + y_stride + 1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 3d60bebda..57114fb64 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -884,8 +884,8 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int rate; - int distortion; + int this_rate; + int this_distortion; int this_rd; xd->mode_info_context->mbmi.uv_mode = mode; @@ -907,17 +907,17 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, vp8_quantize_mbuv(x); rate_to = rd_cost_mbuv(x); - rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode]; + this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode]; - distortion = vp8_mbuverror(x) / 4; + this_distortion = vp8_mbuverror(x) / 4; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { best_rd = this_rd; - 
d = distortion; - r = rate; + d = this_distortion; + r = this_rate; *rate_tokenonly = rate_to; mode_selected = mode; } @@ -1294,12 +1294,11 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { - int distortion; + int disto; unsigned int sse; cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, - &distortion, &sse); - + &disto, &sse); } } /* NEW4X4 */ From ae64e7b408b32a8c6c3ce94c1811b6c47d884a48 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Thu, 14 Mar 2013 14:45:23 -0700 Subject: [PATCH 03/19] Removed shadow warnings : postproc.c decodframe.c threading.c and denoising.c Adding -Wshadow to CFLAGS generated a bunch of warnings. This patch removes these warnings. Change-Id: I434a9f4bfac9ad4ab7d2a67a35ef21e6636280da --- vp8/common/postproc.c | 77 ++++++++++++++++++++-------------------- vp8/decoder/decodframe.c | 6 ++-- vp8/decoder/threading.c | 1 - vp8/encoder/denoising.c | 2 -- 4 files changed, 41 insertions(+), 45 deletions(-) diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index e40fb111c..0266f4c09 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -439,29 +439,28 @@ static void fillrd(struct postproc_state *state, int q, int a) char char_dist[300]; double sigma; - int ai = a, qi = q, i; + int i; vp8_clear_system_state(); - sigma = ai + .5 + .6 * (63 - qi) / 63.0; + sigma = a + .5 + .6 * (63 - q) / 63.0; /* set up a lookup table of 256 entries that matches * a gaussian distribution with sigma determined by q. */ { - double i; int next, j; next = 0; for (i = -32; i < 32; i++) { - int a = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i)); + const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i)); - if (a) + if (v) { - for (j = 0; j < a; j++) + for (j = 0; j < v; j++) { char_dist[next+j] = (char) i; } @@ -544,12 +543,12 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise, * filled with the same color block. 
*/ void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v, - int y1, int u1, int v1, int alpha, int stride) + int y_1, int u_1, int v_1, int alpha, int stride) { int i, j; - int y1_const = y1*((1<<16)-alpha); - int u1_const = u1*((1<<16)-alpha); - int v1_const = v1*((1<<16)-alpha); + int y1_const = y_1*((1<<16)-alpha); + int u1_const = u_1*((1<<16)-alpha); + int v1_const = v_1*((1<<16)-alpha); y += 2*stride + 2; for (i = 0; i < 12; i++) @@ -582,12 +581,12 @@ void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v, * unblended to allow for other visualizations to be layered. */ void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v, - int y1, int u1, int v1, int alpha, int stride) + int y_1, int u_1, int v_1, int alpha, int stride) { int i, j; - int y1_const = y1*((1<<16)-alpha); - int u1_const = u1*((1<<16)-alpha); - int v1_const = v1*((1<<16)-alpha); + int y1_const = y_1*((1<<16)-alpha); + int u1_const = u_1*((1<<16)-alpha); + int v1_const = v_1*((1<<16)-alpha); for (i = 0; i < 2; i++) { @@ -646,12 +645,12 @@ void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v, } void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v, - int y1, int u1, int v1, int alpha, int stride) + int y_1, int u_1, int v_1, int alpha, int stride) { int i, j; - int y1_const = y1*((1<<16)-alpha); - int u1_const = u1*((1<<16)-alpha); - int v1_const = v1*((1<<16)-alpha); + int y1_const = y_1*((1<<16)-alpha); + int u1_const = u_1*((1<<16)-alpha); + int v1_const = v_1*((1<<16)-alpha); for (i = 0; i < 4; i++) { @@ -676,46 +675,46 @@ void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v, } } -static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height) +static void constrain_line (int x_0, int *x_1, int y_0, int *y_1, int width, int height) { int dx; int dy; - if (*x1 > width) + if (*x_1 > width) { - dx = *x1 - x0; - dy = *y1 - y0; + dx = 
*x_1 - x_0; + dy = *y_1 - y_0; - *x1 = width; + *x_1 = width; if (dx) - *y1 = ((width-x0)*dy)/dx + y0; + *y_1 = ((width-x_0)*dy)/dx + y_0; } - if (*x1 < 0) + if (*x_1 < 0) { - dx = *x1 - x0; - dy = *y1 - y0; + dx = *x_1 - x_0; + dy = *y_1 - y_0; - *x1 = 0; + *x_1 = 0; if (dx) - *y1 = ((0-x0)*dy)/dx + y0; + *y_1 = ((0-x_0)*dy)/dx + y_0; } - if (*y1 > height) + if (*y_1 > height) { - dx = *x1 - x0; - dy = *y1 - y0; + dx = *x_1 - x_0; + dy = *y_1 - y_0; - *y1 = height; + *y_1 = height; if (dy) - *x1 = ((height-y0)*dx)/dy + x0; + *x_1 = ((height-y_0)*dx)/dy + x_0; } - if (*y1 < 0) + if (*y_1 < 0) { - dx = *x1 - x0; - dy = *y1 - y0; + dx = *x_1 - x_0; + dy = *y_1 - y_0; - *y1 = 0; + *y_1 = 0; if (dy) - *x1 = ((0-y0)*dx)/dy + x0; + *x_1 = ((0-y_0)*dx)/dy + x_0; } } diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 6f8282a64..a6b193b07 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -1335,11 +1335,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) #if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) { - unsigned int i; + unsigned int thread; vp8mt_decode_mb_rows(pbi, xd); vp8_yv12_extend_frame_borders(yv12_fb_new); - for (i = 0; i < pbi->decoding_thread_count; ++i) - corrupt_tokens |= pbi->mb_row_di[i].mbd.corrupted; + for (thread = 0; thread < pbi->decoding_thread_count; ++thread) + corrupt_tokens |= pbi->mb_row_di[thread].mbd.corrupted; } else #endif diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 73f9a8356..73031898e 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -343,7 +343,6 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) { - int i; int recon_yoffset, recon_uvoffset; int mb_col; int filter_level; diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c index 1ee1cb59f..781926547 100644 --- 
a/vp8/encoder/denoising.c +++ b/vp8/encoder/denoising.c @@ -206,8 +206,6 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, MB_MODE_INFO saved_mbmi; MACROBLOCKD *filter_xd = &x->e_mbd; MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi; - int mv_col; - int mv_row; int sse_diff = zero_mv_sse - best_sse; saved_mbmi = *mbmi; From c4195e0eb8671fcbc88be59c117e74dfe0dac402 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 4 Apr 2013 19:00:31 -0700 Subject: [PATCH 04/19] tests: use a portable rand() implementation the one from gtest in this case: testing::internal::Random. this will make the tests deterministic between platforms. addresses issue #568. Change-Id: I5a8a92f5c33f52cb0a219c1dd3d02335acbbf163 --- test/acm_random.h | 22 ++++++++++++---------- test/fdct8x8_test.cc | 28 ++++++++++++++++++---------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/test/acm_random.h b/test/acm_random.h index 514894eda..13903c66a 100644 --- a/test/acm_random.h +++ b/test/acm_random.h @@ -11,7 +11,7 @@ #ifndef LIBVPX_TEST_ACM_RANDOM_H_ #define LIBVPX_TEST_ACM_RANDOM_H_ -#include <stdlib.h> +#include "third_party/googletest/src/include/gtest/gtest.h" #include "vpx/vpx_integer.h" @@ -19,24 +19,23 @@ namespace libvpx_test { class ACMRandom { public: - ACMRandom() { - Reset(DeterministicSeed()); - } + ACMRandom() : random_(DeterministicSeed()) {} - explicit ACMRandom(int seed) { - Reset(seed); - } + explicit ACMRandom(int seed) : random_(seed) {} void Reset(int seed) { - srand(seed); + random_.Reseed(seed); } uint8_t Rand8(void) { - return (rand() >> 8) & 0xff; + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + // There's a bit more entropy in the upper bits of this implementation. 
+ return (value >> 24) & 0xff; } int PseudoUniform(int range) { - return (rand() >> 8) % range; + return random_.Generate(range); } int operator()(int n) { @@ -46,6 +45,9 @@ class ACMRandom { static int DeterministicSeed(void) { return 0xbaba; } + + private: + testing::internal::Random random_; }; } // namespace libvpx_test diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index d82f7c3bd..5967d36c4 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -51,11 +51,15 @@ TEST(VP9Fdct8x8Test, SignBiasCheck) { } for (int j = 0; j < 64; ++j) { - const bool bias_acceptable = (abs(count_sign_block[j][0] - - count_sign_block[j][1]) < 1000); - EXPECT_TRUE(bias_acceptable) - << "Error: 8x8 FDCT has a sign bias > 1%" - << " for input range [-255, 255] at index " << j; + const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]); + const int max_diff = 1125; + EXPECT_LT(diff, max_diff) + << "Error: 8x8 FDCT has a sign bias > " + << 1. * max_diff / count_test_block * 100 << "%" + << " for input range [-255, 255] at index " << j + << " count0: " << count_sign_block[j][0] + << " count1: " << count_sign_block[j][1] + << " diff: " << diff; } memset(count_sign_block, 0, sizeof(count_sign_block)); @@ -76,11 +80,15 @@ TEST(VP9Fdct8x8Test, SignBiasCheck) { } for (int j = 0; j < 64; ++j) { - const bool bias_acceptable = (abs(count_sign_block[j][0] - - count_sign_block[j][1]) < 10000); - EXPECT_TRUE(bias_acceptable) - << "Error: 8x8 FDCT has a sign bias > 10%" - << " for input range [-15, 15] at index " << j; + const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]); + const int max_diff = 10000; + EXPECT_LT(diff, max_diff) + << "Error: 4x4 FDCT has a sign bias > " + << 1. 
* max_diff / count_test_block * 100 << "%" + << " for input range [-15, 15] at index " << j + << " count0: " << count_sign_block[j][0] + << " count1: " << count_sign_block[j][1] + << " diff: " << diff; } }; From 8b4b28a5ea877a2d5c3b818f614b6b0d5205cea5 Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 5 Apr 2013 11:56:54 -0700 Subject: [PATCH 05/19] fix make test invocation for msvc win64 Change-Id: If5d4b7ffa67223ed72b53a6c9b9e42b4de5718f2 --- build/make/configure.sh | 2 ++ libs.mk | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/build/make/configure.sh b/build/make/configure.sh index 1ac303525..050ae57a7 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -1087,10 +1087,12 @@ EOF win32) add_asflags -f win32 enabled debug && add_asflags -g cv8 + EXE_SFX=.exe ;; win64) add_asflags -f x64 enabled debug && add_asflags -g cv8 + EXE_SFX=.exe ;; linux*|solaris*|android*) add_asflags -f elf${bits} diff --git a/libs.mk b/libs.mk index 872a16bae..2281bd059 100644 --- a/libs.mk +++ b/libs.mk @@ -436,7 +436,7 @@ test_libvpx.vcproj: $(LIBVPX_TEST_SRCS) PROJECTS-$(CONFIG_MSVS) += test_libvpx.vcproj test:: testdata - @set -e; for t in $(addprefix Win32/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done + @set -e; for t in $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS:.cc=.exe))); do $$t; done endif else From 282c963923eb969c146d63e934bbece433a95282 Mon Sep 17 00:00:00 2001 From: Marco Paniconi Date: Thu, 11 Apr 2013 18:19:18 -0700 Subject: [PATCH 06/19] Fix for multi-res-encoding: Use local variable for setting the improved prediction mode. cpi->sf.improved_mv_pred is set/fixed at the frame level and should not be changed inside pick_inter_mode. 
Change-Id: Ie28d9171ac000e631af0e30204970e3d4fff3078 --- vp8/encoder/pickinter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 4c2527d68..c5279fed2 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -594,6 +594,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX; #endif + int sf_improved_mv_pred = cpi->sf.improved_mv_pred; int_mv mvp; int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; @@ -882,7 +883,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, last frame motion info is not stored, then we can not use improved_mv_pred. */ if (cpi->oxcf.mr_encoder_id && !parent_ref_valid) - cpi->sf.improved_mv_pred = 0; + sf_improved_mv_pred = 0; if (parent_ref_valid && parent_ref_frame) { @@ -899,7 +900,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, }else #endif { - if(cpi->sf.improved_mv_pred) + if(sf_improved_mv_pred) { if(!saddone) { From 6c3f06a4d78973dafbdfe6effe7f3fd3fdc8c643 Mon Sep 17 00:00:00 2001 From: Johann Date: Mon, 15 Apr 2013 12:19:06 -0700 Subject: [PATCH 07/19] Include RTCD header in encodeframe.c The file uses functions defined in vp8_rtcd.h but did not include the header. 
Change-Id: I110196ddc9181e533be1fe656e21c1791cabe226 --- vp8/encoder/encodeframe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index d1b647be9..d17ed370b 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -10,6 +10,7 @@ #include "vpx_config.h" +#include "vp8_rtcd.h" #include "encodemb.h" #include "encodemv.h" #include "vp8/common/common.h" From 94649bc0ef85813dd42834a3df985753864b9be6 Mon Sep 17 00:00:00 2001 From: Jim Bankoski Date: Tue, 16 Apr 2013 14:49:30 -0700 Subject: [PATCH 08/19] set up a speed 1 slightly worse results for faster encodes Change-Id: Ic5b38fcde7a2e334c4724e125b558bcb97783af6 --- vp9/encoder/vp9_rdopt.c | 136 ++++++++++++++++++++++++++-------------- 1 file changed, 89 insertions(+), 47 deletions(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 0083e8ae1..7434e5cdd 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -290,7 +290,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { } else { cpi->rd_threshes[i] = INT_MAX; } - cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; } } else { @@ -302,7 +301,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { } else { cpi->rd_threshes[i] = INT_MAX; } - cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i]; } } @@ -4319,6 +4317,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) && this_mode != ZEROMV) continue; + if (mbmi->second_ref_frame > 0 && (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 || yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) && @@ -5204,6 +5203,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_pred_diff[NB_PREDICTION_TYPES]; int64_t best_pred_rd[NB_PREDICTION_TYPES]; MB_MODE_INFO best_mbmode; + int j; int mode_index, best_mode_index = 0; unsigned int ref_costs[MAX_REF_FRAMES]; #if CONFIG_COMP_INTERINTRA_PRED @@ 
-5225,6 +5225,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int dist_uv_16x16 = 0, uv_skip_16x16 = 0; MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV; struct scale_factors scale_factor[4]; + unsigned int ref_frame_mask = 0; + unsigned int mode_mask = 0; xd->mode_info_context->mbmi.segment_id = segment_id; estimate_ref_frame_costs(cpi, segment_id, ref_costs); @@ -5235,58 +5237,87 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < NB_TXFM_MODES; i++) best_txfm_rd[i] = INT64_MAX; + // Create a mask set to 1 for each frame used by a smaller resolution. + if (cpi->Speed > 0) { + switch (block_size) { + case BLOCK_64X64: + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + ref_frame_mask |= (1 << x->mb_context[i][j].mic.mbmi.ref_frame); + mode_mask |= (1 << x->mb_context[i][j].mic.mbmi.mode); + } + } + for (i = 0; i < 4; i++) { + ref_frame_mask |= (1 << x->sb32_context[i].mic.mbmi.ref_frame); + mode_mask |= (1 << x->sb32_context[i].mic.mbmi.mode); + } + break; + case BLOCK_32X32: + for (i = 0; i < 4; i++) { + ref_frame_mask |= (1 + << x->mb_context[xd->sb_index][i].mic.mbmi.ref_frame); + mode_mask |= (1 << x->mb_context[xd->sb_index][i].mic.mbmi.mode); + } + break; + } + } + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size, - mb_row, mb_col, frame_mv[NEARESTMV], - frame_mv[NEARMV], frame_mdcounts, - yv12_mb, scale_factor); + mb_row, mb_col, frame_mv[NEARESTMV], frame_mv[NEARMV], + frame_mdcounts, yv12_mb, scale_factor); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } - - if (block_size == BLOCK_64X64) { - mbmi->mode = DC_PRED; - if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { - mbmi->txfm_size = TX_4X4; - rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4, + // Disallow intra 
if none of the smaller prediction sizes used intra and + // speed > 0 ; + if (cpi->Speed == 0 + || ( cpi->Speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) { + if (block_size == BLOCK_64X64) { + mbmi->mode = DC_PRED; + if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { + mbmi->txfm_size = TX_4X4; + rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4, + &dist_uv_4x4, &uv_skip_4x4); + mode_uv_4x4 = mbmi->uv_mode; + } + if (cm->txfm_mode != ONLY_4X4) { + mbmi->txfm_size = TX_8X8; + rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8, + &dist_uv_8x8, &uv_skip_8x8); + mode_uv_8x8 = mbmi->uv_mode; + } + if (cm->txfm_mode >= ALLOW_32X32) { + mbmi->txfm_size = TX_32X32; + rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_16x16, + &rate_uv_tokenonly_16x16, &dist_uv_16x16, + &uv_skip_16x16); + mode_uv_16x16 = mbmi->uv_mode; + } + } else { + assert(block_size == BLOCK_32X32); + mbmi->mode = DC_PRED; + if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { + mbmi->txfm_size = TX_4X4; + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4, &dist_uv_4x4, &uv_skip_4x4); - mode_uv_4x4 = mbmi->uv_mode; - } - if (cm->txfm_mode != ONLY_4X4) { - mbmi->txfm_size = TX_8X8; - rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8, + mode_uv_4x4 = mbmi->uv_mode; + } + if (cm->txfm_mode != ONLY_4X4) { + mbmi->txfm_size = TX_8X8; + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8, &dist_uv_8x8, &uv_skip_8x8); - mode_uv_8x8 = mbmi->uv_mode; - } - if (cm->txfm_mode >= ALLOW_32X32) { - mbmi->txfm_size = TX_32X32; - rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_16x16, - &rate_uv_tokenonly_16x16, - &dist_uv_16x16, &uv_skip_16x16); - mode_uv_16x16 = mbmi->uv_mode; - } - } else { - assert(block_size == BLOCK_32X32); - mbmi->mode = DC_PRED; - if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { - mbmi->txfm_size = TX_4X4; - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_4x4, 
&rate_uv_tokenonly_4x4, - &dist_uv_4x4, &uv_skip_4x4); - mode_uv_4x4 = mbmi->uv_mode; - } - if (cm->txfm_mode != ONLY_4X4) { - mbmi->txfm_size = TX_8X8; - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8, - &dist_uv_8x8, &uv_skip_8x8); - mode_uv_8x8 = mbmi->uv_mode; - } - if (cm->txfm_mode >= ALLOW_32X32) { - mbmi->txfm_size = TX_32X32; - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, &rate_uv_tokenonly_16x16, - &dist_uv_16x16, &uv_skip_16x16); - mode_uv_16x16 = mbmi->uv_mode; + mode_uv_8x8 = mbmi->uv_mode; + } + if (cm->txfm_mode >= ALLOW_32X32) { + mbmi->txfm_size = TX_32X32; + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, + &rate_uv_tokenonly_16x16, &dist_uv_16x16, + &uv_skip_16x16); + mode_uv_16x16 = mbmi->uv_mode; + } } } @@ -5313,10 +5344,21 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->skip = 0; this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame; - if (!(ref_frame == INTRA_FRAME || - (cpi->ref_frame_flags & flag_list[ref_frame]))) { + if (!(ref_frame == INTRA_FRAME + || (cpi->ref_frame_flags & flag_list[ref_frame]))) { continue; } + if (cpi->Speed > 0) { + if (!(ref_frame_mask & (1 << ref_frame))) { + continue; + } + if (vp9_mode_order[mode_index].second_ref_frame != NONE + && !(ref_frame_mask + & (1 << vp9_mode_order[mode_index].second_ref_frame))) { + continue; + } + } + mbmi->ref_frame = ref_frame; mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, From 5b6d33f9afee6da7a370cdd94aae4ac85c8588c9 Mon Sep 17 00:00:00 2001 From: Christian Duvivier Date: Mon, 25 Mar 2013 16:18:38 -0700 Subject: [PATCH 09/19] Faster vp9_short_fdct4x4 and vp9_short_fdct8x4. Scalar path is about 1.3x faster (2.1% overall encoder speedup). SSE2 path is about 5.0x faster (8.4% overall encoder speedup). 
Change-Id: I360d167b5ad6f387bba00406129323e2fe6e7dda --- vp9/common/vp9_rtcd_defs.sh | 4 +- vp9/encoder/vp9_dct.c | 82 ++-- vp9/encoder/x86/vp9_dct_sse2.asm | 432 ---------------------- vp9/encoder/x86/vp9_dct_sse2_intrinsics.c | 105 ++++++ vp9/vp9cx.mk | 1 - 5 files changed, 167 insertions(+), 457 deletions(-) delete mode 100644 vp9/encoder/x86/vp9_dct_sse2.asm diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 8b6efc384..46495cb11 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -645,10 +645,10 @@ prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int p specialize vp9_short_fdct8x8 sse2 prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_fdct4x4 +specialize vp9_short_fdct4x4 sse2 prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch" -specialize vp9_short_fdct8x4 +specialize vp9_short_fdct8x4 sse2 prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct32x32 diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index aeef9c6df..ebf40e4e6 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -37,30 +37,68 @@ static void fdct4_1d(int16_t *input, int16_t *output) { } void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) { - int16_t out[4 * 4]; - int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; - int i, j; - int16_t temp_in[4], temp_out[4]; - - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = input[j * short_pitch + i] << 4; - if (i == 0 && temp_in[0]) - temp_in[0] += 1; - fdct4_1d(temp_in, temp_out); - for (j = 0; j < 4; ++j) - outptr[j * 4 + i] = temp_out[j]; + // The 2D transform is done with two passes which are actually pretty + // similar. In the first one, we transform the columns and transpose + // the results. In the second one, we transform the rows. 
To achieve that, + // as the first pass results are transposed, we transpose the columns (that + // is the transposed rows) and transpose the results (so that it goes back + // in normal/row positions). + const int stride = pitch >> 1; + int pass; + // We need an intermediate buffer between passes. + int16_t intermediate[4 * 4]; + int16_t *in = input; + int16_t *out = intermediate; + // Do the two transform/transpose passes + for (pass = 0; pass < 2; ++pass) { + /*canbe16*/ int input[4]; + /*canbe16*/ int step[4]; + /*needs32*/ int temp1, temp2; + int i; + for (i = 0; i < 4; ++i) { + // Load inputs. + if (0 == pass) { + input[0] = in[0 * stride] << 4; + input[1] = in[1 * stride] << 4; + input[2] = in[2 * stride] << 4; + input[3] = in[3 * stride] << 4; + if (i == 0 && input[0]) { + input[0] += 1; + } + } else { + input[0] = in[0 * 4]; + input[1] = in[1 * 4]; + input[2] = in[2 * 4]; + input[3] = in[3 * 4]; + } + // Transform. + step[0] = input[0] + input[3]; + step[1] = input[1] + input[2]; + step[2] = input[1] - input[2]; + step[3] = input[0] - input[3]; + temp1 = (step[0] + step[1]) * cospi_16_64; + temp2 = (step[0] - step[1]) * cospi_16_64; + out[0] = dct_const_round_shift(temp1); + out[2] = dct_const_round_shift(temp2); + temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; + temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; + out[1] = dct_const_round_shift(temp1); + out[3] = dct_const_round_shift(temp2); + // Do next column (which is a transposed row in second/horizontal pass) + in++; + out += 4; + } + // Setup in/out for next pass. 
+ in = intermediate; + out = output; } - // Rows - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j + i * 4]; - fdct4_1d(temp_in, temp_out); - for (j = 0; j < 4; ++j) - output[j + i * 4] = (temp_out[j] + 1) >> 2; + { + int i, j; + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + output[j + i * 4] = (output[j + i * 4] + 1) >> 2; + } } } diff --git a/vp9/encoder/x86/vp9_dct_sse2.asm b/vp9/encoder/x86/vp9_dct_sse2.asm deleted file mode 100644 index bbd6086da..000000000 --- a/vp9/encoder/x86/vp9_dct_sse2.asm +++ /dev/null @@ -1,432 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro STACK_FRAME_CREATE 0 -%if ABI_IS_32BIT - %define input rsi - %define output rdi - %define pitch rax - push rbp - mov rbp, rsp - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) - mov rdi, arg(1) - - movsxd rax, dword ptr arg(2) - lea rcx, [rsi + rax*2] -%else - %if LIBVPX_YASM_WIN64 - %define input rcx - %define output rdx - %define pitch r8 - SAVE_XMM 7, u - %else - %define input rdi - %define output rsi - %define pitch rdx - %endif -%endif -%endmacro - -%macro STACK_FRAME_DESTROY 0 - %define input - %define output - %define pitch - -%if ABI_IS_32BIT - pop rdi - pop rsi - RESTORE_GOT - pop rbp -%else - %if LIBVPX_YASM_WIN64 - RESTORE_XMM - %endif -%endif - ret -%endmacro - -;void vp9_short_fdct4x4_sse2(short *input, short *output, int pitch) -global sym(vp9_short_fdct4x4_sse2) PRIVATE -sym(vp9_short_fdct4x4_sse2): - - STACK_FRAME_CREATE - - movq xmm0, MMWORD PTR[input ] ;03 02 01 00 - movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10 - 
lea input, [input+2*pitch] - movq xmm1, MMWORD PTR[input ] ;23 22 21 20 - movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30 - - punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00 - punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20 - - movdqa xmm2, xmm0 - punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00 - punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10 - movdqa xmm1, xmm0 - punpckldq xmm0, xmm2 ;31 21 30 20 11 10 01 00 - pshufhw xmm1, xmm1, 0b1h ;22 23 02 03 xx xx xx xx - pshufhw xmm2, xmm2, 0b1h ;32 33 12 13 xx xx xx xx - - punpckhdq xmm1, xmm2 ;32 33 22 23 12 13 02 03 - movdqa xmm3, xmm0 - paddw xmm0, xmm1 ;b1 a1 b1 a1 b1 a1 b1 a1 - psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1 - psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3 - psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3 - - movdqa xmm1, xmm0 - pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 - pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 - movdqa xmm4, xmm3 - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352 - - paddd xmm3, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm4, XMMWORD PTR[GLOBAL(_7500)] - psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12 - - packssdw xmm0, xmm1 ;op[2] op[0] - packssdw xmm3, xmm4 ;op[3] op[1] - ; 23 22 21 20 03 02 01 00 - ; - ; 33 32 31 30 13 12 11 10 - ; - movdqa xmm2, xmm0 - punpcklqdq xmm0, xmm3 ;13 12 11 10 03 02 01 00 - punpckhqdq xmm2, xmm3 ;23 22 21 20 33 32 31 30 - - movdqa xmm3, xmm0 - punpcklwd xmm0, xmm2 ;32 30 22 20 12 10 02 00 - punpckhwd xmm3, xmm2 ;33 31 23 21 13 11 03 01 - movdqa xmm2, xmm0 - punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00 - punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20 - - movdqa xmm5, XMMWORD PTR[GLOBAL(_7)] - pshufd xmm2, xmm2, 04eh - movdqa xmm3, xmm0 - paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1 - psubw xmm3, xmm2 ;c1 c1 c1 c1 d1 d1 d1 d1 - - pshufd xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 b1 a1 a1 - movdqa xmm2, xmm3 ;save d1 for compare - pshufd xmm3, 
xmm3, 0d8h ;c1 c1 d1 d1 c1 c1 d1 d1 - pshuflw xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 a1 b1 a1 - pshuflw xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 d1 c1 d1 - pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1 - pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1 - movdqa xmm1, xmm0 - pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 - pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 - - pxor xmm4, xmm4 ;zero out for compare - paddd xmm0, xmm5 - paddd xmm1, xmm5 - pcmpeqw xmm2, xmm4 - psrad xmm0, 4 ;(a1 + b1 + 7)>>4 - psrad xmm1, 4 ;(a1 - b1 + 7)>>4 - pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper, - ;and keep bit 0 of lower - - movdqa xmm4, xmm3 - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352 - paddd xmm3, XMMWORD PTR[GLOBAL(_12000)] - paddd xmm4, XMMWORD PTR[GLOBAL(_51000)] - packssdw xmm0, xmm1 ;op[8] op[0] - psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16 - psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16 - - packssdw xmm3, xmm4 ;op[12] op[4] - movdqa xmm1, xmm0 - paddw xmm3, xmm2 ;op[4] += (d1!=0) - punpcklqdq xmm0, xmm3 ;op[4] op[0] - punpckhqdq xmm1, xmm3 ;op[12] op[8] - - movdqa XMMWORD PTR[output + 0], xmm0 - movdqa XMMWORD PTR[output + 16], xmm1 - - STACK_FRAME_DESTROY - -;void vp9_short_fdct8x4_sse2(short *input, short *output, int pitch) -global sym(vp9_short_fdct8x4_sse2) PRIVATE -sym(vp9_short_fdct8x4_sse2): - - STACK_FRAME_CREATE - - ; read the input data - movdqa xmm0, [input ] - movdqa xmm2, [input+ pitch] - lea input, [input+2*pitch] - movdqa xmm4, [input ] - movdqa xmm3, [input+ pitch] - - ; transpose for the first stage - movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07 - movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27 - - punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13 - punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17 - - punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33 - punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37 - - movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13 
- punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31 - - punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33 - - movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17 - punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35 - - punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37 - movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33 - - punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37 - punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36 - - movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31 - punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34 - - punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35 - - ; xmm0 0 - ; xmm1 1 - ; xmm2 2 - ; xmm3 3 - - ; first stage - movdqa xmm5, xmm0 - movdqa xmm4, xmm1 - - paddw xmm0, xmm3 ; a1 = 0 + 3 - paddw xmm1, xmm2 ; b1 = 1 + 2 - - psubw xmm4, xmm2 ; c1 = 1 - 2 - psubw xmm5, xmm3 ; d1 = 0 - 3 - - psllw xmm5, 3 - psllw xmm4, 3 - - psllw xmm0, 3 - psllw xmm1, 3 - - ; output 0 and 2 - movdqa xmm2, xmm0 ; a1 - - paddw xmm0, xmm1 ; op[0] = a1 + b1 - psubw xmm2, xmm1 ; op[2] = a1 - b1 - - ; output 1 and 3 - ; interleave c1, d1 - movdqa xmm1, xmm5 ; d1 - punpcklwd xmm1, xmm4 ; c1 d1 - punpckhwd xmm5, xmm4 ; c1 d1 - - movdqa xmm3, xmm1 - movdqa xmm4, xmm5 - - pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd xmm1, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm4, XMMWORD PTR[GLOBAL(_14500)] - paddd xmm3, XMMWORD PTR[GLOBAL(_7500)] - paddd xmm5, XMMWORD PTR[GLOBAL(_7500)] - - psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 - psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - - packssdw xmm1, xmm4 ; op[1] - packssdw xmm3, xmm5 ; op[3] - - ; done with vertical - ; transpose for the second stage - movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34 - movdqa xmm5, 
xmm2 ; 02 12 22 32 06 16 26 36 - - punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31 - punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35 - - punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33 - punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37 - - movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31 - punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13 - - punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33 - - movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35 - punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17 - - punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37 - movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33 - - punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37 - punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27 - - movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13 - punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07 - - punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17 - - ; xmm0 0 - ; xmm1 4 - ; xmm2 1 - ; xmm3 3 - - movdqa xmm5, xmm0 - movdqa xmm2, xmm1 - - paddw xmm0, xmm3 ; a1 = 0 + 3 - paddw xmm1, xmm4 ; b1 = 1 + 2 - - psubw xmm4, xmm2 ; c1 = 1 - 2 - psubw xmm5, xmm3 ; d1 = 0 - 3 - - pxor xmm6, xmm6 ; zero out for compare - - pcmpeqw xmm6, xmm5 ; d1 != 0 - - pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper, - ; and keep bit 0 of lower - - ; output 0 and 2 - movdqa xmm2, xmm0 ; a1 - - paddw xmm0, xmm1 ; a1 + b1 - psubw xmm2, xmm1 ; a1 - b1 - - paddw xmm0, XMMWORD PTR[GLOBAL(_7w)] - paddw xmm2, XMMWORD PTR[GLOBAL(_7w)] - - psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4 - psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4 - - ; output 1 and 3 - ; interleave c1, d1 - movdqa xmm1, xmm5 ; d1 - punpcklwd xmm1, xmm4 ; c1 d1 - punpckhwd xmm5, xmm4 ; c1 d1 - - movdqa xmm3, xmm1 - movdqa xmm4, xmm5 - - pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - - pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - - paddd xmm1, XMMWORD PTR[GLOBAL(_12000)] - paddd 
xmm4, XMMWORD PTR[GLOBAL(_12000)] - paddd xmm3, XMMWORD PTR[GLOBAL(_51000)] - paddd xmm5, XMMWORD PTR[GLOBAL(_51000)] - - psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 - psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - - packssdw xmm1, xmm4 ; op[4] - packssdw xmm3, xmm5 ; op[12] - - paddw xmm1, xmm6 ; op[4] += (d1!=0) - - movdqa xmm4, xmm0 - movdqa xmm5, xmm2 - - punpcklqdq xmm0, xmm1 - punpckhqdq xmm4, xmm1 - - punpcklqdq xmm2, xmm3 - punpckhqdq xmm5, xmm3 - - movdqa XMMWORD PTR[output + 0 ], xmm0 - movdqa XMMWORD PTR[output + 16], xmm2 - movdqa XMMWORD PTR[output + 32], xmm4 - movdqa XMMWORD PTR[output + 48], xmm5 - - STACK_FRAME_DESTROY - -SECTION_RODATA -align 16 -_5352_2217: - dw 5352 - dw 2217 - dw 5352 - dw 2217 - dw 5352 - dw 2217 - dw 5352 - dw 2217 -align 16 -_2217_neg5352: - dw 2217 - dw -5352 - dw 2217 - dw -5352 - dw 2217 - dw -5352 - dw 2217 - dw -5352 -align 16 -_mult_add: - times 8 dw 1 -align 16 -_cmp_mask: - times 4 dw 1 - times 4 dw 0 -align 16 -_cmp_mask8x4: - times 8 dw 1 -align 16 -_mult_sub: - dw 1 - dw -1 - dw 1 - dw -1 - dw 1 - dw -1 - dw 1 - dw -1 -align 16 -_7: - times 4 dd 7 -align 16 -_7w: - times 8 dw 7 -align 16 -_14500: - times 4 dd 14500 -align 16 -_7500: - times 4 dd 7500 -align 16 -_12000: - times 4 dd 12000 -align 16 -_51000: - times 4 dd 51000 diff --git a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c index 358d979eb..49cb837e0 100644 --- a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c +++ b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c @@ -11,6 +11,111 @@ #include // SSE2 #include "vp9/common/vp9_idct.h" // for cospi constants +void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) { + // The 2D transform is done with two passes which are actually pretty + // similar. In the first one, we transform the columns and transpose + // the results. 
In the second one, we transform the rows. To achieve that, + // as the first pass results are transposed, we tranpose the columns (that + // is the transposed rows) and transpose the results (so that it goes back + // in normal/row positions). + const int stride = pitch >> 1; + int pass; + // Constants + // When we use them, in one case, they are all the same. In all others + // it's a pair of them that we need to repeat four times. This is done + // by constructing the 32 bit constant corresponding to that pair. + const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); + const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); + const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); + const __m128i kOne = _mm_set1_epi16(1); + __m128i in0, in1, in2, in3; + // Load inputs. + { + in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); + in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); + in2 = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); + in3 = _mm_loadl_epi64((const __m128i *)(input + 3 * stride)); + // x = x << 4 + in0 = _mm_slli_epi16(in0, 4); + in1 = _mm_slli_epi16(in1, 4); + in2 = _mm_slli_epi16(in2, 4); + in3 = _mm_slli_epi16(in3, 4); + // if (i == 0 && input[0]) input[0] += 1; + { + // The mask will only contain wether the first value is zero, all + // other comparison will fail as something shifted by 4 (above << 4) + // can never be equal to one. 
To increment in the non-zero case, we + // add the mask and one for the first element: + // - if zero, mask = -1, v = v - 1 + 1 = v + // - if non-zero, mask = 0, v = v + 0 + 1 = v + 1 + __m128i mask = _mm_cmpeq_epi16(in0, k__nonzero_bias_a); + in0 = _mm_add_epi16(in0, mask); + in0 = _mm_add_epi16(in0, k__nonzero_bias_b); + } + } + // Do the two transform/transpose passes + for (pass = 0; pass < 2; ++pass) { + // Transform 1/2: Add/substract + const __m128i r0 = _mm_add_epi16(in0, in3); + const __m128i r1 = _mm_add_epi16(in1, in2); + const __m128i r2 = _mm_sub_epi16(in1, in2); + const __m128i r3 = _mm_sub_epi16(in0, in3); + // Transform 1/2: Interleave to do the multiply by constants which gets us + // into 32 bits. + const __m128i t0 = _mm_unpacklo_epi16(r0, r1); + const __m128i t2 = _mm_unpacklo_epi16(r2, r3); + const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16); + const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16); + const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08); + const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24); + const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); + const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); + const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); + const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); + const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); + const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); + const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); + const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); + // Combine and transpose + const __m128i res0 = _mm_packs_epi32(w0, w2); + const __m128i res1 = _mm_packs_epi32(w4, w6); + // 00 01 02 03 20 21 22 23 + // 10 11 12 13 30 31 32 33 + const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1); + const __m128i tr0_1 = _mm_unpackhi_epi16(res0, res1); + // 00 10 01 11 02 12 03 13 + // 20 30 21 31 22 32 23 33 + in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); + in2 = _mm_unpackhi_epi32(tr0_0, tr0_1); + // 00 10 20 30 01 11 21 
31 in0 contains 0 followed by 1 + // 02 12 22 32 03 13 23 33 in2 contains 2 followed by 3 + if (0 == pass) { + // Extract values in the high part for second pass as transform code + // only uses the first four values. + in1 = _mm_unpackhi_epi64(in0, in0); + in3 = _mm_unpackhi_epi64(in2, in2); + } else { + // Post-condition output and store it (v + 1) >> 2, taking advantage + // of the fact 1/3 are stored just after 0/2. + __m128i out01 = _mm_add_epi16(in0, kOne); + __m128i out23 = _mm_add_epi16(in2, kOne); + out01 = _mm_srai_epi16(out01, 2); + out23 = _mm_srai_epi16(out23, 2); + _mm_storeu_si128((__m128i *)(output + 0 * 4), out01); + _mm_storeu_si128((__m128i *)(output + 2 * 4), out23); + } + } +} + +void vp9_short_fdct8x4_sse2(int16_t *input, int16_t *output, int pitch) { + vp9_short_fdct4x4_sse2(input, output, pitch); + vp9_short_fdct4x4_sse2(input + 4, output + 16, pitch); +} + void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int pitch) { const int stride = pitch >> 1; int pass; diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 43dba1373..13785f71b 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -90,7 +90,6 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.h VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_subtract_mmx.asm -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm From 3810bca9a9cfac4e3f3beb3d4a26a065102e3593 Mon Sep 17 00:00:00 2001 From: Johann Date: Wed, 17 Apr 2013 10:52:50 -0700 Subject: [PATCH 10/19] Fix Android ndk-build Add the config directory to the rtcd generation script. libvpx is configured in the jni directory but ndk-build is intended to be run from the next directory up. 
Currently it needs to be run from the jni directory but this is being looked into. Add a trailing slash to allow the variable to be empty. Reduce offset generation to the files which are actually used. Change-Id: Ia84fac37e8998ba647423d0ee45fc66a891ce10c --- build/make/Android.mk | 38 ++++++++++++++++++++------------------ libs.mk | 2 +- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/build/make/Android.mk b/build/make/Android.mk index cf6221017..1ff0884fc 100644 --- a/build/make/Android.mk +++ b/build/make/Android.mk @@ -48,7 +48,7 @@ # Running ndk-build will build libvpx and include it in your project. # -CONFIG_DIR := $(LOCAL_PATH) +CONFIG_DIR := $(LOCAL_PATH)/ LIBVPX_PATH := $(LOCAL_PATH)/libvpx ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL) @@ -56,9 +56,9 @@ ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL) # Makefiles created by the libvpx configure process # This will need to be fixed to handle x86. ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) - include $(CONFIG_DIR)/libs-armv7-android-gcc.mk + include $(CONFIG_DIR)libs-armv7-android-gcc.mk else - include $(CONFIG_DIR)/libs-armv5te-android-gcc.mk + include $(CONFIG_DIR)libs-armv5te-android-gcc.mk endif # Rule that is normally in Makefile created by libvpx @@ -106,26 +106,25 @@ $$(eval $$(call ev-build-file)) $(1) : $$(_OBJ) $(2) @mkdir -p $$(dir $$@) - @grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)/$(ASM_CONVERSION) > $$@ + @grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@ endef # Use ads2gas script to convert from RVCT format to GAS format. This passes # puts the processed file under $(ASM_CNV_PATH). 
Local clean rule # to handle removing these -ASM_CNV_OFFSETS_DEPEND = $(ASM_CNV_PATH)/vp8_asm_com_offsets.asm -ifeq ($(CONFIG_VP8_DECODER), yes) - ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_dec_offsets.asm -endif ifeq ($(CONFIG_VP8_ENCODER), yes) ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm endif +ifeq ($(HAVE_NEON), yes) + ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm +endif .PRECIOUS: %.asm.s $(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND) @mkdir -p $(dir $@) - @$(CONFIG_DIR)/$(ASM_CONVERSION) <$< > $@ + @$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@ -# For building vpx_rtcd.h, which has a rule in libs.mk +# For building *_rtcd.h, which have rules in libs.mk TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN))) target := libs @@ -177,7 +176,14 @@ ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) LOCAL_STATIC_LIBRARIES := cpufeatures endif -$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_rtcd.h +# Add a dependency to force generation of the RTCD files. 
+ifeq ($(CONFIG_VP8), yes) +$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h +endif +ifeq ($(CONFIG_VP9), yes) +$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h +endif +$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h .PHONY: clean clean: @@ -189,14 +195,10 @@ clean: include $(BUILD_SHARED_LIBRARY) -$(eval $(call asm_offsets_template,\ - $(ASM_CNV_PATH)/vp8_asm_com_offsets.asm, \ - $(LIBVPX_PATH)/vp8/common/vp8_asm_com_offsets.c)) - -ifeq ($(CONFIG_VP8_DECODER), yes) +ifeq ($(HAVE_NEON), yes) $(eval $(call asm_offsets_template,\ - $(ASM_CNV_PATH)/vp8_asm_dec_offsets.asm, \ - $(LIBVPX_PATH)/vp8/decoder/vp8_asm_dec_offsets.c)) + $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \ + $(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c)) endif ifeq ($(CONFIG_VP8_ENCODER), yes) diff --git a/libs.mk b/libs.mk index 2281bd059..adcde33f7 100644 --- a/libs.mk +++ b/libs.mk @@ -51,7 +51,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2) @echo " [CREATE] $$@" $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \ --sym=$(1) \ - --config=$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \ + --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \ $$(RTCD_OPTIONS) $$^ > $$@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h RTCD += $$(BUILD_PFX)$(1).h From 2bb8ecad02e2dd1edd6495b2eb8162eb591ba171 Mon Sep 17 00:00:00 2001 From: Frank Galligan Date: Wed, 17 Apr 2013 15:46:12 -0700 Subject: [PATCH 11/19] libvpx: Fix vp9 clang build. - UNINITIALIZED_IS_SAFE Macro triggers a warning in Clang for structs. 
Change-Id: Ib02c82f1fede7826564e17ccb7171c6fb18b8e44 --- vp9/decoder/vp9_decodframe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index fea6433b2..eb1b4896e 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -1550,7 +1550,7 @@ static void decode_tiles(VP9D_COMP *pbi, if (pbi->oxcf.inv_tile_order) { const int n_cols = pc->tile_columns; const uint8_t *data_ptr2[4][1 << 6]; - BOOL_DECODER UNINITIALIZED_IS_SAFE(bc_bak); + BOOL_DECODER bc_bak = {0}; // pre-initialize the offsets, we're going to read in inverse order data_ptr2[0][0] = data_ptr; From dbd050c59f1969405958d00421af42fb0ad33a2a Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 18 Apr 2013 13:01:57 -0700 Subject: [PATCH 12/19] vpxdec: correct VP[89] fourccs should have no effect as they are used in nestegg mappings, but aligns the defines with vpxenc.c Change-Id: Ic2295cd63701894c2963274239602b54cbb58631 --- vpxdec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpxdec.c b/vpxdec.c index 41c654fae..df0b8199c 100644 --- a/vpxdec.c +++ b/vpxdec.c @@ -49,8 +49,8 @@ static const char *exec_name; -#define VP8_FOURCC (0x00385056) -#define VP9_FOURCC (0x00395056) +#define VP8_FOURCC (0x30385056) +#define VP9_FOURCC (0x30395056) static const struct { char const *name; const vpx_codec_iface_t *(*iface)(void); From ac980b71cff73a80c2ea020aeed3520731cb37cf Mon Sep 17 00:00:00 2001 From: Johann Date: Tue, 23 Apr 2013 09:55:03 -0700 Subject: [PATCH 13/19] Improve sign consistency. Fix warning on windows: signed/unsigned mismatch on lines 415, 454 Comparison was between size_t data_sz >= int index_sz on 415 and unsigned int data_sz >= int index_sz on 454. Both might be changed to size_t, but tracing and replacing all comparisons is outside the scope of this change. In the rest of these two functions ensure unsigned values are used consistently. 
Change-Id: I922b399ceca612a92f44b9d1d331c1c6bae9d768 --- vp9/vp9_dx_iface.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index d0c23f07a..69f08a7af 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -408,9 +408,9 @@ static void parse_superframe_index(const uint8_t *data, *count = 0; if ((marker & 0xe0) == 0xc0) { - const int frames = (marker & 0x7) + 1; - const int mag = ((marker >> 3) & 3) + 1; - const int index_sz = 2 + mag * frames; + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; if (data_sz >= index_sz && data[data_sz - index_sz] == marker) { // found a valid superframe index @@ -418,7 +418,7 @@ static void parse_superframe_index(const uint8_t *data, const uint8_t *x = data + data_sz - index_sz + 1; for (i = 0; i < frames; i++) { - int this_sz = 0; + uint32_t this_sz = 0; for (j = 0; j < mag; j++) this_sz |= (*x++) << (j * 8); @@ -447,9 +447,9 @@ static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx, // Skip over the superframe index, if present if (data_sz && (*data_start & 0xe0) == 0xc0) { const uint8_t marker = *data_start; - const int frames = (marker & 0x7) + 1; - const int mag = ((marker >> 3) & 3) + 1; - const int index_sz = 2 + mag * frames; + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const uint32_t index_sz = 2 + mag * frames; if (data_sz >= index_sz && data_start[index_sz - 1] == marker) { data_start += index_sz; From 7af58d43389a3ef1b5c900dd8848c824c57b666f Mon Sep 17 00:00:00 2001 From: Johann Date: Tue, 23 Apr 2013 10:10:10 -0700 Subject: [PATCH 14/19] Resolve declaration and implementation. Clean Windows build warnings: warning C4028: formal parameter different from declaration This was fixed independently in master and experimental but the fixes were in opposite directions. 
One added const to the declaration and the other removed it from the implementation. Also update the variable names. This doesn't modify the data so call it ref, matching the functions in the vicinity, rather than dst. Change-Id: I2ffc6b4a874cb98c26487b909d20a5e099b5582c --- vp9/common/vp9_findnearmv.c | 20 ++++++++++---------- vp9/common/vp9_rtcd_defs.sh | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index f6d6932cc..a063ca477 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -77,11 +77,11 @@ unsigned int vp9_variance16x2_c(const uint8_t *src_ptr, } unsigned int vp9_sub_pixel_variance16x2_c(const uint8_t *src_ptr, - int src_pixels_per_line, + int source_stride, int xoffset, int yoffset, - const uint8_t *dst_ptr, - int dst_pixels_per_line, + const uint8_t *ref_ptr, + int ref_stride, unsigned int *sse) { uint16_t FData3[16 * 3]; // Temp data buffer used in filtering uint8_t temp2[2 * 16]; @@ -91,18 +91,18 @@ unsigned int vp9_sub_pixel_variance16x2_c(const uint8_t *src_ptr, VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, FData3, - src_pixels_per_line, 1, 3, 16, HFilter); + source_stride, 1, 3, 16, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter); - return vp9_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); + return vp9_variance16x2_c(temp2, 16, ref_ptr, ref_stride, sse); } unsigned int vp9_sub_pixel_variance2x16_c(const uint8_t *src_ptr, - int src_pixels_per_line, + int source_stride, int xoffset, int yoffset, - const uint8_t *dst_ptr, - int dst_pixels_per_line, + const uint8_t *ref_ptr, + int ref_stride, unsigned int *sse) { uint16_t FData3[2 * 17]; // Temp data buffer used in filtering uint8_t temp2[2 * 16]; @@ -112,10 +112,10 @@ unsigned int vp9_sub_pixel_variance2x16_c(const uint8_t *src_ptr, VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); 
var_filter_block2d_bil_first_pass(src_ptr, FData3, - src_pixels_per_line, 1, 17, 2, HFilter); + source_stride, 1, 17, 2, HFilter); var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter); - return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); + return vp9_variance2x16_c(temp2, 2, ref_ptr, ref_stride, sse); } #if CONFIG_USESELECTREFMV diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 46495cb11..43bc3cb1f 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -252,7 +252,7 @@ specialize vp9_sad16x3 sse2 prototype unsigned int vp9_sad3x16 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride" specialize vp9_sad3x16 sse2 -prototype unsigned int vp9_sub_pixel_variance16x2 "const uint8_t *src_ptr, const int source_stride, const int xoffset, const int yoffset, const uint8_t *ref_ptr, const int ref_stride, unsigned int *sse" +prototype unsigned int vp9_sub_pixel_variance16x2 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_sub_pixel_variance16x2 sse2 # From fe74c4286aca842aee5f89c76385e7d16018443e Mon Sep 17 00:00:00 2001 From: Johann Date: Wed, 24 Apr 2013 09:08:56 -0700 Subject: [PATCH 15/19] Rename quantize_sse2_intrinsics.c The only reason for the _intrinsics part of the file name was for the interim period where only one of the functions was redone and the base file name was the same. 
Change-Id: I7851154f1633d48821bee885b1cadb2148e65a23 --- .../x86/{quantize_sse2_intrinsics.c => quantize_sse2.c} | 0 vp8/vp8cx.mk | 6 +++--- 2 files changed, 3 insertions(+), 3 deletions(-) rename vp8/encoder/x86/{quantize_sse2_intrinsics.c => quantize_sse2.c} (100%) diff --git a/vp8/encoder/x86/quantize_sse2_intrinsics.c b/vp8/encoder/x86/quantize_sse2.c similarity index 100% rename from vp8/encoder/x86/quantize_sse2_intrinsics.c rename to vp8/encoder/x86/quantize_sse2.c diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index ca9f6a62e..7d1904aaf 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -89,12 +89,12 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm -VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2_intrinsics.c +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c # TODO(johann) make this generic ifeq ($(HAVE_SSE2),yes) -vp8/encoder/x86/quantize_sse2_intrinsics.c.o: CFLAGS += -msse2 -vp8/encoder/x86/quantize_sse2_intrinsics.c.d: CFLAGS += -msse2 +vp8/encoder/x86/quantize_sse2.c.o: CFLAGS += -msse2 +vp8/encoder/x86/quantize_sse2.c.d: CFLAGS += -msse2 endif ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) From 53a4620271975ef41b0d71181849f7291bd5a587 Mon Sep 17 00:00:00 2001 From: Johann Date: Wed, 24 Apr 2013 14:26:35 -0700 Subject: [PATCH 16/19] Change default iOS dev path This can be manually overridden with --libc= Change-Id: I0b857c751d5dc5423f79785e934bc8a714758e75 --- build/make/configure.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/make/configure.sh b/build/make/configure.sh index 4d0cad23e..23dc87f31 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -918,7 +918,7 @@ process_common_toolchain() { add_ldflags -arch_only ${tgt_isa} if [ -z "${alt_libc}" ]; then - alt_libc=${SDK_PATH}/SDKs/iPhoneOS5.1.sdk + 
alt_libc=${SDK_PATH}/SDKs/iPhoneOS6.0.sdk fi add_cflags "-isysroot ${alt_libc}" From c5b127afea85176e4b43c525eac238924e4d2116 Mon Sep 17 00:00:00 2001 From: Johann Date: Thu, 25 Apr 2013 11:13:02 -0700 Subject: [PATCH 17/19] Rename vp9_idct_x86.c Remove similarly named header file. It is obsolete. Move file to match naming style. Adjust make file to include the file correctly and remove extra unnecessary #if guard. Change-Id: Ifba07ba9938a5df08a9f4eda54a3ac4d6983f7bf --- ...{vp9_idct_x86.c => vp9_idct_intrin_sse2.c} | 2 - vp9/common/x86/vp9_idct_x86.h | 51 ------------------- vp9/vp9_common.mk | 7 ++- 3 files changed, 3 insertions(+), 57 deletions(-) rename vp9/common/x86/{vp9_idct_x86.c => vp9_idct_intrin_sse2.c} (99%) delete mode 100644 vp9/common/x86/vp9_idct_x86.h diff --git a/vp9/common/x86/vp9_idct_x86.c b/vp9/common/x86/vp9_idct_intrin_sse2.c similarity index 99% rename from vp9/common/x86/vp9_idct_x86.c rename to vp9/common/x86/vp9_idct_intrin_sse2.c index 811ed9899..dd7e68aa3 100644 --- a/vp9/common/x86/vp9_idct_x86.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -15,7 +15,6 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -#if HAVE_SSE2 // In order to improve performance, clip absolute diff values to [0, 255], // which allows to keep the additions/subtractions in 8 bits. void vp9_dc_only_idct_add_sse2(int input_dc, uint8_t *pred_ptr, @@ -1972,4 +1971,3 @@ void vp9_short_idct32x32_sse2(int16_t *input, int16_t *output, int pitch) { } } } -#endif diff --git a/vp9/common/x86/vp9_idct_x86.h b/vp9/common/x86/vp9_idct_x86.h deleted file mode 100644 index bd66d8c72..000000000 --- a/vp9/common/x86/vp9_idct_x86.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_X86_VP9_IDCT_X86_H_ -#define VP9_COMMON_X86_VP9_IDCT_X86_H_ - -/* Note: - * - * This platform is commonly built for runtime CPU detection. If you modify - * any of the function mappings present in this file, be sure to also update - * them in the function pointer initialization code - */ - -#if HAVE_MMX -extern prototype_second_order(vp9_short_inv_walsh4x4_mmx); -extern prototype_second_order(vp9_short_inv_walsh4x4_1_mmx); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp9_idct_iwalsh16 -#define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_mmx - -#undef vp9_idct_iwalsh1 -#define vp9_idct_iwalsh1 vp9_short_inv_walsh4x4_1_mmx - -#endif -#endif - -#if HAVE_SSE2 - -extern prototype_second_order(vp9_short_inv_walsh4x4_sse2); - -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_idct_iwalsh16 -#define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_sse2 - -#endif - -#endif - - - -#endif diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 5e1ff62f7..f5a4103f6 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -82,7 +82,6 @@ VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/vp9_textblit.c VP9_COMMON_SRCS-yes += common/vp9_treecoder.c VP9_COMMON_SRCS-$(CONFIG_IMPLICIT_SEGMENTATION) += common/vp9_implicit_segmentation.c -VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idct_x86.h VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_x86.h VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_postproc_x86.h VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c @@ -112,13 +111,13 @@ VP9_COMMON_SRCS-yes += common/vp9_maskingmv.c VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm endif -VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idct_x86.c +VP9_COMMON_SRCS-$(HAVE_SSE2) += 
common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_sse2.c ifeq ($(HAVE_SSE2),yes) -vp9/common/x86/vp9_idct_x86.c.o: CFLAGS += -msse2 +vp9/common/x86/vp9_idct_intrin_sse2.c.o: CFLAGS += -msse2 vp9/common/x86/vp9_loopfilter_intrin_sse2.c.o: CFLAGS += -msse2 vp9/common/x86/vp9_sadmxn_sse2.c.o: CFLAGS += -msse2 -vp9/common/x86/vp9_idct_x86.c.d: CFLAGS += -msse2 +vp9/common/x86/vp9_idct_intrin_sse2.c.d: CFLAGS += -msse2 vp9/common/x86/vp9_loopfilter_intrin_sse2.c.d: CFLAGS += -msse2 vp9/common/x86/vp9_sadmxn_sse2.c.d: CFLAGS += -msse2 endif From 863601c58968a5816ff17722432b5eedc6e13eae Mon Sep 17 00:00:00 2001 From: Johann Date: Thu, 25 Apr 2013 23:26:20 -0700 Subject: [PATCH 18/19] Normalize more intrinsic filenames vp9_dequantize_x86 has only sse2 functions. vp9_dct_sse2_intrinsics has no namespace collision and can drop _intrinsics. vp9_idct_mmx.h is unused. Change-Id: Ic16e31fb372a1d1e841a62ecb4189fe8f95808ec --- ...dequantize_x86.c => vp9_dequantize_sse2.c} | 3 --- vp9/decoder/x86/vp9_idct_mmx.h | 22 ------------------- ...9_dct_sse2_intrinsics.c => vp9_dct_sse2.c} | 0 vp9/vp9cx.mk | 6 ++--- vp9/vp9dx.mk | 6 ++--- 5 files changed, 6 insertions(+), 31 deletions(-) rename vp9/decoder/x86/{vp9_dequantize_x86.c => vp9_dequantize_sse2.c} (99%) delete mode 100644 vp9/decoder/x86/vp9_idct_mmx.h rename vp9/encoder/x86/{vp9_dct_sse2_intrinsics.c => vp9_dct_sse2.c} (100%) diff --git a/vp9/decoder/x86/vp9_dequantize_x86.c b/vp9/decoder/x86/vp9_dequantize_sse2.c similarity index 99% rename from vp9/decoder/x86/vp9_dequantize_x86.c rename to vp9/decoder/x86/vp9_dequantize_sse2.c index acfae2a27..1dfb8e08f 100644 --- a/vp9/decoder/x86/vp9_dequantize_x86.c +++ b/vp9/decoder/x86/vp9_dequantize_sse2.c @@ -15,8 +15,6 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -#if HAVE_SSE2 - void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride) { const int width = 4; @@ -452,4 
+450,3 @@ void vp9_add_constant_residual_32x32_sse2(const int16_t diff, dest += 4 * stride; } while (--i); } -#endif diff --git a/vp9/decoder/x86/vp9_idct_mmx.h b/vp9/decoder/x86/vp9_idct_mmx.h deleted file mode 100644 index 7d9829175..000000000 --- a/vp9/decoder/x86/vp9_idct_mmx.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_X86_VP9_IDCT_MMX_H_ -#define VP9_DECODER_X86_VP9_IDCT_MMX_H_ - - -void vp9_dequant_dc_idct_add_mmx(short *input, const short *dq, - unsigned char *pred, unsigned char *dest, - int pitch, int stride, int Dc); - -void vp9_dequant_idct_add_mmx(short *input, const short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride); - -#endif /* VP9_DECODER_X86_VP9_IDCT_MMX_H_ */ diff --git a/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c b/vp9/encoder/x86/vp9_dct_sse2.c similarity index 100% rename from vp9/encoder/x86/vp9_dct_sse2_intrinsics.c rename to vp9/encoder/x86/vp9_dct_sse2.c diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 13785f71b..51e24b846 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -109,10 +109,10 @@ VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_mmx.asm VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm -VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2_intrinsics.c +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c ifeq ($(HAVE_SSE2),yes) -vp9/encoder/x86/vp9_dct_sse2_intrinsics.c.d: CFLAGS += -msse2 -vp9/encoder/x86/vp9_dct_sse2_intrinsics.c.o: CFLAGS += -msse2 +vp9/encoder/x86/vp9_dct_sse2.c.d: CFLAGS += 
-msse2 +vp9/encoder/x86/vp9_dct_sse2.c.o: CFLAGS += -msse2 endif diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk index 239ae30b6..5cab6fc1f 100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@ -38,10 +38,10 @@ VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes)) VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_idct_blk_sse2.c -VP9_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/vp9_dequantize_x86.c +VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_dequantize_sse2.c ifeq ($(HAVE_SSE2),yes) -vp9/decoder/x86/vp9_dequantize_x86.c.o: CFLAGS += -msse2 -vp9/decoder/x86/vp9_dequantize_x86.c.d: CFLAGS += -msse2 +vp9/decoder/x86/vp9_dequantize_sse2.c.o: CFLAGS += -msse2 +vp9/decoder/x86/vp9_dequantize_sse2.c.d: CFLAGS += -msse2 endif $(eval $(call asm_offsets_template,\ From e58852582d9b785e09dd202c302881a5faf941d2 Mon Sep 17 00:00:00 2001 From: "changjun.yang" Date: Fri, 26 Apr 2013 15:16:42 +0800 Subject: [PATCH 19/19] code cleanup for arm_cpudetect.c Change-Id: I5c49a983ced45197e1035fa5615d71b0bdad4109 --- vpx_ports/arm_cpudetect.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c index 3c916f247..542ff6786 100644 --- a/vpx_ports/arm_cpudetect.c +++ b/vpx_ports/arm_cpudetect.c @@ -53,8 +53,6 @@ int arm_cpu_caps(void) { return flags & mask; } -#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ - #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ #define WIN32_LEAN_AND_MEAN