Compare commits
211 Commits
stable-vp9
...
pcs-2013
Author | SHA1 | Date | |
---|---|---|---|
![]() |
6d3bd96607 | ||
![]() |
6c2082db71 | ||
![]() |
3c4e9e341f | ||
![]() |
771f3ef5ad | ||
![]() |
e83ebc8992 | ||
![]() |
825b7c301d | ||
![]() |
691177842c | ||
![]() |
d0308b7daa | ||
![]() |
c52d85442c | ||
![]() |
5491a1f33e | ||
![]() |
c4627a9ff1 | ||
![]() |
b6e2f9b752 | ||
![]() |
6b78f11a03 | ||
![]() |
dcab9896e8 | ||
![]() |
03698aa6d8 | ||
![]() |
df8e156432 | ||
![]() |
5c66f6f5eb | ||
![]() |
40047bef5d | ||
![]() |
cd945c7bd9 | ||
![]() |
195061feda | ||
![]() |
c151bdd412 | ||
![]() |
1a9d4fedf3 | ||
![]() |
548671dd20 | ||
![]() |
4906fe45e2 | ||
![]() |
fd09be0984 | ||
![]() |
e3c1f0880f | ||
![]() |
509ba98938 | ||
![]() |
7ddd9f7f27 | ||
![]() |
c424c5e808 | ||
![]() |
282704145d | ||
![]() |
58a09c32c2 | ||
![]() |
9e056fa094 | ||
![]() |
d2a4ddf982 | ||
![]() |
cbdcc215b3 | ||
![]() |
d35e9a0c53 | ||
![]() |
14916b0ca6 | ||
![]() |
4e5d99ca72 | ||
![]() |
bc1b089372 | ||
![]() |
0f8805e086 | ||
![]() |
d12a502ef9 | ||
![]() |
7f13b33a78 | ||
![]() |
1a2f4fd2f5 | ||
![]() |
88251c86dc | ||
![]() |
68b8d1ea0a | ||
![]() |
821b987486 | ||
![]() |
fad3d07df3 | ||
![]() |
a76caa7ff4 | ||
![]() |
777460329b | ||
![]() |
7019e34c34 | ||
![]() |
f6d7e3679c | ||
![]() |
c66bfc70d1 | ||
![]() |
a57912f893 | ||
![]() |
8f229caf87 | ||
![]() |
623e163f84 | ||
![]() |
c288b94ab9 | ||
![]() |
03df17070b | ||
![]() |
6249a5b17e | ||
![]() |
855d078f95 | ||
![]() |
2b5bf7b8d8 | ||
![]() |
716d37f8bf | ||
![]() |
2ecd0dae1e | ||
![]() |
7a59efe7f8 | ||
![]() |
152fd59964 | ||
![]() |
ec421b7810 | ||
![]() |
31ceb6b13c | ||
![]() |
11cf0c39c9 | ||
![]() |
01d43aaa24 | ||
![]() |
ab03c00504 | ||
![]() |
eb506a6590 | ||
![]() |
fb6e6cd24d | ||
![]() |
d052117319 | ||
![]() |
efc8638890 | ||
![]() |
4ecdf26d9c | ||
![]() |
0f9efe9e7a | ||
![]() |
8e45778eaf | ||
![]() |
8486741e15 | ||
![]() |
8d0b712af6 | ||
![]() |
8d50d766d4 | ||
![]() |
a88f3110f8 | ||
![]() |
b927620231 | ||
![]() |
29815ca729 | ||
![]() |
4ab01fb5f7 | ||
![]() |
b3d3578ee4 | ||
![]() |
7343681675 | ||
![]() |
efbacc9f89 | ||
![]() |
5df8b1d05b | ||
![]() |
3bb773d03e | ||
![]() |
cf688474ea | ||
![]() |
33c7ed4478 | ||
![]() |
11fe8ecf57 | ||
![]() |
67a0a89272 | ||
![]() |
ef6d82358d | ||
![]() |
fff4caeac1 | ||
![]() |
2ce70a15d2 | ||
![]() |
da17ffa937 | ||
![]() |
e81a3ede4c | ||
![]() |
681fb22820 | ||
![]() |
cfbc246d57 | ||
![]() |
6903efa93d | ||
![]() |
b6c5dbe9ef | ||
![]() |
b10e6b2943 | ||
![]() |
bd9c057433 | ||
![]() |
ceaa3c37a9 | ||
![]() |
3fab2125ff | ||
![]() |
b1b4ba1bdd | ||
![]() |
209c6cbf8f | ||
![]() |
80d582239e | ||
![]() |
db60c02c9e | ||
![]() |
36d2794369 | ||
![]() |
35830879db | ||
![]() |
398ddafb62 | ||
![]() |
931c34e955 | ||
![]() |
3c465af2ab | ||
![]() |
15a36a0a0d | ||
![]() |
2b426969c3 | ||
![]() |
b55170ce95 | ||
![]() |
437f63144a | ||
![]() |
253fd256bf | ||
![]() |
794a7ccd78 | ||
![]() |
da0ce28fe3 | ||
![]() |
831d72ac5f | ||
![]() |
e45f4a4a4c | ||
![]() |
5b1dc1515f | ||
![]() |
eda4e24c0d | ||
![]() |
7755b9dada | ||
![]() |
6b5490cf68 | ||
![]() |
0c02bfcc2a | ||
![]() |
b5242368f3 | ||
![]() |
8266da1cd1 | ||
![]() |
f9e2140cab | ||
![]() |
64eff7f360 | ||
![]() |
2b5670238b | ||
![]() |
e2c92d1510 | ||
![]() |
87a214c277 | ||
![]() |
9cd14ea6ed | ||
![]() |
49f5efa8d8 | ||
![]() |
208658490c | ||
![]() |
d445945a84 | ||
![]() |
d0365c4a2c | ||
![]() |
c7b7b1da86 | ||
![]() |
682c27239f | ||
![]() |
450cbfe53a | ||
![]() |
12d57a9409 | ||
![]() |
d571e4e785 | ||
![]() |
57272e41dd | ||
![]() |
35c5d79e6b | ||
![]() |
b87696ac37 | ||
![]() |
b1c58f57a7 | ||
![]() |
30888742f4 | ||
![]() |
71cfaaa689 | ||
![]() |
9be0bb19df | ||
![]() |
6037f17942 | ||
![]() |
ff1ae7f713 | ||
![]() |
fe533c9741 | ||
![]() |
f24b9b4f87 | ||
![]() |
f1a627e8a2 | ||
![]() |
b6aa783d80 | ||
![]() |
9ba08208d3 | ||
![]() |
9bcd750565 | ||
![]() |
24ad692572 | ||
![]() |
b7a93578e5 | ||
![]() |
bacb5925ff | ||
![]() |
92a29c157f | ||
![]() |
13c7715a75 | ||
![]() |
e85eaf6acd | ||
![]() |
db92356577 | ||
![]() |
fbb62c6d2b | ||
![]() |
8c2e5e4964 | ||
![]() |
c701eeb59f | ||
![]() |
838eae3961 | ||
![]() |
071395de6a | ||
![]() |
a517343ca3 | ||
![]() |
54c87058bf | ||
![]() |
d11221f433 | ||
![]() |
14330abdc6 | ||
![]() |
e023e0d93b | ||
![]() |
a6a00fc6a3 | ||
![]() |
78fbb10642 | ||
![]() |
bb5e2bf86a | ||
![]() |
e51e7a0e8d | ||
![]() |
39c7b01d3c | ||
![]() |
24df77e951 | ||
![]() |
44b708b4c4 | ||
![]() |
f363aa3a15 | ||
![]() |
b0211e7edf | ||
![]() |
79af591368 | ||
![]() |
014acfa2af | ||
![]() |
a23c2a9e7b | ||
![]() |
2a233dd31d | ||
![]() |
1600707d35 | ||
![]() |
cda802ac86 | ||
![]() |
0fcb0e17bc | ||
![]() |
a7b7f94ae8 | ||
![]() |
9d901217c6 | ||
![]() |
bb30fff978 | ||
![]() |
98cf0145b1 | ||
![]() |
72fd127f8c | ||
![]() |
245ca04bab | ||
![]() |
a0fcbcfa5f | ||
![]() |
85fd8bdb01 | ||
![]() |
c437bbcde0 | ||
![]() |
88c8ff2508 | ||
![]() |
a783da80e7 | ||
![]() |
2b3bfaa9ce | ||
![]() |
84758960db | ||
![]() |
90a52694f3 | ||
![]() |
4bd171152d | ||
![]() |
f582aa6eda | ||
![]() |
5b23666e67 | ||
![]() |
cb50dc7f33 | ||
![]() |
3b01778450 |
@@ -1062,7 +1062,7 @@ EOF
|
||||
setup_gnu_toolchain
|
||||
add_cflags -use-msasm -use-asm
|
||||
add_ldflags -i-static
|
||||
enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE2 -axSSE2
|
||||
enabled x86_64 && add_cflags -ipo -static -O3
|
||||
enabled x86_64 && AR=xiar
|
||||
case ${tune_cpu} in
|
||||
atom*)
|
||||
|
@@ -290,9 +290,11 @@ static void setup_rtcd_internal(void)
|
||||
{
|
||||
$(set_function_pointers c $ALL_ARCHS)
|
||||
#if HAVE_DSPR2
|
||||
#if CONFIG_VP8
|
||||
void dsputil_static_init();
|
||||
dsputil_static_init();
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
$(common_bottom)
|
||||
|
@@ -47,7 +47,7 @@ sub FixThumbInstructions($$)
|
||||
# this is used, it's used for two subsequent load instructions,
|
||||
# where a hand-written version of it could merge two subsequent
|
||||
# add and sub instructions.
|
||||
s/^(\s*)((ldr|str)(ne)?)(\s+)(r\d+),\s*\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6, [$7]\n$1add$4$5$7, $7, $8/g;
|
||||
s/^(\s*)((ldr|str|pld)(ne)?)(\s+)(r\d+,\s*)?\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6\[$7\]\n$1add$4$5$7, $7, $8/g;
|
||||
|
||||
# Convert register post indexing to a separate add instruction.
|
||||
# This converts "ldrneb r9, [r0], r2" into "ldrneb r9, [r0]",
|
||||
|
@@ -49,9 +49,9 @@ vpxenc.DESCRIPTION = Full featured encoder
|
||||
UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c
|
||||
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
|
||||
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
|
||||
UTILS-$(CONFIG_VP8_ENCODER) += vp9_spatial_scalable_encoder.c
|
||||
vp8_scalable_patterns.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
|
||||
vp8_scalable_patterns.DESCRIPTION = Spatial Scalable Encoder
|
||||
UTILS-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c
|
||||
vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
|
||||
vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
|
||||
|
||||
# Clean up old ivfenc, ivfdec binaries.
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
@@ -105,7 +105,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
|
||||
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
|
||||
int size;
|
||||
for (size = 4; size > 1; size--) {
|
||||
if (bin & 0x000000ff << ((size - 1) * 8))
|
||||
if (bin & (unsigned int)0x000000ff << ((size - 1) * 8))
|
||||
break;
|
||||
}
|
||||
Ebml_WriteID(glob, class_id);
|
||||
|
2
libs.mk
2
libs.mk
@@ -395,7 +395,7 @@ libvpx_test_srcs.txt:
|
||||
@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
|
||||
CLEAN-OBJS += libvpx_test_srcs.txt
|
||||
|
||||
$(LIBVPX_TEST_DATA):
|
||||
$(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
|
||||
@echo " [DOWNLOAD] $@"
|
||||
$(qexec)trap 'rm -f $@' INT TERM &&\
|
||||
curl -L -o $@ $(call libvpx_test_data_url,$(@F))
|
||||
|
@@ -258,7 +258,7 @@ void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
|
||||
}
|
||||
|
||||
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*idct_t)(int16_t *in, uint8_t *out, int stride);
|
||||
typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
|
||||
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
|
||||
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
|
||||
|
||||
@@ -509,7 +509,8 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_sse2, &vp9_short_idct16x16_add_c, 0)));
|
||||
make_tuple(&vp9_short_fdct16x16_sse2,
|
||||
&vp9_short_idct16x16_add_sse2, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16HT,
|
||||
::testing::Values(
|
||||
|
@@ -13,242 +13,309 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "test/util.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int /*tx_type*/) {
|
||||
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
|
||||
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
|
||||
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
|
||||
|
||||
void fdct8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fdct8x8_c(in, out, stride);
|
||||
}
|
||||
void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int /*tx_type*/) {
|
||||
vp9_short_idct8x8_add_c(out, dst, stride >> 1);
|
||||
}
|
||||
void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int tx_type) {
|
||||
// TODO(jingning): need to refactor this to test both _c and _sse2 functions,
|
||||
// when we have all inverse dct functions done sse2.
|
||||
#if HAVE_SSE2
|
||||
vp9_short_fht8x8_sse2(in, out, stride >> 1, tx_type);
|
||||
#else
|
||||
vp9_short_fht8x8_c(in, out, stride >> 1, tx_type);
|
||||
#endif
|
||||
}
|
||||
void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type);
|
||||
|
||||
void fht8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fht8x8_c(in, out, stride, tx_type);
|
||||
}
|
||||
|
||||
class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
|
||||
class FwdTrans8x8TestBase {
|
||||
public:
|
||||
virtual ~FwdTrans8x8Test() {}
|
||||
virtual void SetUp() {
|
||||
tx_type_ = GetParam();
|
||||
if (tx_type_ == 0) {
|
||||
fwd_txfm = fdct8x8;
|
||||
inv_txfm = idct8x8_add;
|
||||
} else {
|
||||
fwd_txfm = fht8x8;
|
||||
inv_txfm = iht8x8_add;
|
||||
}
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
virtual ~FwdTrans8x8TestBase() {}
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*fwd_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*inv_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
|
||||
virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
|
||||
|
||||
int tx_type_;
|
||||
void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
};
|
||||
void RunSignBiasCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
|
||||
int count_sign_block[64][2];
|
||||
const int count_test_block = 100000;
|
||||
|
||||
TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
|
||||
const int pitch = 16;
|
||||
int count_sign_block[64][2];
|
||||
const int count_test_block = 100000;
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block, pitch_));
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block,
|
||||
NULL, pitch, tx_type_));
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
++count_sign_block[j][0];
|
||||
else if (test_output_block[j] > 0)
|
||||
++count_sign_block[j][1];
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
++count_sign_block[j][0];
|
||||
else if (test_output_block[j] > 0)
|
||||
++count_sign_block[j][1];
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 1125;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 8x8 FDCT/FHT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-255, 255] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-15, 15].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block, pitch_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
++count_sign_block[j][0];
|
||||
else if (test_output_block[j] > 0)
|
||||
++count_sign_block[j][1];
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 10000;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 4x4 FDCT/FHT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-15, 15] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 1125;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 8x8 FDCT/FHT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-255, 255] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-15, 15].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block,
|
||||
NULL, pitch, tx_type_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
++count_sign_block[j][0];
|
||||
else if (test_output_block[j] > 0)
|
||||
++count_sign_block[j][1];
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 10000;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 4x4 FDCT/FHT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-15, 15] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
void RunRoundTripErrorCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
const int pitch = 16;
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
} else {
|
||||
test_temp_block[j] -= 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
}
|
||||
}
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block, pitch_));
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
} else {
|
||||
test_temp_block[j] -= 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
}
|
||||
}
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_temp_block, dst, pitch_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block/5, total_error)
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
|
||||
"error > 1/5 per block";
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans8x8Test, ExtremalCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8() % 2 ? 255 : 0;
|
||||
dst[j] = src[j] > 0 ? 0 : 255;
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
const int pitch = 16;
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has an"
|
||||
<< " individual roundtrip error > 1";
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
|
||||
<< " roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block/5, total_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
|
||||
<< " roundtrip error > 1/5 per block";
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
|
||||
<< "error > 1/5 per block";
|
||||
}
|
||||
|
||||
void RunExtremalCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8() % 2 ? 255 : 0;
|
||||
dst[j] = src[j] > 0 ? 0 : 255;
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block, pitch_));
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_temp_block, dst, pitch_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
|
||||
<< "an individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block/5, total_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
|
||||
<< " roundtrip error > 1/5 per block";
|
||||
}
|
||||
}
|
||||
|
||||
int pitch_;
|
||||
int tx_type_;
|
||||
fht_t fwd_txfm_ref;
|
||||
};
|
||||
|
||||
class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
|
||||
public PARAMS(fdct_t, idct_t, int) {
|
||||
public:
|
||||
virtual ~FwdTrans8x8DCT() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 16;
|
||||
fwd_txfm_ref = fdct8x8_ref;
|
||||
}
|
||||
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride >> 1);
|
||||
}
|
||||
|
||||
fdct_t fwd_txfm_;
|
||||
idct_t inv_txfm_;
|
||||
};
|
||||
|
||||
TEST_P(FwdTrans8x8DCT, SignBiasCheck) {
|
||||
RunSignBiasCheck();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans8x8Test, ::testing::Range(0, 4));
|
||||
TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) {
|
||||
RunRoundTripErrorCheck();
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans8x8DCT, ExtremalCheck) {
|
||||
RunExtremalCheck();
|
||||
}
|
||||
|
||||
class FwdTrans8x8HT : public FwdTrans8x8TestBase,
|
||||
public PARAMS(fht_t, iht_t, int) {
|
||||
public:
|
||||
virtual ~FwdTrans8x8HT() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 8;
|
||||
fwd_txfm_ref = fht8x8_ref;
|
||||
}
|
||||
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride, tx_type_);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride, tx_type_);
|
||||
}
|
||||
|
||||
fht_t fwd_txfm_;
|
||||
iht_t inv_txfm_;
|
||||
};
|
||||
|
||||
TEST_P(FwdTrans8x8HT, SignBiasCheck) {
|
||||
RunSignBiasCheck();
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) {
|
||||
RunRoundTripErrorCheck();
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans8x8HT, ExtremalCheck) {
|
||||
RunExtremalCheck();
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, FwdTrans8x8DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct8x8_c, &vp9_short_idct8x8_add_c, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, FwdTrans8x8HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 0),
|
||||
make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 1),
|
||||
make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 2),
|
||||
make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 3)));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, FwdTrans8x8DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct8x8_sse2, &vp9_short_idct8x8_add_sse2, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, FwdTrans8x8HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 1),
|
||||
make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 2),
|
||||
make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 3)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
@@ -34,13 +34,17 @@ class IntraPredBase {
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetupMacroblock(uint8_t *data, int block_size, int stride,
|
||||
void SetupMacroblock(MACROBLOCKD *mbptr,
|
||||
MODE_INFO *miptr,
|
||||
uint8_t *data,
|
||||
int block_size,
|
||||
int stride,
|
||||
int num_planes) {
|
||||
memset(&mb_, 0, sizeof(mb_));
|
||||
memset(&mi_, 0, sizeof(mi_));
|
||||
mb_.up_available = 1;
|
||||
mb_.left_available = 1;
|
||||
mb_.mode_info_context = &mi_;
|
||||
mbptr_ = mbptr;
|
||||
miptr_ = miptr;
|
||||
mbptr_->up_available = 1;
|
||||
mbptr_->left_available = 1;
|
||||
mbptr_->mode_info_context = miptr_;
|
||||
stride_ = stride;
|
||||
block_size_ = block_size;
|
||||
num_planes_ = num_planes;
|
||||
@@ -63,14 +67,14 @@ class IntraPredBase {
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) = 0;
|
||||
|
||||
void SetLeftUnavailable() {
|
||||
mb_.left_available = 0;
|
||||
mbptr_->left_available = 0;
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int i = -1; i < block_size_; ++i)
|
||||
data_ptr_[p][stride_ * i - 1] = 129;
|
||||
}
|
||||
|
||||
void SetTopUnavailable() {
|
||||
mb_.up_available = 0;
|
||||
mbptr_->up_available = 0;
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
|
||||
}
|
||||
@@ -96,13 +100,13 @@ class IntraPredBase {
|
||||
for (int p = 0; p < num_planes_; p++) {
|
||||
// calculate expected DC
|
||||
int expected;
|
||||
if (mb_.up_available || mb_.left_available) {
|
||||
int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available +
|
||||
mb_.left_available;
|
||||
if (mb_.up_available)
|
||||
if (mbptr_->up_available || mbptr_->left_available) {
|
||||
int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
|
||||
mbptr_->left_available;
|
||||
if (mbptr_->up_available)
|
||||
for (int x = 0; x < block_size_; x++)
|
||||
sum += data_ptr_[p][x - stride_];
|
||||
if (mb_.left_available)
|
||||
if (mbptr_->left_available)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
sum += data_ptr_[p][y * stride_ - 1];
|
||||
expected = (sum + (1 << (shift - 1))) >> shift;
|
||||
@@ -209,8 +213,8 @@ class IntraPredBase {
|
||||
}
|
||||
}
|
||||
|
||||
MACROBLOCKD mb_;
|
||||
MODE_INFO mi_;
|
||||
MACROBLOCKD *mbptr_;
|
||||
MODE_INFO *miptr_;
|
||||
uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used
|
||||
int stride_;
|
||||
int block_size_;
|
||||
@@ -228,12 +232,18 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
|
||||
protected IntraPredBase {
|
||||
public:
|
||||
static void SetUpTestCase() {
|
||||
mb_ = reinterpret_cast<MACROBLOCKD*>(
|
||||
vpx_memalign(32, sizeof(MACROBLOCKD)));
|
||||
mi_ = reinterpret_cast<MODE_INFO*>(
|
||||
vpx_memalign(32, sizeof(MODE_INFO)));
|
||||
data_array_ = reinterpret_cast<uint8_t*>(
|
||||
vpx_memalign(kDataAlignment, kDataBufferSize));
|
||||
}
|
||||
|
||||
static void TearDownTestCase() {
|
||||
vpx_free(data_array_);
|
||||
vpx_free(mi_);
|
||||
vpx_free(mb_);
|
||||
data_array_ = NULL;
|
||||
}
|
||||
|
||||
@@ -250,12 +260,12 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
|
||||
|
||||
virtual void SetUp() {
|
||||
pred_fn_ = GetParam();
|
||||
SetupMacroblock(data_array_, kBlockSize, kStride, 1);
|
||||
SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
|
||||
}
|
||||
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) {
|
||||
mb_.mode_info_context->mbmi.mode = mode;
|
||||
REGISTER_STATE_CHECK(pred_fn_(&mb_,
|
||||
mbptr_->mode_info_context->mbmi.mode = mode;
|
||||
REGISTER_STATE_CHECK(pred_fn_(mbptr_,
|
||||
data_ptr_[0] - kStride,
|
||||
data_ptr_[0] - 1, kStride,
|
||||
data_ptr_[0], kStride));
|
||||
@@ -263,8 +273,12 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
|
||||
|
||||
intra_pred_y_fn_t pred_fn_;
|
||||
static uint8_t* data_array_;
|
||||
static MACROBLOCKD * mb_;
|
||||
static MODE_INFO *mi_;
|
||||
};
|
||||
|
||||
MACROBLOCKD* IntraPredYTest::mb_ = NULL;
|
||||
MODE_INFO* IntraPredYTest::mi_ = NULL;
|
||||
uint8_t* IntraPredYTest::data_array_ = NULL;
|
||||
|
||||
TEST_P(IntraPredYTest, IntraPredTests) {
|
||||
@@ -299,12 +313,18 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
|
||||
protected IntraPredBase {
|
||||
public:
|
||||
static void SetUpTestCase() {
|
||||
mb_ = reinterpret_cast<MACROBLOCKD*>(
|
||||
vpx_memalign(32, sizeof(MACROBLOCKD)));
|
||||
mi_ = reinterpret_cast<MODE_INFO*>(
|
||||
vpx_memalign(32, sizeof(MODE_INFO)));
|
||||
data_array_ = reinterpret_cast<uint8_t*>(
|
||||
vpx_memalign(kDataAlignment, kDataBufferSize));
|
||||
}
|
||||
|
||||
static void TearDownTestCase() {
|
||||
vpx_free(data_array_);
|
||||
vpx_free(mi_);
|
||||
vpx_free(mb_);
|
||||
data_array_ = NULL;
|
||||
}
|
||||
|
||||
@@ -322,12 +342,12 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
|
||||
|
||||
virtual void SetUp() {
|
||||
pred_fn_ = GetParam();
|
||||
SetupMacroblock(data_array_, kBlockSize, kStride, 2);
|
||||
SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
|
||||
}
|
||||
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) {
|
||||
mb_.mode_info_context->mbmi.uv_mode = mode;
|
||||
pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
|
||||
mbptr_->mode_info_context->mbmi.uv_mode = mode;
|
||||
pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
|
||||
data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
|
||||
data_ptr_[0], data_ptr_[1], kStride);
|
||||
}
|
||||
@@ -340,8 +360,12 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
|
||||
// We use 9 lines so we have one line above us for top-prediction.
|
||||
// [0] = U, [1] = V
|
||||
static uint8_t* data_array_;
|
||||
static MACROBLOCKD* mb_;
|
||||
static MODE_INFO* mi_;
|
||||
};
|
||||
|
||||
MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
|
||||
MODE_INFO* IntraPredUVTest::mi_ = NULL;
|
||||
uint8_t* IntraPredUVTest::data_array_ = NULL;
|
||||
|
||||
TEST_P(IntraPredUVTest, IntraPredTests) {
|
||||
|
@@ -524,8 +524,6 @@ b6524e4084d15b5d0caaa3d3d1368db30cbee69c vp90-2-03-deltaq.webm
|
||||
65f45ec9a55537aac76104818278e0978f94a678 vp90-2-03-deltaq.webm.md5
|
||||
4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba vp90-2-05-resize.ivf
|
||||
7f6d8879336239a43dbb6c9f13178cb11cf7ed09 vp90-2-05-resize.ivf.md5
|
||||
bf61ddc1f716eba58d4c9837d4e91031d9ce4ffe vp90-2-06-bilinear.webm
|
||||
f6235f937552e11d8eb331ec55da6b3aa596b9ac vp90-2-06-bilinear.webm.md5
|
||||
495256cfd123fe777b2c0406862ed8468a1f4677 vp91-2-04-yv444.webm
|
||||
65e3a7ffef61ab340d9140f335ecc49125970c2c vp91-2-04-yv444.webm.md5
|
||||
|
||||
|
@@ -633,7 +633,5 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
|
||||
|
@@ -160,7 +160,7 @@ const char *kVP9TestVectors[] = {
|
||||
"vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
|
||||
"vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
|
||||
"vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
|
||||
"vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm",
|
||||
"vp90-2-05-resize.ivf",
|
||||
#if CONFIG_NON420
|
||||
"vp91-2-04-yv444.webm"
|
||||
#endif
|
||||
|
@@ -78,34 +78,6 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
|
||||
return sse - (((int64_t) se * se) >> (l2w + l2h));
|
||||
}
|
||||
|
||||
static unsigned int subpel_avg_variance_ref(const uint8_t *ref,
|
||||
const uint8_t *src,
|
||||
const uint8_t *second_pred,
|
||||
int l2w, int l2h,
|
||||
int xoff, int yoff,
|
||||
unsigned int *sse_ptr) {
|
||||
int se = 0;
|
||||
unsigned int sse = 0;
|
||||
const int w = 1 << l2w, h = 1 << l2h;
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
// bilinear interpolation at a 16th pel step
|
||||
const int a1 = ref[(w + 1) * (y + 0) + x + 0];
|
||||
const int a2 = ref[(w + 1) * (y + 0) + x + 1];
|
||||
const int b1 = ref[(w + 1) * (y + 1) + x + 0];
|
||||
const int b2 = ref[(w + 1) * (y + 1) + x + 1];
|
||||
const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
|
||||
const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
|
||||
const int r = a + (((b - a) * yoff + 8) >> 4);
|
||||
int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
|
||||
se += diff;
|
||||
sse += diff * diff;
|
||||
}
|
||||
}
|
||||
*sse_ptr = sse;
|
||||
return sse - (((int64_t) se * se) >> (l2w + l2h));
|
||||
}
|
||||
|
||||
template<typename VarianceFunctionType>
|
||||
class VarianceTest
|
||||
: public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
|
||||
@@ -190,6 +162,36 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
|
||||
EXPECT_EQ(expected, var);
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_ENCODER
|
||||
|
||||
unsigned int subpel_avg_variance_ref(const uint8_t *ref,
|
||||
const uint8_t *src,
|
||||
const uint8_t *second_pred,
|
||||
int l2w, int l2h,
|
||||
int xoff, int yoff,
|
||||
unsigned int *sse_ptr) {
|
||||
int se = 0;
|
||||
unsigned int sse = 0;
|
||||
const int w = 1 << l2w, h = 1 << l2h;
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
// bilinear interpolation at a 16th pel step
|
||||
const int a1 = ref[(w + 1) * (y + 0) + x + 0];
|
||||
const int a2 = ref[(w + 1) * (y + 0) + x + 1];
|
||||
const int b1 = ref[(w + 1) * (y + 1) + x + 0];
|
||||
const int b2 = ref[(w + 1) * (y + 1) + x + 1];
|
||||
const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
|
||||
const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
|
||||
const int r = a + (((b - a) * yoff + 8) >> 4);
|
||||
int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
|
||||
se += diff;
|
||||
sse += diff * diff;
|
||||
}
|
||||
}
|
||||
*sse_ptr = sse;
|
||||
return sse - (((int64_t) se * se) >> (l2w + l2h));
|
||||
}
|
||||
|
||||
template<typename SubpelVarianceFunctionType>
|
||||
class SubpelVarianceTest
|
||||
: public ::testing::TestWithParam<tuple<int, int,
|
||||
@@ -280,6 +282,8 @@ void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
|
||||
}
|
||||
}
|
||||
|
||||
#endif // CONFIG_VP9_ENCODER
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// VP8 test cases.
|
||||
|
||||
|
96
third_party/x86inc/x86inc.asm
vendored
96
third_party/x86inc/x86inc.asm
vendored
@@ -97,21 +97,91 @@
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%if WIN64
|
||||
%define PIC
|
||||
%elifidn __OUTPUT_FORMAT__,macho64
|
||||
%define PIC
|
||||
%elif ARCH_X86_64 == 0
|
||||
; x86_32 doesn't require PIC.
|
||||
; Some distros prefer shared objects to be PIC, but nothing breaks if
|
||||
; the code contains a few textrels, so we'll skip that complexity.
|
||||
%undef PIC
|
||||
%elif CONFIG_PIC
|
||||
%define PIC
|
||||
; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC"
|
||||
; from original code is added in for 64bit.
|
||||
%ifidn __OUTPUT_FORMAT__,elf32
|
||||
%define ABI_IS_32BIT 1
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
%define ABI_IS_32BIT 1
|
||||
%elifidn __OUTPUT_FORMAT__,win32
|
||||
%define ABI_IS_32BIT 1
|
||||
%elifidn __OUTPUT_FORMAT__,aout
|
||||
%define ABI_IS_32BIT 1
|
||||
%else
|
||||
%define ABI_IS_32BIT 0
|
||||
%endif
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
%if CONFIG_PIC=1
|
||||
%ifidn __OUTPUT_FORMAT__,elf32
|
||||
%define GET_GOT_SAVE_ARG 1
|
||||
%define WRT_PLT wrt ..plt
|
||||
%macro GET_GOT 1
|
||||
extern _GLOBAL_OFFSET_TABLE_
|
||||
push %1
|
||||
call %%get_got
|
||||
%%sub_offset:
|
||||
jmp %%exitGG
|
||||
%%get_got:
|
||||
mov %1, [esp]
|
||||
add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
|
||||
ret
|
||||
%%exitGG:
|
||||
%undef GLOBAL
|
||||
%define GLOBAL(x) x + %1 wrt ..gotoff
|
||||
%undef RESTORE_GOT
|
||||
%define RESTORE_GOT pop %1
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
%define GET_GOT_SAVE_ARG 1
|
||||
%macro GET_GOT 1
|
||||
push %1
|
||||
call %%get_got
|
||||
%%get_got:
|
||||
pop %1
|
||||
%undef GLOBAL
|
||||
%define GLOBAL(x) x + %1 - %%get_got
|
||||
%undef RESTORE_GOT
|
||||
%define RESTORE_GOT pop %1
|
||||
%endmacro
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if ARCH_X86_64 == 0
|
||||
%undef PIC
|
||||
%endif
|
||||
|
||||
%else
|
||||
%macro GET_GOT 1
|
||||
%endmacro
|
||||
%define GLOBAL(x) rel x
|
||||
%define WRT_PLT wrt ..plt
|
||||
|
||||
%if WIN64
|
||||
%define PIC
|
||||
%elifidn __OUTPUT_FORMAT__,macho64
|
||||
%define PIC
|
||||
%elif CONFIG_PIC
|
||||
%define PIC
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifnmacro GET_GOT
|
||||
%macro GET_GOT 1
|
||||
%endmacro
|
||||
%define GLOBAL(x) x
|
||||
%endif
|
||||
%ifndef RESTORE_GOT
|
||||
%define RESTORE_GOT
|
||||
%endif
|
||||
%ifndef WRT_PLT
|
||||
%define WRT_PLT
|
||||
%endif
|
||||
|
||||
%ifdef PIC
|
||||
default rel
|
||||
%endif
|
||||
; Done with PIC macros
|
||||
|
||||
; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
|
||||
%ifndef __NASM_VER__
|
||||
@@ -528,6 +598,10 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
||||
global %1:function hidden
|
||||
%elifidn __OUTPUT_FORMAT__,elf64
|
||||
global %1:function hidden
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
global %1:private_extern
|
||||
%elifidn __OUTPUT_FORMAT__,macho64
|
||||
global %1:private_extern
|
||||
%else
|
||||
global %1
|
||||
%endif
|
||||
|
@@ -9,9 +9,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
|
@@ -12,11 +12,13 @@
|
||||
#ifndef FILTER_H
|
||||
#define FILTER_H
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
extern const short vp8_bilinear_filters[8][2];
|
||||
extern const short vp8_sub_pel_filters[8][6];
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
|
||||
|
||||
#endif
|
||||
|
@@ -124,7 +124,7 @@ static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
|
||||
b += 16;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 4)->mv.as_int;
|
||||
return (cur_mb->bmi + (b - 4))->mv.as_int;
|
||||
}
|
||||
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
|
@@ -138,14 +138,10 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre,
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
pred_ptr[0] = ptr[0];
|
||||
pred_ptr[1] = ptr[1];
|
||||
pred_ptr[2] = ptr[2];
|
||||
pred_ptr[3] = ptr[3];
|
||||
#else
|
||||
*(uint32_t *)pred_ptr = *(uint32_t *)ptr ;
|
||||
#endif
|
||||
pred_ptr += pitch;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
@@ -196,16 +192,12 @@ static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stri
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = ptr[0];
|
||||
dst[1] = ptr[1];
|
||||
dst[2] = ptr[2];
|
||||
dst[3] = ptr[3];
|
||||
#else
|
||||
*(uint32_t *)dst = *(uint32_t *)ptr ;
|
||||
#endif
|
||||
dst += dst_stride;
|
||||
ptr += pre_stride;
|
||||
dst += dst_stride;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -270,7 +262,7 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
@@ -279,7 +271,7 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
@@ -558,7 +550,7 @@ void build_4x4uvmvs(MACROBLOCKD *x)
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
@@ -567,7 +559,7 @@ void build_4x4uvmvs(MACROBLOCKD *x)
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
|
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vp8/common/x86/filter_x86.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
|
||||
{
|
||||
|
@@ -11,9 +11,15 @@
|
||||
#ifndef FILTER_X86_H
|
||||
#define FILTER_X86_H
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with
|
||||
* duplicated values */
|
||||
extern const short vp8_bilinear_filters_x86_4[8][8]; /* duplicated 4x */
|
||||
extern const short vp8_bilinear_filters_x86_8[8][16]; /* duplicated 8x */
|
||||
|
||||
/* duplicated 4x */
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]);
|
||||
|
||||
/* duplicated 8x */
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]);
|
||||
|
||||
#endif /* FILTER_X86_H */
|
||||
|
@@ -611,16 +611,12 @@ void vp8_sixtap_predict4x4_ssse3
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst_ptr[0] = src_ptr[0];
|
||||
dst_ptr[1] = src_ptr[1];
|
||||
dst_ptr[2] = src_ptr[2];
|
||||
dst_ptr[3] = src_ptr[3];
|
||||
#else
|
||||
*(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ;
|
||||
#endif
|
||||
dst_ptr += dst_pitch;
|
||||
src_ptr += src_pixels_per_line;
|
||||
dst_ptr += dst_pitch;
|
||||
src_ptr += src_pixels_per_line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -110,8 +110,8 @@ static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
|
||||
|
||||
static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc)
|
||||
{
|
||||
mv->row = (short)(read_mvcomponent(r, mvc) << 1);
|
||||
mv->col = (short)(read_mvcomponent(r, ++mvc) << 1);
|
||||
mv->row = (short)(read_mvcomponent(r, mvc) * 2);
|
||||
mv->col = (short)(read_mvcomponent(r, ++mvc) * 2);
|
||||
}
|
||||
|
||||
|
||||
@@ -292,9 +292,9 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi,
|
||||
blockmv.as_int = 0;
|
||||
if( vp8_read(bc, prob[2]) )
|
||||
{
|
||||
blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) << 1;
|
||||
blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2;
|
||||
blockmv.as_mv.row += best_mv.as_mv.row;
|
||||
blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1;
|
||||
blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2;
|
||||
blockmv.as_mv.col += best_mv.as_mv.col;
|
||||
}
|
||||
}
|
||||
|
@@ -576,7 +576,7 @@ static void decode_mb_rows(VP8D_COMP *pbi)
|
||||
|
||||
xd->left_available = 0;
|
||||
|
||||
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
|
||||
xd->mb_to_top_edge = -((mb_row * 16) << 3);
|
||||
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
|
||||
|
||||
xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
|
||||
@@ -1026,7 +1026,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
const unsigned char *clear = data;
|
||||
if (pbi->decrypt_cb)
|
||||
{
|
||||
int n = data_end - data;
|
||||
int n = (int)(data_end - data);
|
||||
if (n > 10) n = 10;
|
||||
pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
|
||||
clear = clear_buffer;
|
||||
|
@@ -432,7 +432,7 @@ static void write_mv_ref
|
||||
assert(NEARESTMV <= m && m <= SPLITMV);
|
||||
#endif
|
||||
vp8_write_token(w, vp8_mv_ref_tree, p,
|
||||
vp8_mv_ref_encoding_array - NEARESTMV + m);
|
||||
vp8_mv_ref_encoding_array + (m - NEARESTMV));
|
||||
}
|
||||
|
||||
static void write_sub_mv_ref
|
||||
@@ -444,7 +444,7 @@ static void write_sub_mv_ref
|
||||
assert(LEFT4X4 <= m && m <= NEW4X4);
|
||||
#endif
|
||||
vp8_write_token(w, vp8_sub_mv_ref_tree, p,
|
||||
vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
|
||||
vp8_sub_mv_ref_encoding_array + (m - LEFT4X4));
|
||||
}
|
||||
|
||||
static void write_mv
|
||||
@@ -577,7 +577,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
|
||||
*/
|
||||
xd->mb_to_left_edge = -((mb_col * 16) << 3);
|
||||
xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
|
||||
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
|
||||
xd->mb_to_top_edge = -((mb_row * 16) << 3);
|
||||
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
|
||||
|
||||
#ifdef VP8_ENTROPY_STATS
|
||||
|
@@ -20,10 +20,10 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ((ip[0] + ip[3])<<3);
|
||||
b1 = ((ip[1] + ip[2])<<3);
|
||||
c1 = ((ip[1] - ip[2])<<3);
|
||||
d1 = ((ip[0] - ip[3])<<3);
|
||||
a1 = ((ip[0] + ip[3]) * 8);
|
||||
b1 = ((ip[1] + ip[2]) * 8);
|
||||
c1 = ((ip[1] - ip[2]) * 8);
|
||||
d1 = ((ip[0] - ip[3]) * 8);
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[2] = a1 - b1;
|
||||
@@ -72,10 +72,10 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ((ip[0] + ip[2])<<2);
|
||||
d1 = ((ip[1] + ip[3])<<2);
|
||||
c1 = ((ip[1] - ip[3])<<2);
|
||||
b1 = ((ip[0] - ip[2])<<2);
|
||||
a1 = ((ip[0] + ip[2]) * 4);
|
||||
d1 = ((ip[1] + ip[3]) * 4);
|
||||
c1 = ((ip[1] - ip[3]) * 4);
|
||||
b1 = ((ip[0] - ip[2]) * 4);
|
||||
|
||||
op[0] = a1 + d1 + (a1!=0);
|
||||
op[1] = b1 + c1;
|
||||
|
@@ -711,8 +711,8 @@ skip_motion_search:
|
||||
neutral_count++;
|
||||
}
|
||||
|
||||
d->bmi.mv.as_mv.row <<= 3;
|
||||
d->bmi.mv.as_mv.col <<= 3;
|
||||
d->bmi.mv.as_mv.row *= 8;
|
||||
d->bmi.mv.as_mv.col *= 8;
|
||||
this_error = motion_error;
|
||||
vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv);
|
||||
vp8_encode_inter16x16y(x);
|
||||
@@ -909,13 +909,16 @@ extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
|
||||
|
||||
static double bitcost( double prob )
|
||||
{
|
||||
return -(log( prob ) / log( 2.0 ));
|
||||
if (prob > 0.000122)
|
||||
return -log(prob) / log(2.0);
|
||||
else
|
||||
return 13.0;
|
||||
}
|
||||
static int64_t estimate_modemvcost(VP8_COMP *cpi,
|
||||
FIRSTPASS_STATS * fpstats)
|
||||
{
|
||||
int mv_cost;
|
||||
int mode_cost;
|
||||
int64_t mode_cost;
|
||||
|
||||
double av_pct_inter = fpstats->pcnt_inter / fpstats->count;
|
||||
double av_pct_motion = fpstats->pcnt_motion / fpstats->count;
|
||||
@@ -937,10 +940,9 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi,
|
||||
/* Crude estimate of overhead cost from modes
|
||||
* << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb
|
||||
*/
|
||||
mode_cost =
|
||||
(int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) +
|
||||
(av_pct_motion * motion_cost) +
|
||||
(av_intra * intra_cost) ) * cpi->common.MBs ) << 9;
|
||||
mode_cost =((((av_pct_inter - av_pct_motion) * zz_cost) +
|
||||
(av_pct_motion * motion_cost) +
|
||||
(av_intra * intra_cost)) * cpi->common.MBs) * 512;
|
||||
|
||||
return mv_cost + mode_cost;
|
||||
}
|
||||
|
@@ -210,7 +210,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
unsigned char *z = (*(b->base_src) + b->src);
|
||||
|
||||
int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
|
||||
int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
|
||||
int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
|
||||
int tr = br, tc = bc;
|
||||
unsigned int besterr;
|
||||
unsigned int left, right, up, down, diag;
|
||||
@@ -220,10 +220,14 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
unsigned int quarteriters = 4;
|
||||
int thismse;
|
||||
|
||||
int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
|
||||
int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
|
||||
int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
|
||||
int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
|
||||
int minc = MAX(x->mv_col_min * 4,
|
||||
(ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
|
||||
int maxc = MIN(x->mv_col_max * 4,
|
||||
(ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
|
||||
int minr = MAX(x->mv_row_min * 4,
|
||||
(ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
|
||||
int maxr = MIN(x->mv_row_max * 4,
|
||||
(ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
|
||||
|
||||
int y_stride;
|
||||
int offset;
|
||||
@@ -254,8 +258,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
|
||||
|
||||
/* central mv */
|
||||
bestmv->as_mv.row <<= 3;
|
||||
bestmv->as_mv.col <<= 3;
|
||||
bestmv->as_mv.row *= 8;
|
||||
bestmv->as_mv.col *= 8;
|
||||
|
||||
/* calculate central point error */
|
||||
besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
|
||||
@@ -337,8 +341,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
tc = bc;
|
||||
}
|
||||
|
||||
bestmv->as_mv.row = br << 1;
|
||||
bestmv->as_mv.col = bc << 1;
|
||||
bestmv->as_mv.row = br * 2;
|
||||
bestmv->as_mv.col = bc * 2;
|
||||
|
||||
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
|
||||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
|
||||
@@ -699,8 +703,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
#endif
|
||||
|
||||
/* central mv */
|
||||
bestmv->as_mv.row <<= 3;
|
||||
bestmv->as_mv.col <<= 3;
|
||||
bestmv->as_mv.row *= 8;
|
||||
bestmv->as_mv.col *= 8;
|
||||
startmv = *bestmv;
|
||||
|
||||
/* calculate central point error */
|
||||
@@ -1315,8 +1319,8 @@ int vp8_diamond_search_sadx4
|
||||
(*num00)++;
|
||||
}
|
||||
|
||||
this_mv.as_mv.row = best_mv->as_mv.row << 3;
|
||||
this_mv.as_mv.col = best_mv->as_mv.col << 3;
|
||||
this_mv.as_mv.row = best_mv->as_mv.row * 8;
|
||||
this_mv.as_mv.col = best_mv->as_mv.col * 8;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
|
||||
@@ -1709,8 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
}
|
||||
}
|
||||
|
||||
this_mv.as_mv.row = best_mv->as_mv.row << 3;
|
||||
this_mv.as_mv.col = best_mv->as_mv.col << 3;
|
||||
this_mv.as_mv.row = best_mv->as_mv.row * 8;
|
||||
this_mv.as_mv.col = best_mv->as_mv.col * 8;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
|
||||
@@ -1905,8 +1909,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
}
|
||||
}
|
||||
|
||||
this_mv.as_mv.row = ref_mv->as_mv.row << 3;
|
||||
this_mv.as_mv.col = ref_mv->as_mv.col << 3;
|
||||
this_mv.as_mv.row = ref_mv->as_mv.row * 8;
|
||||
this_mv.as_mv.col = ref_mv->as_mv.col * 8;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
|
||||
|
@@ -935,7 +935,7 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
|
||||
assert(NEARESTMV <= m && m <= SPLITMV);
|
||||
vp8_mv_ref_probs(p, near_mv_ref_ct);
|
||||
return vp8_cost_token(vp8_mv_ref_tree, p,
|
||||
vp8_mv_ref_encoding_array - NEARESTMV + m);
|
||||
vp8_mv_ref_encoding_array + (m - NEARESTMV));
|
||||
}
|
||||
|
||||
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
|
||||
|
@@ -20,26 +20,28 @@ extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
|
||||
int16_t skip_adding,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
|
||||
extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input,
|
||||
int16_t *output,
|
||||
int output_stride);
|
||||
extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
|
||||
extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
int16_t *output,
|
||||
int16_t *pass1Output,
|
||||
int16_t skip_adding,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
extern void save_neon_registers();
|
||||
extern void restore_neon_registers();
|
||||
|
||||
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
|
||||
extern void vp9_push_neon(int64_t *store);
|
||||
extern void vp9_pop_neon(int64_t *store);
|
||||
|
||||
void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int64_t store_reg[8];
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
// save d8-d15 register values.
|
||||
save_neon_registers();
|
||||
vp9_push_neon(store_reg);
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
@@ -102,28 +104,29 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
dest_stride);
|
||||
|
||||
// restore d8-d15 register values.
|
||||
restore_neon_registers();
|
||||
vp9_pop_neon(store_reg);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void vp9_short_idct10_16x16_add_neon(int16_t *input,
|
||||
void vp9_short_idct16x16_10_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int64_t store_reg[8];
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
// save d8-d15 register values.
|
||||
save_neon_registers();
|
||||
vp9_push_neon(store_reg);
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);
|
||||
vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct10_16x16_add_neon_pass2(input+1,
|
||||
vp9_short_idct16x16_10_add_neon_pass2(input+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
0,
|
||||
@@ -163,7 +166,7 @@ void vp9_short_idct10_16x16_add_neon(int16_t *input,
|
||||
dest_stride);
|
||||
|
||||
// restore d8-d15 register values.
|
||||
restore_neon_registers();
|
||||
vp9_pop_neon(store_reg);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
// defined in vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
|
||||
extern void idct32_transpose_and_transform(int16_t *transpose_buffer,
|
||||
int16_t *output, int16_t *input);
|
||||
extern void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);
|
||||
|
||||
|
||||
// defined in vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
|
||||
extern void save_neon_registers();
|
||||
extern void restore_neon_registers();
|
||||
|
||||
void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest,
|
||||
int dest_stride) {
|
||||
// TODO(cd): move the creation of these buffers within the ASM file
|
||||
// internal buffer used to transpose 8 lines into before transforming them
|
||||
int16_t transpose_buffer[32 * 8];
|
||||
// results of the first pass (transpose and transform rows)
|
||||
int16_t pass1[32 * 32];
|
||||
// results of the second pass (transpose and transform columns)
|
||||
int16_t pass2[32 * 32];
|
||||
|
||||
// save register we need to preserve
|
||||
save_neon_registers();
|
||||
// process rows
|
||||
idct32_transpose_and_transform(transpose_buffer, pass1, input);
|
||||
// process columns
|
||||
// TODO(cd): do these two steps/passes within the ASM file
|
||||
idct32_transpose_and_transform(transpose_buffer, pass2, pass1);
|
||||
// combine and add to dest
|
||||
// TODO(cd): integrate this within the last storage step of the second pass
|
||||
idct32_combine_add(dest, pass2, dest_stride);
|
||||
// restore register we need to preserve
|
||||
restore_neon_registers();
|
||||
}
|
||||
|
||||
// TODO(cd): Eliminate this file altogether when everything is in ASM file
|
36
vp9/common/arm/neon/vp9_save_reg_neon.asm
Normal file
36
vp9/common/arm/neon/vp9_save_reg_neon.asm
Normal file
@@ -0,0 +1,36 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_push_neon|
|
||||
EXPORT |vp9_pop_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
|vp9_push_neon| PROC
|
||||
vst1.i64 {d8, d9, d10, d11}, [r0]!
|
||||
vst1.i64 {d12, d13, d14, d15}, [r0]!
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
|vp9_pop_neon| PROC
|
||||
vld1.i64 {d8, d9, d10, d11}, [r0]!
|
||||
vld1.i64 {d12, d13, d14, d15}, [r0]!
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
|
@@ -10,10 +10,8 @@
|
||||
|
||||
EXPORT |vp9_short_idct16x16_add_neon_pass1|
|
||||
EXPORT |vp9_short_idct16x16_add_neon_pass2|
|
||||
EXPORT |vp9_short_idct10_16x16_add_neon_pass1|
|
||||
EXPORT |vp9_short_idct10_16x16_add_neon_pass2|
|
||||
EXPORT |save_neon_registers|
|
||||
EXPORT |restore_neon_registers|
|
||||
EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
|
||||
EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@@ -788,7 +786,7 @@ end_idct16x16_pass2
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_add_neon_pass2|
|
||||
|
||||
;void |vp9_short_idct10_16x16_add_neon_pass1|(int16_t *input,
|
||||
;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input,
|
||||
; int16_t *output, int output_stride)
|
||||
;
|
||||
; r0 int16_t input
|
||||
@@ -798,7 +796,7 @@ end_idct16x16_pass2
|
||||
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
|
||||
; will be stored back into q8-q15 registers. This function will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
|vp9_short_idct10_16x16_add_neon_pass1| PROC
|
||||
|vp9_short_idct16x16_10_add_neon_pass1| PROC
|
||||
|
||||
; TODO(hkuang): Find a better way to load the elements.
|
||||
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
|
||||
@@ -907,9 +905,9 @@ end_idct16x16_pass2
|
||||
vst1.64 {d31}, [r1], r2
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct10_16x16_add_neon_pass1|
|
||||
ENDP ; |vp9_short_idct16x16_10_add_neon_pass1|
|
||||
|
||||
;void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
|
||||
;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
; int16_t *output,
|
||||
; int16_t *pass1Output,
|
||||
; int16_t skip_adding,
|
||||
@@ -926,7 +924,7 @@ end_idct16x16_pass2
|
||||
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
|
||||
; will be stored back into q8-q15 registers. This function will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
|vp9_short_idct10_16x16_add_neon_pass2| PROC
|
||||
|vp9_short_idct16x16_10_add_neon_pass2| PROC
|
||||
push {r3-r9}
|
||||
|
||||
; TODO(hkuang): Find a better way to load the elements.
|
||||
@@ -1177,15 +1175,5 @@ end_idct16x16_pass2
|
||||
end_idct10_16x16_pass2
|
||||
pop {r3-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct10_16x16_add_neon_pass2|
|
||||
;void |save_neon_registers|()
|
||||
|save_neon_registers| PROC
|
||||
vpush {d8-d15}
|
||||
bx lr
|
||||
ENDP ; |save_registers|
|
||||
;void |restore_neon_registers|()
|
||||
|restore_neon_registers| PROC
|
||||
vpop {d8-d15}
|
||||
bx lr
|
||||
ENDP ; |restore_registers|
|
||||
ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
|
||||
END
|
||||
|
@@ -43,8 +43,7 @@ cospi_30_64 EQU 1606
|
||||
cospi_31_64 EQU 804
|
||||
|
||||
|
||||
EXPORT |idct32_transpose_and_transform|
|
||||
EXPORT |idct32_combine_add|
|
||||
EXPORT |vp9_short_idct32x32_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@@ -100,6 +99,142 @@ cospi_31_64 EQU 804
|
||||
vst1.16 {$reg2}, [r1]
|
||||
MEND
|
||||
; --------------------------------------------------------------------------
|
||||
; Combine-add results with current destination content
|
||||
; q6-q9 contain the results (out[j * 32 + 0-31])
|
||||
MACRO
|
||||
STORE_COMBINE_CENTER_RESULTS
|
||||
; load dest[j * dest_stride + 0-31]
|
||||
vld1.s16 {d8}, [r10], r2
|
||||
vld1.s16 {d11}, [r9], r11
|
||||
vld1.s16 {d9}, [r10]
|
||||
vld1.s16 {d10}, [r9]
|
||||
; ROUND_POWER_OF_TWO
|
||||
vrshr.s16 q7, q7, #6
|
||||
vrshr.s16 q8, q8, #6
|
||||
vrshr.s16 q9, q9, #6
|
||||
vrshr.s16 q6, q6, #6
|
||||
; add to dest[j * dest_stride + 0-31]
|
||||
vaddw.u8 q7, q7, d9
|
||||
vaddw.u8 q8, q8, d10
|
||||
vaddw.u8 q9, q9, d11
|
||||
vaddw.u8 q6, q6, d8
|
||||
; clip pixel
|
||||
vqmovun.s16 d9, q7
|
||||
vqmovun.s16 d10, q8
|
||||
vqmovun.s16 d11, q9
|
||||
vqmovun.s16 d8, q6
|
||||
; store back into dest[j * dest_stride + 0-31]
|
||||
vst1.16 {d9}, [r10], r11
|
||||
vst1.16 {d10}, [r9], r2
|
||||
vst1.16 {d8}, [r10]
|
||||
vst1.16 {d11}, [r9]
|
||||
; update pointers (by dest_stride * 2)
|
||||
sub r9, r9, r2, lsl #1
|
||||
add r10, r10, r2, lsl #1
|
||||
MEND
|
||||
; --------------------------------------------------------------------------
|
||||
; Combine-add results with current destination content
|
||||
; q6-q9 contain the results (out[j * 32 + 0-31])
|
||||
MACRO
|
||||
STORE_COMBINE_CENTER_RESULTS_LAST
|
||||
; load dest[j * dest_stride + 0-31]
|
||||
vld1.s16 {d8}, [r10], r2
|
||||
vld1.s16 {d11}, [r9], r11
|
||||
vld1.s16 {d9}, [r10]
|
||||
vld1.s16 {d10}, [r9]
|
||||
; ROUND_POWER_OF_TWO
|
||||
vrshr.s16 q7, q7, #6
|
||||
vrshr.s16 q8, q8, #6
|
||||
vrshr.s16 q9, q9, #6
|
||||
vrshr.s16 q6, q6, #6
|
||||
; add to dest[j * dest_stride + 0-31]
|
||||
vaddw.u8 q7, q7, d9
|
||||
vaddw.u8 q8, q8, d10
|
||||
vaddw.u8 q9, q9, d11
|
||||
vaddw.u8 q6, q6, d8
|
||||
; clip pixel
|
||||
vqmovun.s16 d9, q7
|
||||
vqmovun.s16 d10, q8
|
||||
vqmovun.s16 d11, q9
|
||||
vqmovun.s16 d8, q6
|
||||
; store back into dest[j * dest_stride + 0-31]
|
||||
vst1.16 {d9}, [r10], r11
|
||||
vst1.16 {d10}, [r9], r2
|
||||
vst1.16 {d8}, [r10]!
|
||||
vst1.16 {d11}, [r9]!
|
||||
; update pointers (by dest_stride * 2)
|
||||
sub r9, r9, r2, lsl #1
|
||||
add r10, r10, r2, lsl #1
|
||||
MEND
|
||||
; --------------------------------------------------------------------------
|
||||
; Combine-add results with current destination content
|
||||
; q4-q7 contain the results (out[j * 32 + 0-31])
|
||||
MACRO
|
||||
STORE_COMBINE_EXTREME_RESULTS
|
||||
; load dest[j * dest_stride + 0-31]
|
||||
vld1.s16 {d4}, [r7], r2
|
||||
vld1.s16 {d7}, [r6], r11
|
||||
vld1.s16 {d5}, [r7]
|
||||
vld1.s16 {d6}, [r6]
|
||||
; ROUND_POWER_OF_TWO
|
||||
vrshr.s16 q5, q5, #6
|
||||
vrshr.s16 q6, q6, #6
|
||||
vrshr.s16 q7, q7, #6
|
||||
vrshr.s16 q4, q4, #6
|
||||
; add to dest[j * dest_stride + 0-31]
|
||||
vaddw.u8 q5, q5, d5
|
||||
vaddw.u8 q6, q6, d6
|
||||
vaddw.u8 q7, q7, d7
|
||||
vaddw.u8 q4, q4, d4
|
||||
; clip pixel
|
||||
vqmovun.s16 d5, q5
|
||||
vqmovun.s16 d6, q6
|
||||
vqmovun.s16 d7, q7
|
||||
vqmovun.s16 d4, q4
|
||||
; store back into dest[j * dest_stride + 0-31]
|
||||
vst1.16 {d5}, [r7], r11
|
||||
vst1.16 {d6}, [r6], r2
|
||||
vst1.16 {d7}, [r6]
|
||||
vst1.16 {d4}, [r7]
|
||||
; update pointers (by dest_stride * 2)
|
||||
sub r6, r6, r2, lsl #1
|
||||
add r7, r7, r2, lsl #1
|
||||
MEND
|
||||
; --------------------------------------------------------------------------
|
||||
; Combine-add results with current destination content
|
||||
; q4-q7 contain the results (out[j * 32 + 0-31])
|
||||
MACRO
|
||||
STORE_COMBINE_EXTREME_RESULTS_LAST
|
||||
; load dest[j * dest_stride + 0-31]
|
||||
vld1.s16 {d4}, [r7], r2
|
||||
vld1.s16 {d7}, [r6], r11
|
||||
vld1.s16 {d5}, [r7]
|
||||
vld1.s16 {d6}, [r6]
|
||||
; ROUND_POWER_OF_TWO
|
||||
vrshr.s16 q5, q5, #6
|
||||
vrshr.s16 q6, q6, #6
|
||||
vrshr.s16 q7, q7, #6
|
||||
vrshr.s16 q4, q4, #6
|
||||
; add to dest[j * dest_stride + 0-31]
|
||||
vaddw.u8 q5, q5, d5
|
||||
vaddw.u8 q6, q6, d6
|
||||
vaddw.u8 q7, q7, d7
|
||||
vaddw.u8 q4, q4, d4
|
||||
; clip pixel
|
||||
vqmovun.s16 d5, q5
|
||||
vqmovun.s16 d6, q6
|
||||
vqmovun.s16 d7, q7
|
||||
vqmovun.s16 d4, q4
|
||||
; store back into dest[j * dest_stride + 0-31]
|
||||
vst1.16 {d5}, [r7], r11
|
||||
vst1.16 {d6}, [r6], r2
|
||||
vst1.16 {d7}, [r6]!
|
||||
vst1.16 {d4}, [r7]!
|
||||
; update pointers (by dest_stride * 2)
|
||||
sub r6, r6, r2, lsl #1
|
||||
add r7, r7, r2, lsl #1
|
||||
MEND
|
||||
; --------------------------------------------------------------------------
|
||||
; Touches q8-q12, q15 (q13-q14 are preserved)
|
||||
; valid output registers are anything but q8-q11
|
||||
MACRO
|
||||
@@ -110,12 +245,12 @@ cospi_31_64 EQU 804
|
||||
; additions/subtractions before the multiplies.
|
||||
; generate the constants
|
||||
; generate scalar constants
|
||||
mov r3, #$first_constant & 0xFF00
|
||||
add r3, #$first_constant & 0x00FF
|
||||
mov r8, #$first_constant & 0xFF00
|
||||
mov r12, #$second_constant & 0xFF00
|
||||
add r8, #$first_constant & 0x00FF
|
||||
add r12, #$second_constant & 0x00FF
|
||||
; generate vector constants
|
||||
vdup.16 d30, r3
|
||||
vdup.16 d30, r8
|
||||
vdup.16 d31, r12
|
||||
; (used) two for inputs (regA-regD), one for constants (q15)
|
||||
; do some multiplications (ordered for maximum latency hiding)
|
||||
@@ -153,15 +288,22 @@ cospi_31_64 EQU 804
|
||||
MEND
|
||||
; --------------------------------------------------------------------------
|
||||
|
||||
;void idct32_transpose_and_transform(int16_t *transpose_buffer, int16_t *output, int16_t *input);
|
||||
;void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest, int dest_stride);
|
||||
;
|
||||
; r0 int16_t *transpose_buffer
|
||||
; r1 int16_t *output
|
||||
; r2 int16_t *input)
|
||||
; TODO(cd): have more logical parameter ordering but this issue will disappear
|
||||
; when functions are combined.
|
||||
; r0 int16_t *input,
|
||||
; r1 uint8_t *dest,
|
||||
; r2 int dest_stride)
|
||||
; loop counters
|
||||
; r4 bands loop counter
|
||||
; r5 pass loop counter
|
||||
; r8 transpose loop counter
|
||||
; combine-add pointers
|
||||
; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...)
|
||||
; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...)
|
||||
; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...)
|
||||
; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...)
|
||||
|
||||
|idct32_transpose_and_transform| PROC
|
||||
|vp9_short_idct32x32_add_neon| PROC
|
||||
; This function does one pass of idct32x32 transform.
|
||||
;
|
||||
; This is done by transposing the input and then doing a 1d transform on
|
||||
@@ -171,43 +313,73 @@ cospi_31_64 EQU 804
|
||||
; The 1d transform is done by looping over bands of eight columns (the
|
||||
; idct32_bands loop). For each band, the transform input transposition
|
||||
; is done on demand, one band of four 8x8 matrices at a time. The four
|
||||
; matrices are trsnposed by pairs (the idct32_transpose_pair loop).
|
||||
push {r4}
|
||||
mov r4, #0 ; initialize bands loop counter
|
||||
; matrices are transposed by pairs (the idct32_transpose_pair loop).
|
||||
push {r4-r11}
|
||||
vpush {d8-d15}
|
||||
; stack operation
|
||||
; internal buffer used to transpose 8 lines into before transforming them
|
||||
; int16_t transpose_buffer[32 * 8];
|
||||
; at sp + [4096, 4607]
|
||||
; results of the first pass (transpose and transform rows)
|
||||
; int16_t pass1[32 * 32];
|
||||
; at sp + [0, 2047]
|
||||
; results of the second pass (transpose and transform columns)
|
||||
; int16_t pass2[32 * 32];
|
||||
; at sp + [2048, 4095]
|
||||
sub sp, sp, #512+2048+2048
|
||||
|
||||
; r6 = dest + 31 * dest_stride
|
||||
; r7 = dest + 0 * dest_stride
|
||||
; r9 = dest + 15 * dest_stride
|
||||
; r10 = dest + 16 * dest_stride
|
||||
rsb r6, r2, r2, lsl #5
|
||||
rsb r9, r2, r2, lsl #4
|
||||
add r10, r1, r2, lsl #4
|
||||
mov r7, r1
|
||||
add r6, r6, r1
|
||||
add r9, r9, r1
|
||||
; r11 = -dest_stride
|
||||
neg r11, r2
|
||||
; r3 = input
|
||||
mov r3, r0
|
||||
; parameters for first pass
|
||||
; r0 = transpose_buffer[32 * 8]
|
||||
add r0, sp, #4096
|
||||
; r1 = pass1[32 * 32]
|
||||
mov r1, sp
|
||||
|
||||
mov r5, #0 ; initialize pass loop counter
|
||||
idct32_pass_loop
|
||||
mov r4, #4 ; initialize bands loop counter
|
||||
idct32_bands_loop
|
||||
; TODO(cd) get rid of these push/pop by properly adjusting register
|
||||
; content at end of loop
|
||||
push {r0}
|
||||
push {r1}
|
||||
push {r2}
|
||||
mov r3, #0 ; initialize transpose loop counter
|
||||
mov r8, #2 ; initialize transpose loop counter
|
||||
idct32_transpose_pair_loop
|
||||
; Load two horizontally consecutive 8x8 16bit data matrices. The first one
|
||||
; into q0-q7 and the second one into q8-q15. There is a stride of 64,
|
||||
; adjusted to 32 because of the two post-increments.
|
||||
vld1.s16 {q8}, [r2]!
|
||||
vld1.s16 {q0}, [r2]!
|
||||
add r2, #32
|
||||
vld1.s16 {q9}, [r2]!
|
||||
vld1.s16 {q1}, [r2]!
|
||||
add r2, #32
|
||||
vld1.s16 {q10}, [r2]!
|
||||
vld1.s16 {q2}, [r2]!
|
||||
add r2, #32
|
||||
vld1.s16 {q11}, [r2]!
|
||||
vld1.s16 {q3}, [r2]!
|
||||
add r2, #32
|
||||
vld1.s16 {q12}, [r2]!
|
||||
vld1.s16 {q4}, [r2]!
|
||||
add r2, #32
|
||||
vld1.s16 {q13}, [r2]!
|
||||
vld1.s16 {q5}, [r2]!
|
||||
add r2, #32
|
||||
vld1.s16 {q14}, [r2]!
|
||||
vld1.s16 {q6}, [r2]!
|
||||
add r2, #32
|
||||
vld1.s16 {q15}, [r2]!
|
||||
vld1.s16 {q7}, [r2]!
|
||||
vld1.s16 {q8}, [r3]!
|
||||
vld1.s16 {q0}, [r3]!
|
||||
add r3, #32
|
||||
vld1.s16 {q9}, [r3]!
|
||||
vld1.s16 {q1}, [r3]!
|
||||
add r3, #32
|
||||
vld1.s16 {q10}, [r3]!
|
||||
vld1.s16 {q2}, [r3]!
|
||||
add r3, #32
|
||||
vld1.s16 {q11}, [r3]!
|
||||
vld1.s16 {q3}, [r3]!
|
||||
add r3, #32
|
||||
vld1.s16 {q12}, [r3]!
|
||||
vld1.s16 {q4}, [r3]!
|
||||
add r3, #32
|
||||
vld1.s16 {q13}, [r3]!
|
||||
vld1.s16 {q5}, [r3]!
|
||||
add r3, #32
|
||||
vld1.s16 {q14}, [r3]!
|
||||
vld1.s16 {q6}, [r3]!
|
||||
add r3, #32
|
||||
vld1.s16 {q15}, [r3]!
|
||||
vld1.s16 {q7}, [r3]!
|
||||
|
||||
; Transpose the two 8x8 16bit data matrices.
|
||||
vswp d17, d24
|
||||
@@ -255,11 +427,13 @@ idct32_transpose_pair_loop
|
||||
vst1.16 {q7}, [r0]!
|
||||
|
||||
; increment pointers by adjusted stride (not necessary for r0/out)
|
||||
sub r2, r2, #8*32*2-32-16*2
|
||||
; go back by 7*32 for the seven lines moved fully by read and add
|
||||
; go back by 32 for the eighth line only read
|
||||
; advance by 16*2 to go to the next pair
|
||||
sub r3, r3, #7*32*2 + 32 - 16*2
|
||||
; transpose pair loop processing
|
||||
add r3, r3, #1
|
||||
cmp r3, #1
|
||||
BLE idct32_transpose_pair_loop
|
||||
subs r8, r8, #1
|
||||
bne idct32_transpose_pair_loop
|
||||
|
||||
; restore r0/input to its original value
|
||||
sub r0, r0, #32*8*2
|
||||
@@ -815,21 +989,26 @@ idct32_transpose_pair_loop
|
||||
vadd.s16 q9, q5, q0
|
||||
vsub.s16 q6, q5, q0
|
||||
vsub.s16 q7, q4, q1
|
||||
STORE_IN_OUTPUT 17, 17, 16, q7, q6
|
||||
STORE_IN_OUTPUT 16, 15, 14, q9, q8
|
||||
|
||||
cmp r5, #0
|
||||
bgt idct32_bands_end_2nd_pass
|
||||
|
||||
idct32_bands_end_1st_pass
|
||||
STORE_IN_OUTPUT 17, 16, 17, q6, q7
|
||||
STORE_IN_OUTPUT 17, 14, 15, q8, q9
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 0 * 32] = step1b[0][i] + step1b[31][i];
|
||||
;output[ 1 * 32] = step1b[1][i] + step1b[30][i];
|
||||
;output[30 * 32] = step1b[1][i] - step1b[30][i];
|
||||
;output[31 * 32] = step1b[0][i] - step1b[31][i];
|
||||
LOAD_FROM_OUTPUT 14, 30, 31, q0, q1
|
||||
LOAD_FROM_OUTPUT 15, 30, 31, q0, q1
|
||||
vadd.s16 q4, q2, q1
|
||||
vadd.s16 q5, q3, q0
|
||||
vsub.s16 q6, q3, q0
|
||||
vsub.s16 q7, q2, q1
|
||||
STORE_IN_OUTPUT 31, 31, 30, q7, q6
|
||||
STORE_IN_OUTPUT 30, 0, 1, q4, q5
|
||||
STORE_IN_OUTPUT 31, 30, 31, q6, q7
|
||||
STORE_IN_OUTPUT 31, 0, 1, q4, q5
|
||||
; --------------------------------------------------------------------------
|
||||
; part of stage 7
|
||||
;step1[2] = step1b[2][i] + step1b[13][i];
|
||||
@@ -848,25 +1027,25 @@ idct32_transpose_pair_loop
|
||||
;output[18 * 32] = step1b[13][i] - step1b[18][i];
|
||||
;output[19 * 32] = step1b[12][i] - step1b[19][i];
|
||||
LOAD_FROM_OUTPUT 13, 18, 19, q0, q1
|
||||
vadd.s16 q6, q4, q1
|
||||
vadd.s16 q7, q5, q0
|
||||
vsub.s16 q8, q5, q0
|
||||
vsub.s16 q9, q4, q1
|
||||
STORE_IN_OUTPUT 19, 19, 18, q9, q8
|
||||
STORE_IN_OUTPUT 18, 13, 12, q7, q6
|
||||
vadd.s16 q8, q4, q1
|
||||
vadd.s16 q9, q5, q0
|
||||
vsub.s16 q6, q5, q0
|
||||
vsub.s16 q7, q4, q1
|
||||
STORE_IN_OUTPUT 19, 18, 19, q6, q7
|
||||
STORE_IN_OUTPUT 19, 12, 13, q8, q9
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 2 * 32] = step1b[2][i] + step1b[29][i];
|
||||
;output[ 3 * 32] = step1b[3][i] + step1b[28][i];
|
||||
;output[28 * 32] = step1b[3][i] - step1b[28][i];
|
||||
;output[29 * 32] = step1b[2][i] - step1b[29][i];
|
||||
LOAD_FROM_OUTPUT 12, 28, 29, q0, q1
|
||||
LOAD_FROM_OUTPUT 13, 28, 29, q0, q1
|
||||
vadd.s16 q4, q2, q1
|
||||
vadd.s16 q5, q3, q0
|
||||
vsub.s16 q6, q3, q0
|
||||
vsub.s16 q7, q2, q1
|
||||
STORE_IN_OUTPUT 29, 29, 28, q7, q6
|
||||
STORE_IN_OUTPUT 28, 2, 3, q4, q5
|
||||
STORE_IN_OUTPUT 29, 28, 29, q6, q7
|
||||
STORE_IN_OUTPUT 29, 2, 3, q4, q5
|
||||
; --------------------------------------------------------------------------
|
||||
; part of stage 7
|
||||
;step1[4] = step1b[4][i] + step1b[11][i];
|
||||
@@ -885,25 +1064,25 @@ idct32_transpose_pair_loop
|
||||
;output[20 * 32] = step1b[11][i] - step1b[20][i];
|
||||
;output[21 * 32] = step1b[10][i] - step1b[21][i];
|
||||
LOAD_FROM_OUTPUT 11, 20, 21, q0, q1
|
||||
vadd.s16 q6, q4, q1
|
||||
vadd.s16 q7, q5, q0
|
||||
vsub.s16 q8, q5, q0
|
||||
vsub.s16 q9, q4, q1
|
||||
STORE_IN_OUTPUT 21, 21, 20, q9, q8
|
||||
STORE_IN_OUTPUT 20, 11, 10, q7, q6
|
||||
vadd.s16 q8, q4, q1
|
||||
vadd.s16 q9, q5, q0
|
||||
vsub.s16 q6, q5, q0
|
||||
vsub.s16 q7, q4, q1
|
||||
STORE_IN_OUTPUT 21, 20, 21, q6, q7
|
||||
STORE_IN_OUTPUT 21, 10, 11, q8, q9
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 4 * 32] = step1b[4][i] + step1b[27][i];
|
||||
;output[ 5 * 32] = step1b[5][i] + step1b[26][i];
|
||||
;output[26 * 32] = step1b[5][i] - step1b[26][i];
|
||||
;output[27 * 32] = step1b[4][i] - step1b[27][i];
|
||||
LOAD_FROM_OUTPUT 10, 26, 27, q0, q1
|
||||
LOAD_FROM_OUTPUT 11, 26, 27, q0, q1
|
||||
vadd.s16 q4, q2, q1
|
||||
vadd.s16 q5, q3, q0
|
||||
vsub.s16 q6, q3, q0
|
||||
vsub.s16 q7, q2, q1
|
||||
STORE_IN_OUTPUT 27, 27, 26, q7, q6
|
||||
STORE_IN_OUTPUT 26, 4, 5, q4, q5
|
||||
STORE_IN_OUTPUT 27, 26, 27, q6, q7
|
||||
STORE_IN_OUTPUT 27, 4, 5, q4, q5
|
||||
; --------------------------------------------------------------------------
|
||||
; part of stage 7
|
||||
;step1[6] = step1b[6][i] + step1b[9][i];
|
||||
@@ -922,92 +1101,199 @@ idct32_transpose_pair_loop
|
||||
;output[22 * 32] = step1b[9][i] - step1b[22][i];
|
||||
;output[23 * 32] = step1b[8][i] - step1b[23][i];
|
||||
LOAD_FROM_OUTPUT 9, 22, 23, q0, q1
|
||||
vadd.s16 q6, q4, q1
|
||||
vadd.s16 q7, q5, q0
|
||||
vsub.s16 q8, q5, q0
|
||||
vsub.s16 q9, q4, q1
|
||||
STORE_IN_OUTPUT 23, 23, 22, q9, q8
|
||||
STORE_IN_OUTPUT 22, 9, 8, q7, q6
|
||||
vadd.s16 q8, q4, q1
|
||||
vadd.s16 q9, q5, q0
|
||||
vsub.s16 q6, q5, q0
|
||||
vsub.s16 q7, q4, q1
|
||||
STORE_IN_OUTPUT 23, 22, 23, q6, q7
|
||||
STORE_IN_OUTPUT 23, 8, 9, q8, q9
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 6 * 32] = step1b[6][i] + step1b[25][i];
|
||||
;output[ 7 * 32] = step1b[7][i] + step1b[24][i];
|
||||
;output[24 * 32] = step1b[7][i] - step1b[24][i];
|
||||
;output[25 * 32] = step1b[6][i] - step1b[25][i];
|
||||
LOAD_FROM_OUTPUT 8, 24, 25, q0, q1
|
||||
LOAD_FROM_OUTPUT 9, 24, 25, q0, q1
|
||||
vadd.s16 q4, q2, q1
|
||||
vadd.s16 q5, q3, q0
|
||||
vsub.s16 q6, q3, q0
|
||||
vsub.s16 q7, q2, q1
|
||||
STORE_IN_OUTPUT 25, 25, 24, q7, q6
|
||||
STORE_IN_OUTPUT 24, 6, 7, q4, q5
|
||||
; --------------------------------------------------------------------------
|
||||
STORE_IN_OUTPUT 25, 24, 25, q6, q7
|
||||
STORE_IN_OUTPUT 25, 6, 7, q4, q5
|
||||
|
||||
; TODO(cd) get rid of these push/pop by properly adjusting register
|
||||
; content at end of loop
|
||||
pop {r2}
|
||||
pop {r1}
|
||||
pop {r0}
|
||||
add r1, r1, #8*2
|
||||
add r2, r2, #8*32*2
|
||||
; restore r0 by removing the last offset from the last
|
||||
; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2
|
||||
sub r0, r0, #24*8*2
|
||||
; restore r1 by removing the last offset from the last
|
||||
; operation (STORE_IN_OUTPUT 24, 6, 7) => 7*32*2
|
||||
; advance by 8 columns => 8*2
|
||||
sub r1, r1, #7*32*2 - 8*2
|
||||
; advance by 8 lines (8*32*2)
|
||||
; go back by the two pairs from the loop (32*2)
|
||||
add r3, r3, #8*32*2 - 32*2
|
||||
|
||||
; bands loop processing
|
||||
add r4, r4, #1
|
||||
cmp r4, #3
|
||||
BLE idct32_bands_loop
|
||||
subs r4, r4, #1
|
||||
bne idct32_bands_loop
|
||||
|
||||
pop {r4}
|
||||
; parameters for second pass
|
||||
; the input of pass2 is the result of pass1. we have to remove the offset
|
||||
; of 32 columns induced by the above idct32_bands_loop
|
||||
sub r3, r1, #32*2
|
||||
; r1 = pass2[32 * 32]
|
||||
add r1, sp, #2048
|
||||
|
||||
; pass loop processing
|
||||
add r5, r5, #1
|
||||
B idct32_pass_loop
|
||||
|
||||
idct32_bands_end_2nd_pass
|
||||
STORE_COMBINE_CENTER_RESULTS
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 0 * 32] = step1b[0][i] + step1b[31][i];
|
||||
;output[ 1 * 32] = step1b[1][i] + step1b[30][i];
|
||||
;output[30 * 32] = step1b[1][i] - step1b[30][i];
|
||||
;output[31 * 32] = step1b[0][i] - step1b[31][i];
|
||||
LOAD_FROM_OUTPUT 17, 30, 31, q0, q1
|
||||
vadd.s16 q4, q2, q1
|
||||
vadd.s16 q5, q3, q0
|
||||
vsub.s16 q6, q3, q0
|
||||
vsub.s16 q7, q2, q1
|
||||
STORE_COMBINE_EXTREME_RESULTS
|
||||
; --------------------------------------------------------------------------
|
||||
; part of stage 7
|
||||
;step1[2] = step1b[2][i] + step1b[13][i];
|
||||
;step1[3] = step1b[3][i] + step1b[12][i];
|
||||
;step1[12] = step1b[3][i] - step1b[12][i];
|
||||
;step1[13] = step1b[2][i] - step1b[13][i];
|
||||
LOAD_FROM_OUTPUT 31, 12, 13, q0, q1
|
||||
vadd.s16 q2, q10, q1
|
||||
vadd.s16 q3, q11, q0
|
||||
vsub.s16 q4, q11, q0
|
||||
vsub.s16 q5, q10, q1
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[12 * 32] = step1b[12][i] + step1b[19][i];
|
||||
;output[13 * 32] = step1b[13][i] + step1b[18][i];
|
||||
;output[18 * 32] = step1b[13][i] - step1b[18][i];
|
||||
;output[19 * 32] = step1b[12][i] - step1b[19][i];
|
||||
LOAD_FROM_OUTPUT 13, 18, 19, q0, q1
|
||||
vadd.s16 q8, q4, q1
|
||||
vadd.s16 q9, q5, q0
|
||||
vsub.s16 q6, q5, q0
|
||||
vsub.s16 q7, q4, q1
|
||||
STORE_COMBINE_CENTER_RESULTS
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 2 * 32] = step1b[2][i] + step1b[29][i];
|
||||
;output[ 3 * 32] = step1b[3][i] + step1b[28][i];
|
||||
;output[28 * 32] = step1b[3][i] - step1b[28][i];
|
||||
;output[29 * 32] = step1b[2][i] - step1b[29][i];
|
||||
LOAD_FROM_OUTPUT 19, 28, 29, q0, q1
|
||||
vadd.s16 q4, q2, q1
|
||||
vadd.s16 q5, q3, q0
|
||||
vsub.s16 q6, q3, q0
|
||||
vsub.s16 q7, q2, q1
|
||||
STORE_COMBINE_EXTREME_RESULTS
|
||||
; --------------------------------------------------------------------------
|
||||
; part of stage 7
|
||||
;step1[4] = step1b[4][i] + step1b[11][i];
|
||||
;step1[5] = step1b[5][i] + step1b[10][i];
|
||||
;step1[10] = step1b[5][i] - step1b[10][i];
|
||||
;step1[11] = step1b[4][i] - step1b[11][i];
|
||||
LOAD_FROM_OUTPUT 29, 10, 11, q0, q1
|
||||
vadd.s16 q2, q12, q1
|
||||
vadd.s16 q3, q13, q0
|
||||
vsub.s16 q4, q13, q0
|
||||
vsub.s16 q5, q12, q1
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[10 * 32] = step1b[10][i] + step1b[21][i];
|
||||
;output[11 * 32] = step1b[11][i] + step1b[20][i];
|
||||
;output[20 * 32] = step1b[11][i] - step1b[20][i];
|
||||
;output[21 * 32] = step1b[10][i] - step1b[21][i];
|
||||
LOAD_FROM_OUTPUT 11, 20, 21, q0, q1
|
||||
vadd.s16 q8, q4, q1
|
||||
vadd.s16 q9, q5, q0
|
||||
vsub.s16 q6, q5, q0
|
||||
vsub.s16 q7, q4, q1
|
||||
STORE_COMBINE_CENTER_RESULTS
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 4 * 32] = step1b[4][i] + step1b[27][i];
|
||||
;output[ 5 * 32] = step1b[5][i] + step1b[26][i];
|
||||
;output[26 * 32] = step1b[5][i] - step1b[26][i];
|
||||
;output[27 * 32] = step1b[4][i] - step1b[27][i];
|
||||
LOAD_FROM_OUTPUT 21, 26, 27, q0, q1
|
||||
vadd.s16 q4, q2, q1
|
||||
vadd.s16 q5, q3, q0
|
||||
vsub.s16 q6, q3, q0
|
||||
vsub.s16 q7, q2, q1
|
||||
STORE_COMBINE_EXTREME_RESULTS
|
||||
; --------------------------------------------------------------------------
|
||||
; part of stage 7
|
||||
;step1[6] = step1b[6][i] + step1b[9][i];
|
||||
;step1[7] = step1b[7][i] + step1b[8][i];
|
||||
;step1[8] = step1b[7][i] - step1b[8][i];
|
||||
;step1[9] = step1b[6][i] - step1b[9][i];
|
||||
LOAD_FROM_OUTPUT 27, 8, 9, q0, q1
|
||||
vadd.s16 q2, q14, q1
|
||||
vadd.s16 q3, q15, q0
|
||||
vsub.s16 q4, q15, q0
|
||||
vsub.s16 q5, q14, q1
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 8 * 32] = step1b[8][i] + step1b[23][i];
|
||||
;output[ 9 * 32] = step1b[9][i] + step1b[22][i];
|
||||
;output[22 * 32] = step1b[9][i] - step1b[22][i];
|
||||
;output[23 * 32] = step1b[8][i] - step1b[23][i];
|
||||
LOAD_FROM_OUTPUT 9, 22, 23, q0, q1
|
||||
vadd.s16 q8, q4, q1
|
||||
vadd.s16 q9, q5, q0
|
||||
vsub.s16 q6, q5, q0
|
||||
vsub.s16 q7, q4, q1
|
||||
STORE_COMBINE_CENTER_RESULTS_LAST
|
||||
; --------------------------------------------------------------------------
|
||||
; part of final stage
|
||||
;output[ 6 * 32] = step1b[6][i] + step1b[25][i];
|
||||
;output[ 7 * 32] = step1b[7][i] + step1b[24][i];
|
||||
;output[24 * 32] = step1b[7][i] - step1b[24][i];
|
||||
;output[25 * 32] = step1b[6][i] - step1b[25][i];
|
||||
LOAD_FROM_OUTPUT 23, 24, 25, q0, q1
|
||||
vadd.s16 q4, q2, q1
|
||||
vadd.s16 q5, q3, q0
|
||||
vsub.s16 q6, q3, q0
|
||||
vsub.s16 q7, q2, q1
|
||||
STORE_COMBINE_EXTREME_RESULTS_LAST
|
||||
; --------------------------------------------------------------------------
|
||||
; restore pointers to their initial indices for next band pass by
|
||||
; removing/adding dest_stride * 8. The actual increment by eight
|
||||
; is taken care of within the _LAST macros.
|
||||
add r6, r6, r2, lsl #3
|
||||
add r9, r9, r2, lsl #3
|
||||
sub r7, r7, r2, lsl #3
|
||||
sub r10, r10, r2, lsl #3
|
||||
|
||||
; restore r0 by removing the last offset from the last
|
||||
; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2
|
||||
sub r0, r0, #24*8*2
|
||||
; restore r1 by removing the last offset from the last
|
||||
; operation (LOAD_FROM_OUTPUT 23, 24, 25) => 25*32*2
|
||||
; advance by 8 columns => 8*2
|
||||
sub r1, r1, #25*32*2 - 8*2
|
||||
; advance by 8 lines (8*32*2)
|
||||
; go back by the two pairs from the loop (32*2)
|
||||
add r3, r3, #8*32*2 - 32*2
|
||||
|
||||
; bands loop processing
|
||||
subs r4, r4, #1
|
||||
bne idct32_bands_loop
|
||||
|
||||
; stack operation
|
||||
add sp, sp, #512+2048+2048
|
||||
vpop {d8-d15}
|
||||
pop {r4-r11}
|
||||
bx lr
|
||||
ENDP ; |idct32_transpose_and_transform|
|
||||
|
||||
;void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);
|
||||
;
|
||||
; r0 uint8_t *dest
|
||||
; r1 int16_t *out
|
||||
; r2 int dest_stride)
|
||||
|
||||
|idct32_combine_add| PROC
|
||||
|
||||
mov r12, r0 ; dest pointer used for stores
|
||||
sub r2, r2, #32 ; adjust the stride (remove the post-increments)
|
||||
mov r3, #0 ; initialize loop counter
|
||||
|
||||
idct32_combine_add_loop
|
||||
; load out[j * 32 + 0-31]
|
||||
vld1.s16 {q12}, [r1]!
|
||||
vld1.s16 {q13}, [r1]!
|
||||
vld1.s16 {q14}, [r1]!
|
||||
vld1.s16 {q15}, [r1]!
|
||||
; load dest[j * dest_stride + 0-31]
|
||||
vld1.s16 {q6}, [r0]!
|
||||
vld1.s16 {q7}, [r0]!
|
||||
; ROUND_POWER_OF_TWO
|
||||
vrshr.s16 q12, q12, #6
|
||||
vrshr.s16 q13, q13, #6
|
||||
vrshr.s16 q14, q14, #6
|
||||
vrshr.s16 q15, q15, #6
|
||||
; add to dest[j * dest_stride + 0-31]
|
||||
vaddw.u8 q12, q12, d12
|
||||
vaddw.u8 q13, q13, d13
|
||||
vaddw.u8 q14, q14, d14
|
||||
vaddw.u8 q15, q15, d15
|
||||
; clip pixel
|
||||
vqmovun.s16 d12, q12
|
||||
vqmovun.s16 d13, q13
|
||||
vqmovun.s16 d14, q14
|
||||
vqmovun.s16 d15, q15
|
||||
; store back into dest[j * dest_stride + 0-31]
|
||||
vst1.16 {q6}, [r12]!
|
||||
vst1.16 {q7}, [r12]!
|
||||
; increment pointers by adjusted stride (not necessary for r1/out)
|
||||
add r0, r0, r2
|
||||
add r12, r12, r2
|
||||
; loop processing
|
||||
add r3, r3, #1
|
||||
cmp r3, #31
|
||||
BLE idct32_combine_add_loop
|
||||
|
||||
bx lr
|
||||
ENDP ; |idct32_transpose|
|
||||
|
||||
ENDP ; |vp9_short_idct32x32_add_neon|
|
||||
END
|
||||
|
@@ -9,7 +9,7 @@
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_idct8x8_add_neon|
|
||||
EXPORT |vp9_short_idct10_8x8_add_neon|
|
||||
EXPORT |vp9_short_idct8x8_10_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@@ -310,13 +310,13 @@
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct8x8_add_neon|
|
||||
|
||||
;void vp9_short_idct10_8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
||||
;void vp9_short_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
||||
;
|
||||
; r0 int16_t input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride)
|
||||
|
||||
|vp9_short_idct10_8x8_add_neon| PROC
|
||||
|vp9_short_idct8x8_10_add_neon| PROC
|
||||
push {r4-r9}
|
||||
vpush {d8-d15}
|
||||
vld1.s16 {q8,q9}, [r0]!
|
||||
@@ -514,6 +514,6 @@
|
||||
vpop {d8-d15}
|
||||
pop {r4-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct10_8x8_add_neon|
|
||||
ENDP ; |vp9_short_idct8x8_10_add_neon|
|
||||
|
||||
END
|
||||
|
@@ -10,7 +10,7 @@
|
||||
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
||||
void vp9_machine_specific_config(VP9_COMMON *cm) {
|
||||
|
@@ -170,13 +170,8 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
|
||||
void vp9_create_common(VP9_COMMON *cm) {
|
||||
vp9_machine_specific_config(cm);
|
||||
|
||||
vp9_init_mbmode_probs(cm);
|
||||
|
||||
cm->tx_mode = ONLY_4X4;
|
||||
cm->comp_pred_mode = HYBRID_PREDICTION;
|
||||
|
||||
// Initialize reference frame sign bias structure to defaults
|
||||
vpx_memset(cm->ref_frame_sign_bias, 0, sizeof(cm->ref_frame_sign_bias));
|
||||
}
|
||||
|
||||
void vp9_remove_common(VP9_COMMON *cm) {
|
||||
|
@@ -137,7 +137,7 @@ typedef struct {
|
||||
TX_SIZE tx_size;
|
||||
int_mv mv[2]; // for each reference frame used
|
||||
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
|
||||
int_mv best_mv, best_second_mv;
|
||||
int_mv best_mv[2];
|
||||
|
||||
uint8_t mode_context[MAX_REF_FRAMES];
|
||||
|
||||
@@ -244,10 +244,9 @@ typedef struct macroblockd {
|
||||
unsigned char ab_index; // index of 4x4 block inside the 8x8 block
|
||||
|
||||
int q_index;
|
||||
|
||||
} MACROBLOCKD;
|
||||
|
||||
static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
|
||||
static INLINE uint8_t *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
|
||||
switch (subsize) {
|
||||
case BLOCK_64X64:
|
||||
case BLOCK_64X32:
|
||||
|
@@ -29,4 +29,4 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
|
||||
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES];
|
||||
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
|
||||
|
||||
#endif // VP9_COMMON_VP9_COMMON_DATA_H
|
||||
#endif // VP9_COMMON_VP9_COMMON_DATA_H
|
||||
|
@@ -35,7 +35,7 @@ static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Initial phase offset */
|
||||
int x_q4 = (filter_x0 - filter_x_base) / taps;
|
||||
int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Per-pixel src offset */
|
||||
@@ -76,7 +76,7 @@ static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Initial phase offset */
|
||||
int x_q4 = (filter_x0 - filter_x_base) / taps;
|
||||
int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Per-pixel src offset */
|
||||
@@ -118,7 +118,7 @@ static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Initial phase offset */
|
||||
int y_q4 = (filter_y0 - filter_y_base) / taps;
|
||||
int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Per-pixel src offset */
|
||||
@@ -160,7 +160,7 @@ static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Initial phase offset */
|
||||
int y_q4 = (filter_y0 - filter_y_base) / taps;
|
||||
int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Per-pixel src offset */
|
||||
@@ -282,7 +282,7 @@ void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
int r;
|
||||
|
||||
for (r = h; r > 0; --r) {
|
||||
memcpy(dst, src, w);
|
||||
vpx_memcpy(dst, src, w);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
|
@@ -7,8 +7,8 @@
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef VP9_COMMON_CONVOLVE_H_
|
||||
#define VP9_COMMON_CONVOLVE_H_
|
||||
#ifndef VP9_COMMON_VP9_CONVOLVE_H_
|
||||
#define VP9_COMMON_VP9_CONVOLVE_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
@@ -26,4 +26,4 @@ struct subpix_fn_table {
|
||||
const int16_t (*filter_y)[8];
|
||||
};
|
||||
|
||||
#endif // VP9_COMMON_CONVOLVE_H_
|
||||
#endif // VP9_COMMON_VP9_CONVOLVE_H_
|
||||
|
@@ -63,9 +63,9 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, char *file) {
|
||||
print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
|
||||
print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
|
||||
|
||||
log_frame_info(cm, "Vectors ",mvs);
|
||||
log_frame_info(cm, "Vectors ", mvs);
|
||||
for (mi_row = 0; mi_row < rows; mi_row++) {
|
||||
fprintf(mvs,"V ");
|
||||
fprintf(mvs, "V ");
|
||||
for (mi_col = 0; mi_col < cols; mi_col++) {
|
||||
fprintf(mvs, "%4d:%4d ", mi_8x8[mi_index]->mbmi.mv[0].as_mv.row,
|
||||
mi_8x8[mi_index]->mbmi.mv[0].as_mv.col);
|
||||
|
@@ -7,6 +7,8 @@
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef VP9_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
#define VP9_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
|
||||
/*Generated file, included by vp9_entropy.c*/
|
||||
static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = {
|
||||
@@ -694,3 +696,4 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = {
|
||||
}
|
||||
};
|
||||
|
||||
#endif // VP9_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
|
@@ -107,101 +107,171 @@ DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_8x8[64]) = {
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_16x16[256]) = {
|
||||
0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80,
|
||||
50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52,
|
||||
98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69,
|
||||
100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146,
|
||||
55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25,
|
||||
133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119,
|
||||
26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194,
|
||||
180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59,
|
||||
12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13,
|
||||
226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169,
|
||||
242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108,
|
||||
77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140,
|
||||
230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141,
|
||||
63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142,
|
||||
219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251,
|
||||
190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, 255,
|
||||
0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80,
|
||||
50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52,
|
||||
98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69,
|
||||
100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146,
|
||||
55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25,
|
||||
133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119,
|
||||
26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194,
|
||||
180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59,
|
||||
12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13,
|
||||
226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169,
|
||||
242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108,
|
||||
77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140,
|
||||
230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141,
|
||||
63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142,
|
||||
219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159,
|
||||
251,
|
||||
190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239,
|
||||
255,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_16x16[256]) = {
|
||||
0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81,
|
||||
34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4,
|
||||
67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21,
|
||||
146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85,
|
||||
22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179,
|
||||
225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24,
|
||||
87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227,
|
||||
88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167,
|
||||
213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229,
|
||||
74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59,
|
||||
200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170,
|
||||
60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202,
|
||||
233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125,
|
||||
62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79,
|
||||
126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236,
|
||||
159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, 255,
|
||||
0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81,
|
||||
34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4,
|
||||
67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21,
|
||||
146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85,
|
||||
22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179,
|
||||
225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24,
|
||||
87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227,
|
||||
88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167,
|
||||
213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229,
|
||||
74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59,
|
||||
200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170,
|
||||
60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202,
|
||||
233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125,
|
||||
62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79,
|
||||
126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205,
|
||||
236,
|
||||
159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239,
|
||||
255,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_16x16[256]) = {
|
||||
0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20,
|
||||
49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52,
|
||||
23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69,
|
||||
25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100,
|
||||
13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102,
|
||||
144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160,
|
||||
89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176,
|
||||
75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136,
|
||||
165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166,
|
||||
167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108,
|
||||
197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170,
|
||||
124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186,
|
||||
156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110,
|
||||
157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158,
|
||||
188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175,
|
||||
190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255,
|
||||
0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20,
|
||||
49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52,
|
||||
23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69,
|
||||
25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100,
|
||||
13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102,
|
||||
144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160,
|
||||
89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176,
|
||||
75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136,
|
||||
165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166,
|
||||
167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108,
|
||||
197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170,
|
||||
124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186,
|
||||
156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110,
|
||||
157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111,
|
||||
158,
|
||||
188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220,
|
||||
175,
|
||||
190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254,
|
||||
255,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_32x32[1024]) = {
|
||||
0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, 68, 131, 37, 100,
|
||||
225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197,
|
||||
71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, 41, 417, 199, 136,
|
||||
262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451,
|
||||
481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, 453, 139, 44, 234,
|
||||
484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, 486, 77, 204, 362,
|
||||
608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111, 238, 48, 143,
|
||||
80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, 393, 300, 269, 176, 145,
|
||||
52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457, 426, 395,
|
||||
364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, 210, 179, 117, 86, 55, 738, 707,
|
||||
614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397, 304,
|
||||
273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, 864, 833, 802, 771, 740, 709,
|
||||
678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493,
|
||||
462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, 743, 619, 495, 371, 247, 123,
|
||||
896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681,
|
||||
650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, 651, 620, 589, 558, 527,
|
||||
496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373,
|
||||
342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, 499, 375, 251, 127,
|
||||
900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, 685, 654, 592, 561,
|
||||
530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438, 407, 376, 345,
|
||||
314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, 967, 874, 843, 750,
|
||||
719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533, 440, 409,
|
||||
316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, 752, 721, 690, 659,
|
||||
628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002, 971,
|
||||
878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, 537, 444, 413, 972,
|
||||
941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477,
|
||||
446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, 1007, 883, 759, 635, 511,
|
||||
912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791,
|
||||
760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, 1011, 887, 763, 639,
|
||||
916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982,
|
||||
951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, 1016, 985, 954, 923,
|
||||
892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023,
|
||||
0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160,
|
||||
129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193,
|
||||
68, 131, 37, 100,
|
||||
225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38,
|
||||
258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321,
|
||||
102, 352, 8, 197,
|
||||
71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292,
|
||||
135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293,
|
||||
41, 417, 199, 136,
|
||||
262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105,
|
||||
419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169,
|
||||
295, 420, 106, 451,
|
||||
481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421,
|
||||
75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391,
|
||||
453, 139, 44, 234,
|
||||
484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108,
|
||||
546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577,
|
||||
486, 77, 204, 362,
|
||||
608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173,
|
||||
610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17,
|
||||
111, 238, 48, 143,
|
||||
80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51,
|
||||
83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424,
|
||||
393, 300, 269, 176, 145,
|
||||
52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301,
|
||||
270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581,
|
||||
550, 519, 488, 457, 426, 395,
|
||||
364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737,
|
||||
706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241,
|
||||
210, 179, 117, 86, 55, 738, 707,
|
||||
614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491,
|
||||
367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676,
|
||||
645, 552, 521, 428, 397, 304,
|
||||
273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553,
|
||||
522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26,
|
||||
864, 833, 802, 771, 740, 709,
|
||||
678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306,
|
||||
275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741,
|
||||
710, 679, 617, 586, 555, 493,
|
||||
462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835,
|
||||
742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867,
|
||||
743, 619, 495, 371, 247, 123,
|
||||
896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680,
|
||||
649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929,
|
||||
898, 836, 805, 774, 712, 681,
|
||||
650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154,
|
||||
92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682,
|
||||
651, 620, 589, 558, 527,
|
||||
496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124,
|
||||
93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590,
|
||||
559, 497, 466, 435, 373,
|
||||
342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715,
|
||||
622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623,
|
||||
499, 375, 251, 127,
|
||||
900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560,
|
||||
529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716,
|
||||
685, 654, 592, 561,
|
||||
530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903,
|
||||
872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469,
|
||||
438, 407, 376, 345,
|
||||
314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718,
|
||||
687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998,
|
||||
967, 874, 843, 750,
|
||||
719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503,
|
||||
379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657,
|
||||
564, 533, 440, 409,
|
||||
316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534,
|
||||
472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783,
|
||||
752, 721, 690, 659,
|
||||
628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970,
|
||||
939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381,
|
||||
350, 319, 1002, 971,
|
||||
878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631,
|
||||
507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568,
|
||||
537, 444, 413, 972,
|
||||
941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414,
|
||||
1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601,
|
||||
570, 539, 508, 477,
|
||||
446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571,
|
||||
509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479,
|
||||
1007, 883, 759, 635, 511,
|
||||
912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945,
|
||||
914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915,
|
||||
884, 853, 822, 791,
|
||||
760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823,
|
||||
761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607,
|
||||
1011, 887, 763, 639,
|
||||
916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825,
|
||||
794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733,
|
||||
702, 671, 1013, 982,
|
||||
951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015,
|
||||
891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798,
|
||||
1016, 985, 954, 923,
|
||||
892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863,
|
||||
1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021,
|
||||
990, 959, 1022, 991, 1023,
|
||||
};
|
||||
|
||||
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
|
||||
|
||||
const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
|
||||
{
|
||||
const vp9_tree_index vp9_coef_tree[ 22] = {
|
||||
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
|
||||
-ZERO_TOKEN, 4, /* 1 = ZERO */
|
||||
-ONE_TOKEN, 6, /* 2 = ONE */
|
||||
@@ -569,31 +639,6 @@ void vp9_init_neighbors() {
|
||||
vp9_default_scan_32x32_neighbors);
|
||||
}
|
||||
|
||||
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan) {
|
||||
if (scan == vp9_default_scan_4x4) {
|
||||
return vp9_default_scan_4x4_neighbors;
|
||||
} else if (scan == vp9_row_scan_4x4) {
|
||||
return vp9_row_scan_4x4_neighbors;
|
||||
} else if (scan == vp9_col_scan_4x4) {
|
||||
return vp9_col_scan_4x4_neighbors;
|
||||
} else if (scan == vp9_default_scan_8x8) {
|
||||
return vp9_default_scan_8x8_neighbors;
|
||||
} else if (scan == vp9_row_scan_8x8) {
|
||||
return vp9_row_scan_8x8_neighbors;
|
||||
} else if (scan == vp9_col_scan_8x8) {
|
||||
return vp9_col_scan_8x8_neighbors;
|
||||
} else if (scan == vp9_default_scan_16x16) {
|
||||
return vp9_default_scan_16x16_neighbors;
|
||||
} else if (scan == vp9_row_scan_16x16) {
|
||||
return vp9_row_scan_16x16_neighbors;
|
||||
} else if (scan == vp9_col_scan_16x16) {
|
||||
return vp9_col_scan_16x16_neighbors;
|
||||
} else {
|
||||
assert(scan == vp9_default_scan_32x32);
|
||||
return vp9_default_scan_32x32_neighbors;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_coef_tree_initialize() {
|
||||
vp9_init_neighbors();
|
||||
init_bit_trees();
|
||||
|
@@ -190,9 +190,6 @@ static INLINE int get_coef_context(const int16_t *neighbors,
|
||||
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
|
||||
}
|
||||
|
||||
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan);
|
||||
|
||||
|
||||
// 128 lists of probabilities are stored for the following ONE node probs:
|
||||
// 1, 3, 5, 7, ..., 253, 255
|
||||
// In between probabilities are interpolated linearly
|
||||
@@ -336,37 +333,26 @@ static INLINE const int16_t* get_iscan_16x16(TX_TYPE tx_type) {
|
||||
}
|
||||
}
|
||||
|
||||
static int get_entropy_context(const MACROBLOCKD *xd, TX_SIZE tx_size,
|
||||
PLANE_TYPE type, int block_idx,
|
||||
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
|
||||
const int16_t **scan,
|
||||
const uint8_t **band_translate) {
|
||||
static int get_entropy_context(TX_SIZE tx_size,
|
||||
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
|
||||
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
*scan = get_scan_4x4(get_tx_type_4x4(type, xd, block_idx));
|
||||
*band_translate = vp9_coefband_trans_4x4;
|
||||
above_ec = A[0] != 0;
|
||||
left_ec = L[0] != 0;
|
||||
above_ec = a[0] != 0;
|
||||
left_ec = l[0] != 0;
|
||||
break;
|
||||
case TX_8X8:
|
||||
*scan = get_scan_8x8(get_tx_type_8x8(type, xd));
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
above_ec = !!*(uint16_t *)A;
|
||||
left_ec = !!*(uint16_t *)L;
|
||||
above_ec = !!*(uint16_t *)a;
|
||||
left_ec = !!*(uint16_t *)l;
|
||||
break;
|
||||
case TX_16X16:
|
||||
*scan = get_scan_16x16(get_tx_type_16x16(type, xd));
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
above_ec = !!*(uint32_t *)A;
|
||||
left_ec = !!*(uint32_t *)L;
|
||||
above_ec = !!*(uint32_t *)a;
|
||||
left_ec = !!*(uint32_t *)l;
|
||||
break;
|
||||
case TX_32X32:
|
||||
*scan = vp9_default_scan_32x32;
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
above_ec = !!*(uint64_t *)A;
|
||||
left_ec = !!*(uint64_t *)L;
|
||||
above_ec = !!*(uint64_t *)a;
|
||||
left_ec = !!*(uint64_t *)l;
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid transform size.");
|
||||
@@ -375,6 +361,35 @@ static int get_entropy_context(const MACROBLOCKD *xd, TX_SIZE tx_size,
|
||||
return combine_entropy_contexts(above_ec, left_ec);
|
||||
}
|
||||
|
||||
static void get_scan_and_band(const MACROBLOCKD *xd, TX_SIZE tx_size,
|
||||
PLANE_TYPE type, int block_idx,
|
||||
const int16_t **scan,
|
||||
const int16_t **scan_nb,
|
||||
const uint8_t **band_translate) {
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
get_scan_nb_4x4(get_tx_type_4x4(type, xd, block_idx), scan, scan_nb);
|
||||
*band_translate = vp9_coefband_trans_4x4;
|
||||
break;
|
||||
case TX_8X8:
|
||||
get_scan_nb_8x8(get_tx_type_8x8(type, xd), scan, scan_nb);
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
case TX_16X16:
|
||||
get_scan_nb_16x16(get_tx_type_16x16(type, xd), scan, scan_nb);
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
case TX_32X32:
|
||||
*scan = vp9_default_scan_32x32;
|
||||
*scan_nb = vp9_default_scan_32x32_neighbors;
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid transform size.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
enum { VP9_COEF_UPDATE_PROB = 252 };
|
||||
|
||||
#endif // VP9_COMMON_VP9_ENTROPY_H_
|
||||
|
@@ -14,7 +14,6 @@
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vp9/common/vp9_treecoder.h"
|
||||
|
||||
#define SUBMVREF_COUNT 5
|
||||
#define TX_SIZE_CONTEXTS 2
|
||||
#define MODE_UPDATE_PROB 252
|
||||
#define SWITCHABLE_FILTERS 3 // number of switchable filters
|
||||
|
@@ -39,12 +39,12 @@ const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = {
|
||||
};
|
||||
struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
|
||||
|
||||
const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = {
|
||||
const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2] = {
|
||||
-0, -1,
|
||||
};
|
||||
struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
|
||||
|
||||
const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = {
|
||||
const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2] = {
|
||||
-0, 2,
|
||||
-1, 4,
|
||||
-2, -3
|
||||
@@ -53,8 +53,8 @@ struct vp9_token vp9_mv_fp_encodings[4];
|
||||
|
||||
static const nmv_context default_nmv_context = {
|
||||
{32, 64, 96},
|
||||
{
|
||||
{ /* vert component */
|
||||
{ // NOLINT
|
||||
{ /* vert component */ // NOLINT
|
||||
128, /* sign */
|
||||
{224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, /* class */
|
||||
{216}, /* class0 */
|
||||
@@ -64,7 +64,7 @@ static const nmv_context default_nmv_context = {
|
||||
160, /* class0_hp bit */
|
||||
128, /* hp */
|
||||
},
|
||||
{ /* hor component */
|
||||
{ /* hor component */ // NOLINT
|
||||
128, /* sign */
|
||||
{216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, /* class */
|
||||
{208}, /* class0 */
|
||||
@@ -149,7 +149,7 @@ int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) {
|
||||
static void inc_mv_component(int v, nmv_component_counts *comp_counts,
|
||||
int incr, int usehp) {
|
||||
int s, z, c, o, d, e, f;
|
||||
assert (v != 0); /* should not be zero */
|
||||
assert(v != 0); /* should not be zero */
|
||||
s = v < 0;
|
||||
comp_counts->sign[s] += incr;
|
||||
z = (s ? -v : v) - 1; /* magnitude - 1 */
|
||||
@@ -198,8 +198,6 @@ static unsigned int adapt_probs(unsigned int i,
|
||||
vp9_prob this_probs[],
|
||||
const vp9_prob last_probs[],
|
||||
const unsigned int num_events[]) {
|
||||
|
||||
|
||||
const unsigned int left = tree[i] <= 0
|
||||
? num_events[-tree[i]]
|
||||
: adapt_probs(tree[i], tree, this_probs, last_probs, num_events);
|
||||
|
@@ -13,7 +13,7 @@
|
||||
#define VP9_COMMON_VP9_ENTROPYMV_H_
|
||||
|
||||
#include "vp9/common/vp9_treecoder.h"
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
|
||||
struct VP9Common;
|
||||
@@ -73,6 +73,10 @@ extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
|
||||
#define MV_MAX ((1 << MV_MAX_BITS) - 1)
|
||||
#define MV_VALS ((MV_MAX << 1) + 1)
|
||||
|
||||
#define MV_IN_USE_BITS 14
|
||||
#define MV_UPP ((1 << MV_IN_USE_BITS) - 1)
|
||||
#define MV_LOW (-(1 << MV_IN_USE_BITS))
|
||||
|
||||
extern const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2];
|
||||
extern struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
|
||||
|
||||
|
@@ -11,7 +11,7 @@
|
||||
#ifndef VP9_COMMON_VP9_FILTER_H_
|
||||
#define VP9_COMMON_VP9_FILTER_H_
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#define SUBPEL_BITS 4
|
||||
|
@@ -54,7 +54,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
|
||||
dst_list[1].as_int = 0;
|
||||
if (block_idx == 0) {
|
||||
memcpy(dst_list, mv_list, MAX_MV_REF_CANDIDATES * sizeof(int_mv));
|
||||
vpx_memcpy(dst_list, mv_list, MAX_MV_REF_CANDIDATES * sizeof(int_mv));
|
||||
} else if (block_idx == 1 || block_idx == 2) {
|
||||
int dst = 0, n;
|
||||
union b_mode_info *bmi = mi->bmi;
|
||||
|
@@ -55,13 +55,11 @@ static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb,
|
||||
if (!mi)
|
||||
return DC_PRED;
|
||||
|
||||
if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
|
||||
if (is_inter_block(&mi->mbmi))
|
||||
return DC_PRED;
|
||||
} else if (mi->mbmi.sb_type < BLOCK_8X8) {
|
||||
return ((mi->bmi + 1 + b)->as_mode);
|
||||
} else {
|
||||
return mi->mbmi.mode;
|
||||
}
|
||||
else
|
||||
return mi->mbmi.sb_type < BLOCK_8X8 ? (mi->bmi + 1 + b)->as_mode
|
||||
: mi->mbmi.mode;
|
||||
}
|
||||
assert(b == 1 || b == 3);
|
||||
return (mi->bmi + b - 1)->as_mode;
|
||||
@@ -77,13 +75,11 @@ static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
|
||||
if (!mi)
|
||||
return DC_PRED;
|
||||
|
||||
if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
|
||||
if (is_inter_block(&mi->mbmi))
|
||||
return DC_PRED;
|
||||
} else if (mi->mbmi.sb_type < BLOCK_8X8) {
|
||||
return ((mi->bmi + 2 + b)->as_mode);
|
||||
} else {
|
||||
return mi->mbmi.mode;
|
||||
}
|
||||
else
|
||||
return mi->mbmi.sb_type < BLOCK_8X8 ? (mi->bmi + 2 + b)->as_mode
|
||||
: mi->mbmi.mode;
|
||||
}
|
||||
|
||||
return (mi->bmi + b - 2)->as_mode;
|
||||
|
@@ -28,10 +28,10 @@ void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
||||
int16_t *op = output;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
a1 = ip[0] >> WHT_UPSCALE_FACTOR;
|
||||
c1 = ip[1] >> WHT_UPSCALE_FACTOR;
|
||||
d1 = ip[2] >> WHT_UPSCALE_FACTOR;
|
||||
b1 = ip[3] >> WHT_UPSCALE_FACTOR;
|
||||
a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
||||
c1 = ip[1] >> UNIT_QUANT_SHIFT;
|
||||
d1 = ip[2] >> UNIT_QUANT_SHIFT;
|
||||
b1 = ip[3] >> UNIT_QUANT_SHIFT;
|
||||
a1 += c1;
|
||||
d1 -= b1;
|
||||
e1 = (a1 - d1) >> 1;
|
||||
@@ -77,7 +77,7 @@ void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
|
||||
int16_t *ip = in;
|
||||
int16_t *op = tmp;
|
||||
|
||||
a1 = ip[0] >> WHT_UPSCALE_FACTOR;
|
||||
a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
||||
e1 = a1 >> 1;
|
||||
a1 -= e1;
|
||||
op[0] = a1;
|
||||
@@ -420,7 +420,7 @@ void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride,
|
||||
+ dest[j * dest_stride + i]); }
|
||||
}
|
||||
|
||||
void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
|
||||
void vp9_short_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
|
||||
int dest_stride) {
|
||||
int16_t out[8 * 8] = { 0 };
|
||||
int16_t *outptr = out;
|
||||
@@ -838,7 +838,7 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
|
||||
+ dest[j * dest_stride + i]); }
|
||||
}
|
||||
|
||||
void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
|
||||
void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
|
||||
int dest_stride) {
|
||||
int16_t out[16 * 16] = { 0 };
|
||||
int16_t *outptr = out;
|
||||
@@ -1269,8 +1269,18 @@ void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
|
||||
void vp9_short_idct32x32_1_add_c(int16_t *input, uint8_t *dest,
|
||||
int dest_stride) {
|
||||
int i, j;
|
||||
int a1;
|
||||
|
||||
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
||||
out = dct_const_round_shift(out * cospi_16_64);
|
||||
output[0] = ROUND_POWER_OF_TWO(out, 6);
|
||||
a1 = ROUND_POWER_OF_TWO(out, 6);
|
||||
|
||||
for (j = 0; j < 32; ++j) {
|
||||
for (i = 0; i < 32; ++i)
|
||||
dest[i] = clip_pixel(dest[i] + a1);
|
||||
dest += dest_stride;
|
||||
}
|
||||
}
|
||||
|
@@ -22,10 +22,11 @@
|
||||
#define DCT_CONST_BITS 14
|
||||
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
|
||||
|
||||
#define WHT_UPSCALE_FACTOR 2
|
||||
#define UNIT_QUANT_SHIFT 2
|
||||
#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
|
||||
|
||||
#define pair_set_epi16(a, b) \
|
||||
_mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16))
|
||||
_mm_set_epi16(b, a, b, a, b, a, b, a)
|
||||
|
||||
#define pair_set_epi32(a, b) \
|
||||
_mm_set_epi32(b, a, b, a)
|
||||
|
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_loopfilter.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
#include "vp9/common/vp9_reconinter.h"
|
||||
@@ -316,13 +316,13 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
|
||||
continue;
|
||||
}
|
||||
|
||||
intra_lvl = lvl_seg + (lf->ref_deltas[INTRA_FRAME] << n_shift);
|
||||
intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * (1 << n_shift);
|
||||
lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
|
||||
|
||||
for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref)
|
||||
for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
|
||||
const int inter_lvl = lvl_seg + (lf->ref_deltas[ref] << n_shift)
|
||||
+ (lf->mode_deltas[mode] << n_shift);
|
||||
const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * (1 << n_shift)
|
||||
+ lf->mode_deltas[mode] * (1 << n_shift);
|
||||
lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
|
||||
}
|
||||
}
|
||||
|
@@ -12,7 +12,7 @@
|
||||
#define VP9_COMMON_VP9_LOOPFILTER_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vp9/common/vp9_seg_common.h"
|
||||
|
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_loopfilter.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
@@ -108,7 +108,7 @@ static const int idx_n_column_to_subblock[4][2] = {
|
||||
};
|
||||
|
||||
// clamp_mv_ref
|
||||
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
|
||||
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
|
||||
|
||||
static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
|
||||
clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
|
||||
@@ -119,10 +119,9 @@ static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
|
||||
|
||||
// This function returns either the appropriate sub block or block's mv
|
||||
// on whether the block_size < 8x8 and we have check_sub_blocks set.
|
||||
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate,
|
||||
int check_sub_blocks, int which_mv,
|
||||
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
|
||||
int search_col, int block_idx) {
|
||||
return check_sub_blocks && candidate->mbmi.sb_type < BLOCK_8X8
|
||||
return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
|
||||
? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
|
||||
.as_mv[which_mv]
|
||||
: candidate->mbmi.mv[which_mv];
|
||||
@@ -203,7 +202,6 @@ void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
|
||||
for (i = 0; i < 2; ++i) {
|
||||
const MV *const mv_ref = &mv_ref_search[i];
|
||||
if (is_inside(cm, mi_col, mi_row, mv_ref)) {
|
||||
const int check_sub_blocks = block_idx >= 0;
|
||||
const MODE_INFO *const candidate_mi = xd->mi_8x8[mv_ref->col + mv_ref->row
|
||||
* xd->mode_info_stride];
|
||||
const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
|
||||
@@ -212,13 +210,13 @@ void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
|
||||
|
||||
// Check if the candidate comes from the same reference frame.
|
||||
if (candidate->ref_frame[0] == ref_frame) {
|
||||
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, check_sub_blocks, 0,
|
||||
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0,
|
||||
mv_ref->col, block_idx));
|
||||
different_ref_found = candidate->ref_frame[1] != ref_frame;
|
||||
} else {
|
||||
if (candidate->ref_frame[1] == ref_frame)
|
||||
// Add second motion vector if it has the same ref_frame.
|
||||
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, check_sub_blocks, 1,
|
||||
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1,
|
||||
mv_ref->col, block_idx));
|
||||
different_ref_found = 1;
|
||||
}
|
||||
|
@@ -13,7 +13,7 @@
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
{ // NOLINT
|
||||
#endif
|
||||
|
||||
#include "./vpx_config.h"
|
||||
@@ -33,7 +33,6 @@ extern "C"
|
||||
FOURFIVE = 1,
|
||||
THREEFIVE = 2,
|
||||
ONETWO = 3
|
||||
|
||||
} VPX_SCALING;
|
||||
|
||||
typedef enum {
|
||||
@@ -71,42 +70,48 @@ extern "C"
|
||||
// 3 - lowest quality/fastest decode
|
||||
int width; // width of data passed to the compressor
|
||||
int height; // height of data passed to the compressor
|
||||
double framerate; // set to passed in framerate
|
||||
int64_t target_bandwidth; // bandwidth to be used in kilobits per second
|
||||
double framerate; // set to passed in framerate
|
||||
int64_t target_bandwidth; // bandwidth to be used in kilobits per second
|
||||
|
||||
int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
|
||||
int Sharpness; // parameter used for sharpening output: recommendation 0:
|
||||
int noise_sensitivity; // pre processing blur: recommendation 0
|
||||
int Sharpness; // sharpening output: recommendation 0:
|
||||
int cpu_used;
|
||||
unsigned int rc_max_intra_bitrate_pct;
|
||||
|
||||
// mode ->
|
||||
// (0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing
|
||||
// a television signal or feed from a live camera). ( speed setting controls how fast )
|
||||
// (1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to
|
||||
// encode the output. ( speed setting controls how fast )
|
||||
// (2)=One Pass - Best Quality. The encoder places priority on the quality of the output over encoding
|
||||
// speed. The output is compressed at the highest possible quality. This option takes the longest
|
||||
// amount of time to encode. ( speed setting ignored )
|
||||
// (3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding
|
||||
// pass. ( speed setting controls how fast )
|
||||
// (4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding
|
||||
// pass to create the compressed output. ( speed setting controls how fast )
|
||||
// (5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first
|
||||
// encoding pass to create the compressed output using the highest possible quality, and taking a
|
||||
// (0)=Realtime/Live Encoding. This mode is optimized for realtime
|
||||
// encoding (for example, capturing a television signal or feed from
|
||||
// a live camera). ( speed setting controls how fast )
|
||||
// (1)=Good Quality Fast Encoding. The encoder balances quality with the
|
||||
// amount of time it takes to encode the output. ( speed setting
|
||||
// controls how fast )
|
||||
// (2)=One Pass - Best Quality. The encoder places priority on the
|
||||
// quality of the output over encoding speed. The output is compressed
|
||||
// at the highest possible quality. This option takes the longest
|
||||
// amount of time to encode. ( speed setting ignored )
|
||||
// (3)=Two Pass - First Pass. The encoder generates a file of statistics
|
||||
// for use in the second encoding pass. ( speed setting controls how
|
||||
// fast )
|
||||
// (4)=Two Pass - Second Pass. The encoder uses the statistics that were
|
||||
// generated in the first encoding pass to create the compressed
|
||||
// output. ( speed setting controls how fast )
|
||||
// (5)=Two Pass - Second Pass Best. The encoder uses the statistics that
|
||||
// were generated in the first encoding pass to create the compressed
|
||||
// output using the highest possible quality, and taking a
|
||||
// longer amount of time to encode. ( speed setting ignored )
|
||||
int Mode; //
|
||||
int Mode;
|
||||
|
||||
// Key Framing Operations
|
||||
int auto_key; // automatically detect cut scenes and set the keyframes
|
||||
int key_freq; // maximum distance to key frame.
|
||||
int auto_key; // autodetect cut scenes and set the keyframes
|
||||
int key_freq; // maximum distance to key frame.
|
||||
|
||||
int allow_lag; // allow lagged compression (if 0 lagin frames is ignored)
|
||||
int lag_in_frames; // how many frames lag before we start encoding
|
||||
int allow_lag; // allow lagged compression (if 0 lagin frames is ignored)
|
||||
int lag_in_frames; // how many frames lag before we start encoding
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// DATARATE CONTROL OPTIONS
|
||||
|
||||
int end_usage; // vbr or cbr
|
||||
int end_usage; // vbr or cbr
|
||||
|
||||
// buffer targeting aggressiveness
|
||||
int under_shoot_pct;
|
||||
@@ -138,7 +143,7 @@ extern "C"
|
||||
int play_alternate;
|
||||
int alt_freq;
|
||||
|
||||
int encode_breakout; // early breakout encode threshold : for video conf recommend 800
|
||||
int encode_breakout; // early breakout : for video conf recommend 800
|
||||
|
||||
/* Bitfield defining the error resiliency features to enable.
|
||||
* Can provide decodable frames after losses in previous
|
||||
@@ -173,8 +178,8 @@ extern "C"
|
||||
|
||||
void vp9_change_config(VP9_PTR onyx, VP9_CONFIG *oxcf);
|
||||
|
||||
// receive a frames worth of data caller can assume that a copy of this frame is made
|
||||
// and not just a copy of the pointer..
|
||||
// receive a frames worth of data. caller can assume that a copy of this
|
||||
// frame is made and not just a copy of the pointer..
|
||||
int vp9_receive_raw_frame(VP9_PTR comp, unsigned int frame_flags,
|
||||
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
|
||||
int64_t end_time_stamp);
|
||||
|
@@ -11,9 +11,9 @@
|
||||
#ifndef VP9_COMMON_VP9_ONYXC_INT_H_
|
||||
#define VP9_COMMON_VP9_ONYXC_INT_H_
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_loopfilter.h"
|
||||
#include "vp9/common/vp9_entropymv.h"
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
@@ -120,7 +120,7 @@ typedef struct VP9Common {
|
||||
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
|
||||
FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
@@ -280,10 +280,10 @@ static int check_bsize_coverage(int bs, int mi_rows, int mi_cols,
|
||||
static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int bh,
|
||||
int mi_col, int bw) {
|
||||
xd->mb_to_top_edge = -((mi_row * MI_SIZE) << 3);
|
||||
xd->mb_to_bottom_edge = ((cm->mi_rows - bh - mi_row) * MI_SIZE) << 3;
|
||||
xd->mb_to_left_edge = -((mi_col * MI_SIZE) << 3);
|
||||
xd->mb_to_right_edge = ((cm->mi_cols - bw - mi_col) * MI_SIZE) << 3;
|
||||
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
|
||||
xd->mb_to_bottom_edge = ((cm->mi_rows - bh - mi_row) * MI_SIZE) * 8;
|
||||
xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
|
||||
xd->mb_to_right_edge = ((cm->mi_cols - bw - mi_col) * MI_SIZE) * 8;
|
||||
|
||||
// Are edges available for intra prediction?
|
||||
xd->up_available = (mi_row != 0);
|
||||
|
@@ -8,6 +8,9 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
@@ -18,11 +21,6 @@
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define RGB_TO_YUV(t) \
|
||||
( (0.257*(float)(t >> 16)) + (0.504*(float)(t >> 8 & 0xff)) + \
|
||||
(0.098*(float)(t & 0xff)) + 16), \
|
||||
@@ -155,7 +153,6 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr,
|
||||
p_dst = dst_ptr;
|
||||
|
||||
for (col = 0; col < cols; col++) {
|
||||
|
||||
int kernel = 4;
|
||||
int v = p_src[col];
|
||||
|
||||
@@ -257,7 +254,7 @@ void vp9_mbpost_proc_across_ip_c(uint8_t *src, int pitch,
|
||||
void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch,
|
||||
int rows, int cols, int flimit) {
|
||||
int r, c, i;
|
||||
const short *rv3 = &vp9_rv[63 & rand()];
|
||||
const short *rv3 = &vp9_rv[63 & rand()]; // NOLINT
|
||||
|
||||
for (c = 0; c < cols; c++) {
|
||||
uint8_t *s = &dst[c];
|
||||
@@ -408,7 +405,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
|
||||
|
||||
next = next + j;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (; next < 256; next++)
|
||||
@@ -416,7 +412,7 @@ static void fillrd(struct postproc_state *state, int q, int a) {
|
||||
}
|
||||
|
||||
for (i = 0; i < 3072; i++) {
|
||||
state->noise[i] = char_dist[rand() & 0xff];
|
||||
state->noise[i] = char_dist[rand() & 0xff]; // NOLINT
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
@@ -680,13 +676,14 @@ int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
#if 0 && CONFIG_POSTPROC_VISUALIZER
|
||||
if (flags & VP9D_DEBUG_TXT_FRAME_INFO) {
|
||||
char message[512];
|
||||
sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
|
||||
(cm->frame_type == KEY_FRAME),
|
||||
cm->refresh_golden_frame,
|
||||
cm->base_qindex,
|
||||
cm->filter_level,
|
||||
flags,
|
||||
cm->mb_cols, cm->mb_rows);
|
||||
snprintf(message, sizeof(message) -1,
|
||||
"F%1dG%1dQ%3dF%3dP%d_s%dx%d",
|
||||
(cm->frame_type == KEY_FRAME),
|
||||
cm->refresh_golden_frame,
|
||||
cm->base_qindex,
|
||||
cm->filter_level,
|
||||
flags,
|
||||
cm->mb_cols, cm->mb_rows);
|
||||
vp9_blit_text(message, cm->post_proc_buffer.y_buffer,
|
||||
cm->post_proc_buffer.y_stride);
|
||||
}
|
||||
@@ -707,7 +704,7 @@ int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
for (j = 0; j < mb_cols; j++) {
|
||||
char zz[4];
|
||||
|
||||
sprintf(zz, "%c", mi[mb_index].mbmi.mode + 'a');
|
||||
snprintf(zz, sizeof(zz) - 1, "%c", mi[mb_index].mbmi.mode + 'a');
|
||||
|
||||
vp9_blit_text(zz, y_ptr, post->y_stride);
|
||||
mb_index++;
|
||||
@@ -716,7 +713,6 @@ int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
|
||||
mb_index++; /* border */
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -740,9 +736,9 @@ int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
mi[mb_index].mbmi.skip_coeff);
|
||||
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
sprintf(zz, "a");
|
||||
snprintf(zz, sizeof(zz) - 1, "a");
|
||||
else
|
||||
sprintf(zz, "%c", dc_diff + '0');
|
||||
snprintf(zz, sizeof(zz) - 1, "%c", dc_diff + '0');
|
||||
|
||||
vp9_blit_text(zz, y_ptr, post->y_stride);
|
||||
mb_index++;
|
||||
@@ -751,7 +747,6 @@ int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
|
||||
mb_index++; /* border */
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -894,8 +889,9 @@ int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
|
||||
constrain_line(lx0, &x1, ly0 + 1, &y1, width, height);
|
||||
vp9_blit_line(lx0, x1, ly0 + 1, y1, y_buffer, y_stride);
|
||||
} else
|
||||
} else {
|
||||
vp9_blit_line(lx0, x1, ly0, y1, y_buffer, y_stride);
|
||||
}
|
||||
}
|
||||
|
||||
mi++;
|
||||
|
@@ -69,8 +69,9 @@ unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
|
||||
const MACROBLOCKD *xd);
|
||||
|
||||
|
||||
static INLINE vp9_prob vp9_get_pred_prob_comp_inter_inter(const VP9_COMMON *cm,
|
||||
const MACROBLOCKD *xd) {
|
||||
static INLINE
|
||||
vp9_prob vp9_get_pred_prob_comp_inter_inter(const VP9_COMMON *cm,
|
||||
const MACROBLOCKD *xd) {
|
||||
const int pred_context = vp9_get_pred_context_comp_inter_inter(cm, xd);
|
||||
return cm->fc.comp_inter_prob[pred_context];
|
||||
}
|
||||
|
@@ -14,69 +14,69 @@
|
||||
|
||||
#if 1
|
||||
static const int16_t dc_qlookup[QINDEX_RANGE] = {
|
||||
4, 8, 8, 9, 10, 11, 12, 12,
|
||||
13, 14, 15, 16, 17, 18, 19, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 26,
|
||||
27, 28, 29, 30, 31, 32, 32, 33,
|
||||
34, 35, 36, 37, 38, 38, 39, 40,
|
||||
41, 42, 43, 43, 44, 45, 46, 47,
|
||||
48, 48, 49, 50, 51, 52, 53, 53,
|
||||
54, 55, 56, 57, 57, 58, 59, 60,
|
||||
61, 62, 62, 63, 64, 65, 66, 66,
|
||||
67, 68, 69, 70, 70, 71, 72, 73,
|
||||
74, 74, 75, 76, 77, 78, 78, 79,
|
||||
80, 81, 81, 82, 83, 84, 85, 85,
|
||||
87, 88, 90, 92, 93, 95, 96, 98,
|
||||
99, 101, 102, 104, 105, 107, 108, 110,
|
||||
111, 113, 114, 116, 117, 118, 120, 121,
|
||||
123, 125, 127, 129, 131, 134, 136, 138,
|
||||
140, 142, 144, 146, 148, 150, 152, 154,
|
||||
156, 158, 161, 164, 166, 169, 172, 174,
|
||||
177, 180, 182, 185, 187, 190, 192, 195,
|
||||
199, 202, 205, 208, 211, 214, 217, 220,
|
||||
223, 226, 230, 233, 237, 240, 243, 247,
|
||||
250, 253, 257, 261, 265, 269, 272, 276,
|
||||
280, 284, 288, 292, 296, 300, 304, 309,
|
||||
313, 317, 322, 326, 330, 335, 340, 344,
|
||||
349, 354, 359, 364, 369, 374, 379, 384,
|
||||
389, 395, 400, 406, 411, 417, 423, 429,
|
||||
435, 441, 447, 454, 461, 467, 475, 482,
|
||||
489, 497, 505, 513, 522, 530, 539, 549,
|
||||
559, 569, 579, 590, 602, 614, 626, 640,
|
||||
654, 668, 684, 700, 717, 736, 755, 775,
|
||||
796, 819, 843, 869, 896, 925, 955, 988,
|
||||
4, 8, 8, 9, 10, 11, 12, 12,
|
||||
13, 14, 15, 16, 17, 18, 19, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 26,
|
||||
27, 28, 29, 30, 31, 32, 32, 33,
|
||||
34, 35, 36, 37, 38, 38, 39, 40,
|
||||
41, 42, 43, 43, 44, 45, 46, 47,
|
||||
48, 48, 49, 50, 51, 52, 53, 53,
|
||||
54, 55, 56, 57, 57, 58, 59, 60,
|
||||
61, 62, 62, 63, 64, 65, 66, 66,
|
||||
67, 68, 69, 70, 70, 71, 72, 73,
|
||||
74, 74, 75, 76, 77, 78, 78, 79,
|
||||
80, 81, 81, 82, 83, 84, 85, 85,
|
||||
87, 88, 90, 92, 93, 95, 96, 98,
|
||||
99, 101, 102, 104, 105, 107, 108, 110,
|
||||
111, 113, 114, 116, 117, 118, 120, 121,
|
||||
123, 125, 127, 129, 131, 134, 136, 138,
|
||||
140, 142, 144, 146, 148, 150, 152, 154,
|
||||
156, 158, 161, 164, 166, 169, 172, 174,
|
||||
177, 180, 182, 185, 187, 190, 192, 195,
|
||||
199, 202, 205, 208, 211, 214, 217, 220,
|
||||
223, 226, 230, 233, 237, 240, 243, 247,
|
||||
250, 253, 257, 261, 265, 269, 272, 276,
|
||||
280, 284, 288, 292, 296, 300, 304, 309,
|
||||
313, 317, 322, 326, 330, 335, 340, 344,
|
||||
349, 354, 359, 364, 369, 374, 379, 384,
|
||||
389, 395, 400, 406, 411, 417, 423, 429,
|
||||
435, 441, 447, 454, 461, 467, 475, 482,
|
||||
489, 497, 505, 513, 522, 530, 539, 549,
|
||||
559, 569, 579, 590, 602, 614, 626, 640,
|
||||
654, 668, 684, 700, 717, 736, 755, 775,
|
||||
796, 819, 843, 869, 896, 925, 955, 988,
|
||||
1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
|
||||
};
|
||||
|
||||
static const int16_t ac_qlookup[QINDEX_RANGE] = {
|
||||
4, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22,
|
||||
23, 24, 25, 26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35, 36, 37, 38,
|
||||
39, 40, 41, 42, 43, 44, 45, 46,
|
||||
47, 48, 49, 50, 51, 52, 53, 54,
|
||||
55, 56, 57, 58, 59, 60, 61, 62,
|
||||
63, 64, 65, 66, 67, 68, 69, 70,
|
||||
71, 72, 73, 74, 75, 76, 77, 78,
|
||||
79, 80, 81, 82, 83, 84, 85, 86,
|
||||
87, 88, 89, 90, 91, 92, 93, 94,
|
||||
95, 96, 97, 98, 99, 100, 101, 102,
|
||||
104, 106, 108, 110, 112, 114, 116, 118,
|
||||
120, 122, 124, 126, 128, 130, 132, 134,
|
||||
136, 138, 140, 142, 144, 146, 148, 150,
|
||||
152, 155, 158, 161, 164, 167, 170, 173,
|
||||
176, 179, 182, 185, 188, 191, 194, 197,
|
||||
200, 203, 207, 211, 215, 219, 223, 227,
|
||||
231, 235, 239, 243, 247, 251, 255, 260,
|
||||
265, 270, 275, 280, 285, 290, 295, 300,
|
||||
305, 311, 317, 323, 329, 335, 341, 347,
|
||||
353, 359, 366, 373, 380, 387, 394, 401,
|
||||
408, 416, 424, 432, 440, 448, 456, 465,
|
||||
474, 483, 492, 501, 510, 520, 530, 540,
|
||||
550, 560, 571, 582, 593, 604, 615, 627,
|
||||
639, 651, 663, 676, 689, 702, 715, 729,
|
||||
743, 757, 771, 786, 801, 816, 832, 848,
|
||||
864, 881, 898, 915, 933, 951, 969, 988,
|
||||
4, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22,
|
||||
23, 24, 25, 26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35, 36, 37, 38,
|
||||
39, 40, 41, 42, 43, 44, 45, 46,
|
||||
47, 48, 49, 50, 51, 52, 53, 54,
|
||||
55, 56, 57, 58, 59, 60, 61, 62,
|
||||
63, 64, 65, 66, 67, 68, 69, 70,
|
||||
71, 72, 73, 74, 75, 76, 77, 78,
|
||||
79, 80, 81, 82, 83, 84, 85, 86,
|
||||
87, 88, 89, 90, 91, 92, 93, 94,
|
||||
95, 96, 97, 98, 99, 100, 101, 102,
|
||||
104, 106, 108, 110, 112, 114, 116, 118,
|
||||
120, 122, 124, 126, 128, 130, 132, 134,
|
||||
136, 138, 140, 142, 144, 146, 148, 150,
|
||||
152, 155, 158, 161, 164, 167, 170, 173,
|
||||
176, 179, 182, 185, 188, 191, 194, 197,
|
||||
200, 203, 207, 211, 215, 219, 223, 227,
|
||||
231, 235, 239, 243, 247, 251, 255, 260,
|
||||
265, 270, 275, 280, 285, 290, 295, 300,
|
||||
305, 311, 317, 323, 329, 335, 341, 347,
|
||||
353, 359, 366, 373, 380, 387, 394, 401,
|
||||
408, 416, 424, 432, 440, 448, 456, 465,
|
||||
474, 483, 492, 501, 510, 520, 530, 540,
|
||||
550, 560, 571, 582, 593, 604, 615, 627,
|
||||
639, 651, 663, 676, 689, 702, 715, 729,
|
||||
743, 757, 771, 786, 801, 816, 832, 848,
|
||||
864, 881, 898, 915, 933, 951, 969, 988,
|
||||
1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151,
|
||||
1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343,
|
||||
1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567,
|
||||
|
@@ -59,8 +59,8 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
|
||||
const struct subpix_fn_table *subpix,
|
||||
enum mv_precision precision) {
|
||||
const int is_q4 = precision == MV_PRECISION_Q4;
|
||||
const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row << 1,
|
||||
is_q4 ? src_mv->col : src_mv->col << 1 };
|
||||
const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
|
||||
is_q4 ? src_mv->col : src_mv->col * 2 };
|
||||
const MV32 mv = scale->scale_mv(&mv_q4, scale);
|
||||
const int subpel_x = mv.col & SUBPEL_MASK;
|
||||
const int subpel_y = mv.row & SUBPEL_MASK;
|
||||
@@ -100,16 +100,17 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
|
||||
const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS;
|
||||
const int spel_bottom = spel_top - SUBPEL_SHIFTS;
|
||||
MV clamped_mv = {
|
||||
src_mv->row << (1 - ss_y),
|
||||
src_mv->col << (1 - ss_x)
|
||||
src_mv->row * (1 << (1 - ss_y)),
|
||||
src_mv->col * (1 << (1 - ss_x))
|
||||
};
|
||||
assert(ss_x <= 1);
|
||||
assert(ss_y <= 1);
|
||||
|
||||
clamp_mv(&clamped_mv, (xd->mb_to_left_edge << (1 - ss_x)) - spel_left,
|
||||
(xd->mb_to_right_edge << (1 - ss_x)) + spel_right,
|
||||
(xd->mb_to_top_edge << (1 - ss_y)) - spel_top,
|
||||
(xd->mb_to_bottom_edge << (1 - ss_y)) + spel_bottom);
|
||||
clamp_mv(&clamped_mv,
|
||||
xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left,
|
||||
xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right,
|
||||
xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top,
|
||||
xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom);
|
||||
|
||||
return clamped_mv;
|
||||
}
|
||||
@@ -131,7 +132,7 @@ static void build_inter_predictors(int plane, int block, BLOCK_SIZE bsize,
|
||||
const int x = 4 * (block & ((1 << bwl) - 1));
|
||||
const int y = 4 * (block >> bwl);
|
||||
const MODE_INFO *mi = xd->this_mi;
|
||||
const int use_second_ref = mi->mbmi.ref_frame[1] > 0;
|
||||
const int is_compound = has_second_ref(&mi->mbmi);
|
||||
int ref;
|
||||
|
||||
assert(x < bw);
|
||||
@@ -139,7 +140,7 @@ static void build_inter_predictors(int plane, int block, BLOCK_SIZE bsize,
|
||||
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == bw);
|
||||
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == bh);
|
||||
|
||||
for (ref = 0; ref < 1 + use_second_ref; ++ref) {
|
||||
for (ref = 0; ref < 1 + is_compound; ++ref) {
|
||||
struct scale_factors *const scale = &xd->scale_factor[ref];
|
||||
struct buf_2d *const pre_buf = &pd->pre[ref];
|
||||
struct buf_2d *const dst_buf = &pd->dst;
|
||||
|
@@ -13,7 +13,7 @@
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_ports/vpx_once.h"
|
||||
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
|
||||
#include "vp9/common/vp9_reconintra.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
@@ -7,9 +7,9 @@
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#define RTCD_C
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vpx_ports/vpx_once.h"
|
||||
|
||||
void vpx_scale_rtcd(void);
|
||||
|
@@ -53,7 +53,7 @@ prototype void vp9_d45_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const ui
|
||||
specialize vp9_d45_predictor_4x4 $ssse3_x86inc
|
||||
|
||||
prototype void vp9_d63_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_d63_predictor_4x4
|
||||
specialize vp9_d63_predictor_4x4 $ssse3_x86inc
|
||||
|
||||
prototype void vp9_h_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_h_predictor_4x4 $ssse3_x86inc
|
||||
@@ -92,7 +92,7 @@ prototype void vp9_d45_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const ui
|
||||
specialize vp9_d45_predictor_8x8 $ssse3_x86inc
|
||||
|
||||
prototype void vp9_d63_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_d63_predictor_8x8
|
||||
specialize vp9_d63_predictor_8x8 $ssse3_x86inc
|
||||
|
||||
prototype void vp9_h_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_h_predictor_8x8 $ssse3_x86inc
|
||||
@@ -131,7 +131,7 @@ prototype void vp9_d45_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const
|
||||
specialize vp9_d45_predictor_16x16 $ssse3_x86inc
|
||||
|
||||
prototype void vp9_d63_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_d63_predictor_16x16
|
||||
specialize vp9_d63_predictor_16x16 $ssse3_x86inc
|
||||
|
||||
prototype void vp9_h_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_h_predictor_16x16 $ssse3_x86inc
|
||||
@@ -170,7 +170,7 @@ prototype void vp9_d45_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const
|
||||
specialize vp9_d45_predictor_32x32 $ssse3_x86inc
|
||||
|
||||
prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_d63_predictor_32x32
|
||||
specialize vp9_d63_predictor_32x32 $ssse3_x86inc
|
||||
|
||||
prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_h_predictor_32x32 $ssse3 x86inc
|
||||
@@ -202,17 +202,6 @@ specialize vp9_dc_left_predictor_32x32
|
||||
prototype void vp9_dc_128_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
|
||||
specialize vp9_dc_128_predictor_32x32
|
||||
|
||||
if [ "$CONFIG_VP9_DECODER" = "yes" ]; then
|
||||
prototype void vp9_add_constant_residual_8x8 "const int16_t diff, uint8_t *dest, int stride"
|
||||
specialize vp9_add_constant_residual_8x8 sse2 neon
|
||||
|
||||
prototype void vp9_add_constant_residual_16x16 "const int16_t diff, uint8_t *dest, int stride"
|
||||
specialize vp9_add_constant_residual_16x16 sse2 neon
|
||||
|
||||
prototype void vp9_add_constant_residual_32x32 "const int16_t diff, uint8_t *dest, int stride"
|
||||
specialize vp9_add_constant_residual_32x32 sse2 neon
|
||||
fi
|
||||
|
||||
#
|
||||
# Loopfilter
|
||||
#
|
||||
@@ -306,8 +295,8 @@ specialize vp9_short_idct8x8_1_add sse2 neon
|
||||
prototype void vp9_short_idct8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct8x8_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct10_8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct10_8x8_add sse2 neon
|
||||
prototype void vp9_short_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct8x8_10_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct16x16_1_add sse2 neon
|
||||
@@ -315,14 +304,14 @@ specialize vp9_short_idct16x16_1_add sse2 neon
|
||||
prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct16x16_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct10_16x16_add sse2 neon
|
||||
prototype void vp9_short_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct16x16_10_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct32x32_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
|
||||
specialize vp9_short_idct1_32x32
|
||||
prototype void vp9_short_idct32x32_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct32x32_1_add sse2
|
||||
|
||||
prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
|
||||
specialize vp9_short_iht4x4_add sse2 neon
|
||||
|
@@ -48,4 +48,4 @@ static int vp9_is_scaled(const struct scale_factors *sf) {
|
||||
sf->y_scale_fp != REF_NO_SCALE;
|
||||
}
|
||||
|
||||
#endif // VP9_COMMON_VP9_SCALE_H_
|
||||
#endif // VP9_COMMON_VP9_SCALE_H_
|
||||
|
@@ -1,145 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP9_COMMON_VP9_SUBPELVAR_H_
|
||||
#define VP9_COMMON_VP9_SUBPELVAR_H_
|
||||
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_convolve.h"
|
||||
|
||||
/*
 * Accumulates the sum of differences and the sum of squared differences
 * between a w x h block read from src_ptr and the block read from ref_ptr.
 *
 * src_ptr / source_stride : first block and the step between its rows.
 * ref_ptr / recon_stride  : second block and the step between its rows.
 * w, h                    : number of columns and rows to compare.
 * sse                     : receives the sum of squared differences.
 * sum                     : receives the signed sum of differences
 *                           (src - ref).
 *
 * Both outputs are written as 0 when w or h is not positive.
 */
static void variance(const uint8_t *src_ptr,
                     int source_stride,
                     const uint8_t *ref_ptr,
                     int recon_stride,
                     int w,
                     int h,
                     unsigned int *sse,
                     int *sum) {
  int row, col;
  // Accumulate in locals so the inner loop does not have to store through
  // the output pointers for every pixel.
  unsigned int sq_total = 0;
  int diff_total = 0;

  for (row = 0; row < h; row++) {
    for (col = 0; col < w; col++) {
      const int diff = src_ptr[col] - ref_ptr[col];
      diff_total += diff;
      sq_total += diff * diff;
    }

    // Step both blocks to the start of their next row.
    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
*
|
||||
* INPUTS : uint8_t *src_ptr : Pointer to source block.
|
||||
* uint32_t src_pixels_per_line : Stride of input block.
|
||||
* uint32_t pixel_step : Offset between filter input samples (see notes).
|
||||
* uint32_t output_height : Input block height.
|
||||
* uint32_t output_width : Input block width.
|
||||
* int32_t *vp9_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : int32_t *output_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
|
||||
* either horizontal or vertical direction to produce the
|
||||
* filtered output block. Used to implement first-pass
|
||||
* of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
|
||||
* Two filter taps should sum to VP9_FILTER_WEIGHT.
|
||||
* pixel_step defines whether the filter is applied
|
||||
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
|
||||
* It defines the offset required to move from one input
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
|
||||
uint16_t *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int16_t *vp9_filter) {
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; i++) {
|
||||
for (j = 0; j < output_width; j++) {
|
||||
output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
|
||||
(int)src_ptr[pixel_step] * vp9_filter[1],
|
||||
FILTER_BITS);
|
||||
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
// Next row...
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : var_filter_block2d_bil_second_pass
|
||||
*
|
||||
* INPUTS : const uint16_t *src_ptr : Pointer to source block.
|
||||
* uint32_t src_pixels_per_line : Stride of input block.
|
||||
* uint32_t pixel_step : Offset between filter input samples (see notes).
|
||||
* uint32_t output_height : Input block height.
|
||||
* uint32_t output_width : Input block width.
|
||||
* const int16_t *vp9_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : uint8_t *output_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
|
||||
* either horizontal or vertical direction to produce the
|
||||
* filtered output block. Used to implement second-pass
|
||||
* of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires 16-bit input as produced by var_filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP9_FILTER_WEIGHT.
|
||||
* pixel_step defines whether the filter is applied
|
||||
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
|
||||
* It defines the offset required to move from one input
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
|
||||
uint8_t *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int16_t *vp9_filter) {
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; i++) {
|
||||
for (j = 0; j < output_width; j++) {
|
||||
output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
|
||||
(int)src_ptr[pixel_step] * vp9_filter[1],
|
||||
FILTER_BITS);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // VP9_COMMON_VP9_SUBPELVAR_H_
|
@@ -25,8 +25,9 @@ static void tree2tok(struct vp9_token *const p, vp9_tree t,
|
||||
if (j <= 0) {
|
||||
p[-j].value = v;
|
||||
p[-j].len = l;
|
||||
} else
|
||||
} else {
|
||||
tree2tok(p, t, j, v, l);
|
||||
}
|
||||
} while (++v & 1);
|
||||
}
|
||||
|
||||
@@ -65,11 +66,9 @@ static unsigned int convert_distribution(unsigned int i,
|
||||
return left + right;
|
||||
}
|
||||
|
||||
void vp9_tree_probs_from_distribution(
|
||||
vp9_tree tree,
|
||||
vp9_prob probs [ /* n-1 */ ],
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ],
|
||||
unsigned int tok0_offset) {
|
||||
void vp9_tree_probs_from_distribution(vp9_tree tree, vp9_prob probs[/* n-1 */],
|
||||
unsigned int branch_ct[/* n-1 */][2],
|
||||
const unsigned int num_events[/* n */],
|
||||
unsigned int tok0_offset) {
|
||||
convert_distribution(0, tree, probs, branch_ct, num_events, tok0_offset);
|
||||
}
|
||||
|
@@ -985,7 +985,7 @@ void vp9_short_iht8x8_add_sse2(int16_t *input, uint8_t *dest, int stride,
|
||||
RECON_AND_STORE(dest, in[7]);
|
||||
}
|
||||
|
||||
void vp9_short_idct10_8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
void vp9_short_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
|
||||
const __m128i final_rounding = _mm_set1_epi16(1<<4);
|
||||
@@ -1014,7 +1014,7 @@ void vp9_short_idct10_8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3)
|
||||
|
||||
// Stage1
|
||||
{
|
||||
{ //NOLINT
|
||||
const __m128i lo_17 = _mm_unpackhi_epi16(in0, in3);
|
||||
const __m128i lo_35 = _mm_unpackhi_epi16(in1, in2);
|
||||
|
||||
@@ -1039,7 +1039,7 @@ void vp9_short_idct10_8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
}
|
||||
|
||||
// Stage2
|
||||
{
|
||||
{ //NOLINT
|
||||
const __m128i lo_04 = _mm_unpacklo_epi16(in0, in2);
|
||||
const __m128i lo_26 = _mm_unpacklo_epi16(in1, in3);
|
||||
|
||||
@@ -1069,7 +1069,7 @@ void vp9_short_idct10_8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
}
|
||||
|
||||
// Stage3
|
||||
{
|
||||
{ //NOLINT
|
||||
const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6);
|
||||
stp1_0 = _mm_adds_epi16(stp2_0, stp2_3);
|
||||
stp1_1 = _mm_adds_epi16(stp2_1, stp2_2);
|
||||
@@ -2456,7 +2456,7 @@ void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride,
|
||||
write_buffer_8x16(dest, in1, stride);
|
||||
}
|
||||
|
||||
void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest,
|
||||
void vp9_short_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
|
||||
int stride) {
|
||||
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
|
||||
const __m128i final_rounding = _mm_set1_epi16(1<<5);
|
||||
@@ -3548,4 +3548,52 @@ void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
dest += 8 - (stride * 32);
|
||||
}
|
||||
}
|
||||
} //NOLINT
|
||||
|
||||
void vp9_short_idct32x32_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
__m128i dc_value;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int a, i;
|
||||
|
||||
a = dct_const_round_shift(input[0] * cospi_16_64);
|
||||
a = dct_const_round_shift(a * cospi_16_64);
|
||||
a = ROUND_POWER_OF_TWO(a, 6);
|
||||
|
||||
dc_value = _mm_set1_epi16(a);
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
RECON_AND_STORE(dest, dc_value);
|
||||
dest += 8 - (stride * 32);
|
||||
}
|
||||
}
|
||||
|
@@ -19,12 +19,14 @@ pw_32: times 8 dw 32
|
||||
SECTION .text
|
||||
|
||||
INIT_MMX sse
|
||||
cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left
|
||||
cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
pxor m1, m1
|
||||
movd m0, [aboveq]
|
||||
punpckldq m0, [leftq]
|
||||
psadbw m0, m1
|
||||
paddw m0, [pw_4]
|
||||
paddw m0, [GLOBAL(pw_4)]
|
||||
psraw m0, 3
|
||||
pshufw m0, m0, 0x0
|
||||
packuswb m0, m0
|
||||
@@ -33,10 +35,14 @@ cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left
|
||||
lea dstq, [dstq+strideq*2]
|
||||
movd [dstq ], m0
|
||||
movd [dstq+strideq], m0
|
||||
|
||||
RESTORE_GOT
|
||||
RET
|
||||
|
||||
INIT_MMX sse
|
||||
cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
|
||||
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
pxor m1, m1
|
||||
movq m0, [aboveq]
|
||||
movq m2, [leftq]
|
||||
@@ -45,7 +51,7 @@ cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
|
||||
psadbw m0, m1
|
||||
psadbw m2, m1
|
||||
paddw m0, m2
|
||||
paddw m0, [pw_8]
|
||||
paddw m0, [GLOBAL(pw_8)]
|
||||
psraw m0, 4
|
||||
pshufw m0, m0, 0x0
|
||||
packuswb m0, m0
|
||||
@@ -58,10 +64,14 @@ cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
|
||||
movq [dstq+strideq ], m0
|
||||
movq [dstq+strideq*2], m0
|
||||
movq [dstq+stride3q ], m0
|
||||
|
||||
RESTORE_GOT
|
||||
RET
|
||||
|
||||
INIT_XMM sse2
|
||||
cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
|
||||
cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
pxor m1, m1
|
||||
mova m0, [aboveq]
|
||||
mova m2, [leftq]
|
||||
@@ -73,7 +83,7 @@ cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
|
||||
paddw m0, m2
|
||||
movhlps m2, m0
|
||||
paddw m0, m2
|
||||
paddw m0, [pw_16]
|
||||
paddw m0, [GLOBAL(pw_16)]
|
||||
psraw m0, 5
|
||||
pshuflw m0, m0, 0x0
|
||||
punpcklqdq m0, m0
|
||||
@@ -86,10 +96,14 @@ cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
|
||||
lea dstq, [dstq+strideq*4]
|
||||
dec lines4d
|
||||
jnz .loop
|
||||
|
||||
RESTORE_GOT
|
||||
REP_RET
|
||||
|
||||
INIT_XMM sse2
|
||||
cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
|
||||
cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
pxor m1, m1
|
||||
mova m0, [aboveq]
|
||||
mova m2, [aboveq+16]
|
||||
@@ -107,7 +121,7 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
|
||||
paddw m0, m4
|
||||
movhlps m2, m0
|
||||
paddw m0, m2
|
||||
paddw m0, [pw_32]
|
||||
paddw m0, [GLOBAL(pw_32)]
|
||||
psraw m0, 6
|
||||
pshuflw m0, m0, 0x0
|
||||
punpcklqdq m0, m0
|
||||
@@ -124,6 +138,8 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
|
||||
lea dstq, [dstq+strideq*4]
|
||||
dec lines4d
|
||||
jnz .loop
|
||||
|
||||
RESTORE_GOT
|
||||
REP_RET
|
||||
|
||||
INIT_MMX sse
|
||||
|
@@ -17,8 +17,8 @@ pw_2: times 8 dw 2
|
||||
pb_7m1: times 8 db 7, -1
|
||||
pb_15: times 16 db 15
|
||||
|
||||
sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7
|
||||
sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7
|
||||
sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
|
||||
sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
|
||||
@@ -112,14 +112,16 @@ cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left
|
||||
REP_RET
|
||||
|
||||
INIT_MMX ssse3
|
||||
cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
|
||||
cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
movq m0, [aboveq]
|
||||
pshufb m2, m0, [sh_b23456777]
|
||||
pshufb m1, m0, [sh_b01234577]
|
||||
pshufb m0, [sh_b12345677]
|
||||
pshufb m2, m0, [GLOBAL(sh_b23456777)]
|
||||
pshufb m1, m0, [GLOBAL(sh_b01234577)]
|
||||
pshufb m0, [GLOBAL(sh_b12345677)]
|
||||
pavgb m3, m2, m1
|
||||
pxor m2, m1
|
||||
pand m2, [pb_1]
|
||||
pand m2, [GLOBAL(pb_1)]
|
||||
psubb m3, m2
|
||||
pavgb m0, m3
|
||||
|
||||
@@ -132,19 +134,23 @@ cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
|
||||
movd [dstq ], m0
|
||||
psrlq m0, 8
|
||||
movd [dstq+strideq], m0
|
||||
|
||||
RESTORE_GOT
|
||||
RET
|
||||
|
||||
INIT_MMX ssse3
|
||||
cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
|
||||
cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
movq m0, [aboveq]
|
||||
mova m1, [sh_b12345677]
|
||||
DEFINE_ARGS dst, stride, stride3, line
|
||||
mova m1, [GLOBAL(sh_b12345677)]
|
||||
DEFINE_ARGS dst, stride, stride3
|
||||
lea stride3q, [strideq*3]
|
||||
pshufb m2, m0, [sh_b23456777]
|
||||
pshufb m2, m0, [GLOBAL(sh_b23456777)]
|
||||
pavgb m3, m2, m0
|
||||
pxor m2, m0
|
||||
pshufb m0, m1
|
||||
pand m2, [pb_1]
|
||||
pand m2, [GLOBAL(pb_1)]
|
||||
psubb m3, m2
|
||||
pavgb m0, m3
|
||||
|
||||
@@ -167,20 +173,24 @@ cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
|
||||
movq [dstq+strideq*2], m0
|
||||
pshufb m0, m1
|
||||
movq [dstq+stride3q ], m0
|
||||
|
||||
RESTORE_GOT
|
||||
RET
|
||||
|
||||
INIT_XMM ssse3
|
||||
cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
|
||||
cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
mova m0, [aboveq]
|
||||
DEFINE_ARGS dst, stride, stride3, dst8, line
|
||||
lea stride3q, [strideq*3]
|
||||
lea dst8q, [dstq+strideq*8]
|
||||
mova m1, [sh_b123456789abcdeff]
|
||||
pshufb m2, m0, [sh_b23456789abcdefff]
|
||||
mova m1, [GLOBAL(sh_b123456789abcdeff)]
|
||||
pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)]
|
||||
pavgb m3, m2, m0
|
||||
pxor m2, m0
|
||||
pshufb m0, m1
|
||||
pand m2, [pb_1]
|
||||
pand m2, [GLOBAL(pb_1)]
|
||||
psubb m3, m2
|
||||
pavgb m0, m3
|
||||
|
||||
@@ -214,29 +224,33 @@ cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
|
||||
movhps [dstq+strideq +8], m0
|
||||
movhps [dstq+strideq*2+8], m0
|
||||
movhps [dstq+stride3q +8], m0
|
||||
|
||||
RESTORE_GOT
|
||||
RET
|
||||
|
||||
INIT_XMM ssse3
|
||||
cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
|
||||
cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
mova m0, [aboveq]
|
||||
mova m4, [aboveq+16]
|
||||
DEFINE_ARGS dst, stride, stride3, dst16, line
|
||||
lea stride3q, [strideq*3]
|
||||
lea dst16q, [dstq +strideq*8]
|
||||
lea dst16q, [dst16q+strideq*8]
|
||||
mova m1, [sh_b123456789abcdeff]
|
||||
pshufb m2, m4, [sh_b23456789abcdefff]
|
||||
mova m1, [GLOBAL(sh_b123456789abcdeff)]
|
||||
pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)]
|
||||
pavgb m3, m2, m4
|
||||
pxor m2, m4
|
||||
palignr m5, m4, m0, 1
|
||||
palignr m6, m4, m0, 2
|
||||
pshufb m4, m1
|
||||
pand m2, [pb_1]
|
||||
pand m2, [GLOBAL(pb_1)]
|
||||
psubb m3, m2
|
||||
pavgb m4, m3
|
||||
pavgb m3, m0, m6
|
||||
pxor m0, m6
|
||||
pand m0, [pb_1]
|
||||
pand m0, [GLOBAL(pb_1)]
|
||||
psubb m3, m0
|
||||
pavgb m5, m3
|
||||
|
||||
@@ -288,4 +302,156 @@ cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
|
||||
mova [dstq +strideq +16], m4
|
||||
mova [dstq +strideq*2+16], m4
|
||||
mova [dstq +stride3q +16], m4
|
||||
|
||||
RESTORE_GOT
|
||||
RET
|
||||
|
||||
; ------------------------------------------
|
||||
; input: x, y, z, result
|
||||
;
|
||||
; trick from pascal
|
||||
; (x+2y+z+2)>>2 can be calculated as:
|
||||
; result = avg(x,z)
|
||||
; result -= xor(x,z) & 1
|
||||
; result = avg(result,y)
|
||||
; ------------------------------------------
|
||||
%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4
|
||||
pavgb %4, %1, %3
|
||||
pxor %3, %1
|
||||
pand %3, [GLOBAL(pb_1)]
|
||||
psubb %4, %3
|
||||
pavgb %4, %2
|
||||
%endmacro
|
||||
|
||||
INIT_XMM ssse3
|
||||
cglobal d63_predictor_4x4, 3, 4, 5, dst, stride, above, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
movq m3, [aboveq]
|
||||
pshufb m1, m3, [GLOBAL(sh_b23456777)]
|
||||
pshufb m2, m3, [GLOBAL(sh_b12345677)]
|
||||
|
||||
X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4
|
||||
pavgb m3, m2
|
||||
|
||||
; store 4 lines
|
||||
movd [dstq ], m3
|
||||
movd [dstq+strideq], m4
|
||||
lea dstq, [dstq+strideq*2]
|
||||
psrldq m3, 1
|
||||
psrldq m4, 1
|
||||
movd [dstq ], m3
|
||||
movd [dstq+strideq], m4
|
||||
RESTORE_GOT
|
||||
RET
|
||||
|
||||
INIT_XMM ssse3
|
||||
cglobal d63_predictor_8x8, 3, 4, 5, dst, stride, above, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
movq m3, [aboveq]
|
||||
DEFINE_ARGS dst, stride, stride3
|
||||
lea stride3q, [strideq*3]
|
||||
pshufb m1, m3, [GLOBAL(sh_b2345677777777777)]
|
||||
pshufb m0, m3, [GLOBAL(sh_b0123456777777777)]
|
||||
pshufb m2, m3, [GLOBAL(sh_b1234567777777777)]
|
||||
pshufb m3, [GLOBAL(sh_b0123456777777777)]
|
||||
|
||||
X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m4
|
||||
pavgb m3, m2
|
||||
|
||||
; store 4 lines
|
||||
movq [dstq ], m3
|
||||
movq [dstq+strideq], m4
|
||||
psrldq m3, 1
|
||||
psrldq m4, 1
|
||||
movq [dstq+strideq*2], m3
|
||||
movq [dstq+stride3q ], m4
|
||||
lea dstq, [dstq+strideq*4]
|
||||
psrldq m3, 1
|
||||
psrldq m4, 1
|
||||
|
||||
; store 4 lines
|
||||
movq [dstq ], m3
|
||||
movq [dstq+strideq], m4
|
||||
psrldq m3, 1
|
||||
psrldq m4, 1
|
||||
movq [dstq+strideq*2], m3
|
||||
movq [dstq+stride3q ], m4
|
||||
RESTORE_GOT
|
||||
RET
|
||||
|
||||
INIT_XMM ssse3
|
||||
cglobal d63_predictor_16x16, 3, 5, 5, dst, stride, above, line, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
mova m0, [aboveq]
|
||||
DEFINE_ARGS dst, stride, stride3, line
|
||||
lea stride3q, [strideq*3]
|
||||
mova m1, [GLOBAL(sh_b123456789abcdeff)]
|
||||
pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)]
|
||||
pshufb m3, m0, m1
|
||||
|
||||
X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m3, m2, m4
|
||||
pavgb m0, m3
|
||||
|
||||
mov lined, 4
|
||||
.loop:
|
||||
mova [dstq ], m0
|
||||
mova [dstq+strideq ], m4
|
||||
pshufb m0, m1
|
||||
pshufb m4, m1
|
||||
mova [dstq+strideq*2], m0
|
||||
mova [dstq+stride3q ], m4
|
||||
pshufb m0, m1
|
||||
pshufb m4, m1
|
||||
lea dstq, [dstq+strideq*4]
|
||||
dec lined
|
||||
jnz .loop
|
||||
RESTORE_GOT
|
||||
REP_RET
|
||||
|
||||
INIT_XMM ssse3
|
||||
cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, line, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
mova m0, [aboveq]
|
||||
mova m7, [aboveq+16]
|
||||
DEFINE_ARGS dst, stride, stride3, line
|
||||
mova m1, [GLOBAL(sh_b123456789abcdeff)]
|
||||
lea stride3q, [strideq*3]
|
||||
pshufb m2, m7, [GLOBAL(sh_b23456789abcdefff)]
|
||||
pshufb m3, m7, m1
|
||||
|
||||
X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m2, m4
|
||||
palignr m6, m7, m0, 1
|
||||
palignr m5, m7, m0, 2
|
||||
pavgb m7, m3
|
||||
|
||||
X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m6, m5, m2
|
||||
pavgb m0, m6
|
||||
|
||||
mov lined, 8
|
||||
.loop:
|
||||
mova [dstq ], m0
|
||||
mova [dstq +16], m7
|
||||
mova [dstq+strideq ], m2
|
||||
mova [dstq+strideq +16], m4
|
||||
palignr m3, m7, m0, 1
|
||||
palignr m5, m4, m2, 1
|
||||
pshufb m7, m1
|
||||
pshufb m4, m1
|
||||
|
||||
mova [dstq+strideq*2 ], m3
|
||||
mova [dstq+strideq*2+16], m7
|
||||
mova [dstq+stride3q ], m5
|
||||
mova [dstq+stride3q +16], m4
|
||||
palignr m0, m7, m3, 1
|
||||
palignr m2, m4, m5, 1
|
||||
pshufb m7, m1
|
||||
pshufb m4, m1
|
||||
lea dstq, [dstq+strideq*4]
|
||||
dec lined
|
||||
jnz .loop
|
||||
RESTORE_GOT
|
||||
REP_RET
|
||||
|
@@ -61,4 +61,4 @@ extern prototype_postproc_addnoise(vp9_plane_add_noise_wmt);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // VP9_COMMON_X86_VP9_POSTPROC_X86_H_
|
||||
|
@@ -705,60 +705,53 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
|
||||
movsxd rcx, dword ptr arg(4) ;output_height
|
||||
|
||||
.loop:
|
||||
movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
|
||||
prefetcht0 [rsi + 2 * rax -3]
|
||||
|
||||
movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
|
||||
punpcklqdq xmm0, xmm3
|
||||
movq xmm0, [rsi - 3] ;load src data
|
||||
movq xmm4, [rsi + 5]
|
||||
movq xmm7, [rsi + 13]
|
||||
punpcklqdq xmm0, xmm4
|
||||
punpcklqdq xmm4, xmm7
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm2, xmm0
|
||||
movdqa xmm3, xmm0
|
||||
movdqa xmm5, xmm4
|
||||
movdqa xmm6, xmm4
|
||||
movdqa xmm7, xmm4
|
||||
|
||||
pshufb xmm0, [GLOBAL(shuf_t0t1)]
|
||||
pmaddubsw xmm0, k0k1
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
pshufb xmm1, [GLOBAL(shuf_t2t3)]
|
||||
pmaddubsw xmm1, k2k3
|
||||
|
||||
movdqa xmm4, xmm2
|
||||
pshufb xmm2, [GLOBAL(shuf_t4t5)]
|
||||
pmaddubsw xmm2, k4k5
|
||||
pshufb xmm3, [GLOBAL(shuf_t6t7)]
|
||||
pshufb xmm4, [GLOBAL(shuf_t0t1)]
|
||||
pshufb xmm5, [GLOBAL(shuf_t2t3)]
|
||||
pshufb xmm6, [GLOBAL(shuf_t4t5)]
|
||||
pshufb xmm7, [GLOBAL(shuf_t6t7)]
|
||||
|
||||
pshufb xmm4, [GLOBAL(shuf_t6t7)]
|
||||
pmaddubsw xmm4, k6k7
|
||||
pmaddubsw xmm0, k0k1
|
||||
pmaddubsw xmm1, k2k3
|
||||
pmaddubsw xmm2, k4k5
|
||||
pmaddubsw xmm3, k6k7
|
||||
pmaddubsw xmm4, k0k1
|
||||
pmaddubsw xmm5, k2k3
|
||||
pmaddubsw xmm6, k4k5
|
||||
pmaddubsw xmm7, k6k7
|
||||
|
||||
paddsw xmm0, xmm1
|
||||
paddsw xmm0, xmm4
|
||||
paddsw xmm0, xmm3
|
||||
paddsw xmm0, xmm2
|
||||
paddsw xmm4, xmm5
|
||||
paddsw xmm4, xmm7
|
||||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm0, krd
|
||||
paddsw xmm4, krd
|
||||
psraw xmm0, 7
|
||||
psraw xmm4, 7
|
||||
packuswb xmm0, xmm0
|
||||
|
||||
|
||||
movq xmm3, [rsi + 5]
|
||||
movq xmm7, [rsi + 13]
|
||||
punpcklqdq xmm3, xmm7
|
||||
|
||||
movdqa xmm1, xmm3
|
||||
pshufb xmm3, [GLOBAL(shuf_t0t1)]
|
||||
pmaddubsw xmm3, k0k1
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
pshufb xmm1, [GLOBAL(shuf_t2t3)]
|
||||
pmaddubsw xmm1, k2k3
|
||||
|
||||
movdqa xmm4, xmm2
|
||||
pshufb xmm2, [GLOBAL(shuf_t4t5)]
|
||||
pmaddubsw xmm2, k4k5
|
||||
|
||||
pshufb xmm4, [GLOBAL(shuf_t6t7)]
|
||||
pmaddubsw xmm4, k6k7
|
||||
|
||||
paddsw xmm3, xmm1
|
||||
paddsw xmm3, xmm4
|
||||
paddsw xmm3, xmm2
|
||||
paddsw xmm3, krd
|
||||
psraw xmm3, 7
|
||||
packuswb xmm3, xmm3
|
||||
punpcklqdq xmm0, xmm3
|
||||
packuswb xmm4, xmm4
|
||||
punpcklqdq xmm0, xmm4
|
||||
%if %1
|
||||
movdqa xmm1, [rdi]
|
||||
pavgb xmm0, xmm1
|
||||
|
@@ -1,230 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_add_constant_residual_8x8_neon|
|
||||
EXPORT |vp9_add_constant_residual_16x16_neon|
|
||||
EXPORT |vp9_add_constant_residual_32x32_neon|
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
MACRO
|
||||
LD_16x8 $src, $stride
|
||||
vld1.8 {q8}, [$src], $stride
|
||||
vld1.8 {q9}, [$src], $stride
|
||||
vld1.8 {q10}, [$src], $stride
|
||||
vld1.8 {q11}, [$src], $stride
|
||||
vld1.8 {q12}, [$src], $stride
|
||||
vld1.8 {q13}, [$src], $stride
|
||||
vld1.8 {q14}, [$src], $stride
|
||||
vld1.8 {q15}, [$src], $stride
|
||||
MEND
|
||||
|
||||
MACRO
|
||||
ADD_DIFF_16x8 $diff
|
||||
vqadd.u8 q8, q8, $diff
|
||||
vqadd.u8 q9, q9, $diff
|
||||
vqadd.u8 q10, q10, $diff
|
||||
vqadd.u8 q11, q11, $diff
|
||||
vqadd.u8 q12, q12, $diff
|
||||
vqadd.u8 q13, q13, $diff
|
||||
vqadd.u8 q14, q14, $diff
|
||||
vqadd.u8 q15, q15, $diff
|
||||
MEND
|
||||
|
||||
MACRO
|
||||
SUB_DIFF_16x8 $diff
|
||||
vqsub.u8 q8, q8, $diff
|
||||
vqsub.u8 q9, q9, $diff
|
||||
vqsub.u8 q10, q10, $diff
|
||||
vqsub.u8 q11, q11, $diff
|
||||
vqsub.u8 q12, q12, $diff
|
||||
vqsub.u8 q13, q13, $diff
|
||||
vqsub.u8 q14, q14, $diff
|
||||
vqsub.u8 q15, q15, $diff
|
||||
MEND
|
||||
|
||||
MACRO
|
||||
ST_16x8 $dst, $stride
|
||||
vst1.8 {q8}, [$dst], $stride
|
||||
vst1.8 {q9}, [$dst], $stride
|
||||
vst1.8 {q10}, [$dst], $stride
|
||||
vst1.8 {q11}, [$dst], $stride
|
||||
vst1.8 {q12}, [$dst], $stride
|
||||
vst1.8 {q13}, [$dst], $stride
|
||||
vst1.8 {q14}, [$dst], $stride
|
||||
vst1.8 {q15}, [$dst], $stride
|
||||
MEND
|
||||
|
||||
; void add_constant_residual(const int16_t diff, uint8_t *dest, int stride,
|
||||
; int width, int height) {
|
||||
; int r, c;
|
||||
;
|
||||
; for (r = 0; r < height; r++) {
|
||||
; for (c = 0; c < width; c++)
|
||||
; dest[c] = clip_pixel(diff + dest[c]);
|
||||
;
|
||||
; dest += stride;
|
||||
; }
|
||||
;}
|
||||
;void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest,
|
||||
; int stride) {
|
||||
; add_constant_residual(diff, dest, stride, 8, 8);
|
||||
;}
|
||||
; r0 : const int16_t diff
|
||||
; r1 : uint8_t *dest (read and written in place)
|
||||
; r2 : int stride
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|vp9_add_constant_residual_8x8_neon| PROC
|
||||
mov r3, r1 ; r3: save dest to r3
|
||||
vld1.8 {d0}, [r1], r2
|
||||
vld1.8 {d1}, [r1], r2
|
||||
vld1.8 {d2}, [r1], r2
|
||||
vld1.8 {d3}, [r1], r2
|
||||
vld1.8 {d4}, [r1], r2
|
||||
vld1.8 {d5}, [r1], r2
|
||||
vld1.8 {d6}, [r1], r2
|
||||
vld1.8 {d7}, [r1], r2
|
||||
cmp r0, #0
|
||||
bge DIFF_POSITIVE_8x8
|
||||
|
||||
DIFF_NEGATIVE_8x8 ; diff < 0
|
||||
neg r0, r0
|
||||
usat r0, #8, r0
|
||||
vdup.u8 q8, r0
|
||||
|
||||
vqsub.u8 q0, q0, q8
|
||||
vqsub.u8 q1, q1, q8
|
||||
vqsub.u8 q2, q2, q8
|
||||
vqsub.u8 q3, q3, q8
|
||||
b DIFF_SAVE_8x8
|
||||
|
||||
DIFF_POSITIVE_8x8 ; diff >= 0
|
||||
usat r0, #8, r0
|
||||
vdup.u8 q8, r0
|
||||
|
||||
vqadd.u8 q0, q0, q8
|
||||
vqadd.u8 q1, q1, q8
|
||||
vqadd.u8 q2, q2, q8
|
||||
vqadd.u8 q3, q3, q8
|
||||
|
||||
DIFF_SAVE_8x8
|
||||
vst1.8 {d0}, [r3], r2
|
||||
vst1.8 {d1}, [r3], r2
|
||||
vst1.8 {d2}, [r3], r2
|
||||
vst1.8 {d3}, [r3], r2
|
||||
vst1.8 {d4}, [r3], r2
|
||||
vst1.8 {d5}, [r3], r2
|
||||
vst1.8 {d6}, [r3], r2
|
||||
vst1.8 {d7}, [r3], r2
|
||||
|
||||
bx lr
|
||||
ENDP
|
||||
|
||||
;void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest,
|
||||
; int stride) {
|
||||
; add_constant_residual(diff, dest, stride, 16, 16);
|
||||
;}
|
||||
; r0 : const int16_t diff
|
||||
; r1 : uint8_t *dest (read and written in place)
|
||||
; r2 : int stride
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|vp9_add_constant_residual_16x16_neon| PROC
|
||||
mov r3, r1
|
||||
LD_16x8 r1, r2
|
||||
cmp r0, #0
|
||||
bge DIFF_POSITIVE_16x16
|
||||
|
||||
|DIFF_NEGATIVE_16x16|
|
||||
neg r0, r0
|
||||
usat r0, #8, r0
|
||||
vdup.u8 q0, r0
|
||||
|
||||
SUB_DIFF_16x8 q0
|
||||
ST_16x8 r3, r2
|
||||
LD_16x8 r1, r2
|
||||
SUB_DIFF_16x8 q0
|
||||
b DIFF_SAVE_16x16
|
||||
|
||||
|DIFF_POSITIVE_16x16|
|
||||
usat r0, #8, r0
|
||||
vdup.u8 q0, r0
|
||||
|
||||
ADD_DIFF_16x8 q0
|
||||
ST_16x8 r3, r2
|
||||
LD_16x8 r1, r2
|
||||
ADD_DIFF_16x8 q0
|
||||
|
||||
|DIFF_SAVE_16x16|
|
||||
ST_16x8 r3, r2
|
||||
bx lr
|
||||
ENDP
|
||||
|
||||
;void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest,
|
||||
; int stride) {
|
||||
; add_constant_residual(diff, dest, stride, 32, 32);
|
||||
;}
|
||||
; r0 : const int16_t diff
|
||||
; r1 : uint8_t *dest (read and written in place)
|
||||
; r2 : int stride
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|vp9_add_constant_residual_32x32_neon| PROC
|
||||
push {r4,lr}
|
||||
pld [r1]
|
||||
mov r3, r1
|
||||
add r4, r1, #16 ; r4 dest + 16 for second loop
|
||||
cmp r0, #0
|
||||
bge DIFF_POSITIVE_32x32
|
||||
|
||||
|DIFF_NEGATIVE_32x32|
|
||||
neg r0, r0
|
||||
usat r0, #8, r0
|
||||
vdup.u8 q0, r0
|
||||
mov r0, #4
|
||||
|
||||
|DIFF_NEGATIVE_32x32_LOOP|
|
||||
sub r0, #1
|
||||
LD_16x8 r1, r2
|
||||
SUB_DIFF_16x8 q0
|
||||
ST_16x8 r3, r2
|
||||
|
||||
LD_16x8 r1, r2
|
||||
SUB_DIFF_16x8 q0
|
||||
ST_16x8 r3, r2
|
||||
cmp r0, #2
|
||||
moveq r1, r4
|
||||
moveq r3, r4
|
||||
cmp r0, #0
|
||||
bne DIFF_NEGATIVE_32x32_LOOP
|
||||
pop {r4,pc}
|
||||
|
||||
|DIFF_POSITIVE_32x32|
|
||||
usat r0, #8, r0
|
||||
vdup.u8 q0, r0
|
||||
mov r0, #4
|
||||
|
||||
|DIFF_POSITIVE_32x32_LOOP|
|
||||
sub r0, #1
|
||||
LD_16x8 r1, r2
|
||||
ADD_DIFF_16x8 q0
|
||||
ST_16x8 r3, r2
|
||||
|
||||
LD_16x8 r1, r2
|
||||
ADD_DIFF_16x8 q0
|
||||
ST_16x8 r3, r2
|
||||
cmp r0, #2
|
||||
moveq r1, r4
|
||||
moveq r3, r4
|
||||
cmp r0, #0
|
||||
bne DIFF_POSITIVE_32x32_LOOP
|
||||
pop {r4,pc}
|
||||
ENDP
|
||||
|
||||
END
|
@@ -30,10 +30,26 @@ static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
|
||||
return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p);
|
||||
}
|
||||
|
||||
static MB_PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, vp9_reader *r,
|
||||
int size_group) {
|
||||
const MB_PREDICTION_MODE y_mode = read_intra_mode(r,
|
||||
cm->fc.y_mode_prob[size_group]);
|
||||
++cm->counts.y_mode[size_group][y_mode];
|
||||
return y_mode;
|
||||
}
|
||||
|
||||
static MB_PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, vp9_reader *r,
|
||||
MB_PREDICTION_MODE y_mode) {
|
||||
const MB_PREDICTION_MODE uv_mode = read_intra_mode(r,
|
||||
cm->fc.uv_mode_prob[y_mode]);
|
||||
++cm->counts.uv_mode[y_mode][uv_mode];
|
||||
return uv_mode;
|
||||
}
|
||||
|
||||
static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r,
|
||||
uint8_t context) {
|
||||
MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree,
|
||||
cm->fc.inter_mode_probs[context]);
|
||||
const MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree,
|
||||
cm->fc.inter_mode_probs[context]);
|
||||
++cm->counts.inter_mode[context][inter_mode_offset(mode)];
|
||||
return mode;
|
||||
}
|
||||
@@ -75,28 +91,9 @@ static TX_SIZE read_tx_size(VP9D_COMP *pbi, TX_MODE tx_mode,
|
||||
return TX_4X4;
|
||||
}
|
||||
|
||||
static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize,
|
||||
int mi_row, int mi_col, int segment_id) {
|
||||
const int mi_offset = mi_row * cm->mi_cols + mi_col;
|
||||
const int bw = 1 << mi_width_log2(bsize);
|
||||
const int bh = 1 << mi_height_log2(bsize);
|
||||
const int xmis = MIN(cm->mi_cols - mi_col, bw);
|
||||
const int ymis = MIN(cm->mi_rows - mi_row, bh);
|
||||
int x, y;
|
||||
|
||||
assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
|
||||
|
||||
for (y = 0; y < ymis; y++)
|
||||
for (x = 0; x < xmis; x++)
|
||||
cm->last_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
|
||||
}
|
||||
|
||||
static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
|
||||
vp9_reader *r) {
|
||||
MACROBLOCKD *const xd = &pbi->mb;
|
||||
struct segmentation *const seg = &pbi->common.seg;
|
||||
const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type;
|
||||
int segment_id;
|
||||
|
||||
if (!seg->enabled)
|
||||
return 0; // Default for disabled segmentation
|
||||
@@ -104,9 +101,7 @@ static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
|
||||
if (!seg->update_map)
|
||||
return 0;
|
||||
|
||||
segment_id = read_segment_id(r, seg);
|
||||
set_segment_id(&pbi->common, bsize, mi_row, mi_col, segment_id);
|
||||
return segment_id;
|
||||
return read_segment_id(r, seg);
|
||||
}
|
||||
|
||||
static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
|
||||
@@ -115,7 +110,7 @@ static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
|
||||
MACROBLOCKD *const xd = &pbi->mb;
|
||||
struct segmentation *const seg = &cm->seg;
|
||||
const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type;
|
||||
int pred_segment_id, segment_id;
|
||||
int pred_segment_id;;
|
||||
|
||||
if (!seg->enabled)
|
||||
return 0; // Default for disabled segmentation
|
||||
@@ -129,13 +124,10 @@ static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
|
||||
const vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
|
||||
const int pred_flag = vp9_read(r, pred_prob);
|
||||
vp9_set_pred_flag_seg_id(xd, pred_flag);
|
||||
segment_id = pred_flag ? pred_segment_id
|
||||
: read_segment_id(r, seg);
|
||||
return pred_flag ? pred_segment_id : read_segment_id(r, seg);
|
||||
} else {
|
||||
segment_id = read_segment_id(r, seg);
|
||||
return read_segment_id(r, seg);
|
||||
}
|
||||
set_segment_id(cm, bsize, mi_row, mi_col, segment_id);
|
||||
return segment_id;
|
||||
}
|
||||
|
||||
static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
|
||||
@@ -200,7 +192,6 @@ static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
|
||||
|
||||
static int read_mv_component(vp9_reader *r,
|
||||
const nmv_component *mvcomp, int usehp) {
|
||||
|
||||
int mag, d, fr, hp;
|
||||
const int sign = vp9_read(r, mvcomp->sign);
|
||||
const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes);
|
||||
@@ -388,9 +379,7 @@ static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
|
||||
mbmi->ref_frame[1] = NONE;
|
||||
|
||||
if (bsize >= BLOCK_8X8) {
|
||||
const int size_group = size_group_lookup[bsize];
|
||||
mbmi->mode = read_intra_mode(r, cm->fc.y_mode_prob[size_group]);
|
||||
cm->counts.y_mode[size_group][mbmi->mode]++;
|
||||
mbmi->mode = read_intra_mode_y(cm, r, size_group_lookup[bsize]);
|
||||
} else {
|
||||
// Only 4x4, 4x8, 8x4 blocks
|
||||
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
|
||||
@@ -400,10 +389,8 @@ static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
|
||||
for (idy = 0; idy < 2; idy += num_4x4_h) {
|
||||
for (idx = 0; idx < 2; idx += num_4x4_w) {
|
||||
const int ib = idy * 2 + idx;
|
||||
const int b_mode = read_intra_mode(r, cm->fc.y_mode_prob[0]);
|
||||
const int b_mode = read_intra_mode_y(cm, r, 0);
|
||||
mi->bmi[ib].as_mode = b_mode;
|
||||
cm->counts.y_mode[0][b_mode]++;
|
||||
|
||||
if (num_4x4_h == 2)
|
||||
mi->bmi[ib + 2].as_mode = b_mode;
|
||||
if (num_4x4_w == 2)
|
||||
@@ -413,8 +400,7 @@ static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
|
||||
mbmi->mode = mi->bmi[3].as_mode;
|
||||
}
|
||||
|
||||
mbmi->uv_mode = read_intra_mode(r, cm->fc.uv_mode_prob[mbmi->mode]);
|
||||
cm->counts.uv_mode[mbmi->mode][mbmi->uv_mode]++;
|
||||
mbmi->uv_mode = read_intra_mode_uv(cm, r, mbmi->mode);
|
||||
}
|
||||
|
||||
static int read_is_inter_block(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
|
||||
@@ -443,8 +429,7 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
|
||||
const BLOCK_SIZE bsize = mbmi->sb_type;
|
||||
const int allow_hp = xd->allow_high_precision_mv;
|
||||
|
||||
int_mv nearest, nearby, best_mv;
|
||||
int_mv nearest_second, nearby_second, best_mv_second;
|
||||
int_mv nearest[2], nearmv[2], best[2];
|
||||
uint8_t inter_mode_ctx;
|
||||
MV_REFERENCE_FRAME ref0;
|
||||
int is_compound;
|
||||
@@ -469,8 +454,8 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
|
||||
|
||||
// nearest, nearby
|
||||
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
|
||||
vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby);
|
||||
best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int;
|
||||
vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest[0], &nearmv[0]);
|
||||
best[0].as_int = nearest[0].as_int;
|
||||
}
|
||||
|
||||
if (is_compound) {
|
||||
@@ -479,9 +464,8 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
|
||||
ref1, mbmi->ref_mvs[ref1], mi_row, mi_col);
|
||||
|
||||
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
|
||||
vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1],
|
||||
&nearest_second, &nearby_second);
|
||||
best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int;
|
||||
vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], &nearest[1], &nearmv[1]);
|
||||
best[1].as_int = nearest[1].as_int;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -493,75 +477,82 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
|
||||
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
|
||||
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
|
||||
int idx, idy;
|
||||
int b_mode;
|
||||
for (idy = 0; idy < 2; idy += num_4x4_h) {
|
||||
for (idx = 0; idx < 2; idx += num_4x4_w) {
|
||||
int_mv blockmv, secondmv;
|
||||
int_mv block[2];
|
||||
const int j = idy * 2 + idx;
|
||||
const int b_mode = read_inter_mode(cm, r, inter_mode_ctx);
|
||||
b_mode = read_inter_mode(cm, r, inter_mode_ctx);
|
||||
|
||||
if (b_mode == NEARESTMV || b_mode == NEARMV) {
|
||||
vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0,
|
||||
vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest[0],
|
||||
&nearmv[0], j, 0,
|
||||
mi_row, mi_col);
|
||||
|
||||
if (is_compound)
|
||||
vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second,
|
||||
&nearby_second, j, 1,
|
||||
mi_row, mi_col);
|
||||
vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest[1],
|
||||
&nearmv[1], j, 1,
|
||||
mi_row, mi_col);
|
||||
}
|
||||
|
||||
switch (b_mode) {
|
||||
case NEWMV:
|
||||
read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
|
||||
&cm->counts.mv, allow_hp);
|
||||
|
||||
read_mv(r, &block[0].as_mv, &best[0].as_mv, nmvc, &cm->counts.mv,
|
||||
allow_hp);
|
||||
if (is_compound)
|
||||
read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
|
||||
&cm->counts.mv, allow_hp);
|
||||
read_mv(r, &block[1].as_mv, &best[1].as_mv, nmvc, &cm->counts.mv,
|
||||
allow_hp);
|
||||
break;
|
||||
case NEARESTMV:
|
||||
blockmv.as_int = nearest.as_int;
|
||||
block[0].as_int = nearest[0].as_int;
|
||||
if (is_compound)
|
||||
secondmv.as_int = nearest_second.as_int;
|
||||
block[1].as_int = nearest[1].as_int;
|
||||
break;
|
||||
case NEARMV:
|
||||
blockmv.as_int = nearby.as_int;
|
||||
block[0].as_int = nearmv[0].as_int;
|
||||
if (is_compound)
|
||||
secondmv.as_int = nearby_second.as_int;
|
||||
block[1].as_int = nearmv[1].as_int;
|
||||
break;
|
||||
case ZEROMV:
|
||||
blockmv.as_int = 0;
|
||||
block[0].as_int = 0;
|
||||
if (is_compound)
|
||||
secondmv.as_int = 0;
|
||||
block[1].as_int = 0;
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid inter mode value");
|
||||
}
|
||||
mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
|
||||
if (is_compound)
|
||||
mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
|
||||
mi->bmi[j].as_mv[0].as_int = block[0].as_int;
|
||||
assert(block[0].as_mv.row < MV_UPP && block[0].as_mv.row > MV_LOW);
|
||||
assert(block[0].as_mv.col < MV_UPP && block[0].as_mv.col > MV_LOW);
|
||||
|
||||
if (is_compound) {
|
||||
mi->bmi[j].as_mv[1].as_int = block[1].as_int;
|
||||
assert(block[1].as_mv.row < MV_UPP && block[1].as_mv.row > MV_LOW);
|
||||
assert(block[1].as_mv.col < MV_UPP && block[1].as_mv.col > MV_LOW);
|
||||
}
|
||||
|
||||
if (num_4x4_h == 2)
|
||||
mi->bmi[j + 2] = mi->bmi[j];
|
||||
if (num_4x4_w == 2)
|
||||
mi->bmi[j + 1] = mi->bmi[j];
|
||||
mi->mbmi.mode = b_mode;
|
||||
}
|
||||
}
|
||||
|
||||
mi->mbmi.mode = b_mode;
|
||||
mv0->as_int = mi->bmi[3].as_mv[0].as_int;
|
||||
mv1->as_int = mi->bmi[3].as_mv[1].as_int;
|
||||
} else {
|
||||
switch (mbmi->mode) {
|
||||
case NEARMV:
|
||||
mv0->as_int = nearby.as_int;
|
||||
mv0->as_int = nearmv[0].as_int;
|
||||
if (is_compound)
|
||||
mv1->as_int = nearby_second.as_int;
|
||||
mv1->as_int = nearmv[1].as_int;
|
||||
break;
|
||||
|
||||
case NEARESTMV:
|
||||
mv0->as_int = nearest.as_int;
|
||||
mv0->as_int = nearest[0].as_int;
|
||||
if (is_compound)
|
||||
mv1->as_int = nearest_second.as_int;
|
||||
mv1->as_int = nearest[1].as_int;
|
||||
break;
|
||||
|
||||
case ZEROMV:
|
||||
@@ -571,14 +562,20 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
|
||||
break;
|
||||
|
||||
case NEWMV:
|
||||
read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, allow_hp);
|
||||
read_mv(r, &mv0->as_mv, &best[0].as_mv, nmvc, &cm->counts.mv, allow_hp);
|
||||
if (is_compound)
|
||||
read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, &cm->counts.mv,
|
||||
read_mv(r, &mv1->as_mv, &best[1].as_mv, nmvc, &cm->counts.mv,
|
||||
allow_hp);
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid inter mode value");
|
||||
}
|
||||
assert(mv0->as_mv.row < MV_UPP && mv0->as_mv.row > MV_LOW);
|
||||
assert(mv0->as_mv.col < MV_UPP && mv0->as_mv.col > MV_LOW);
|
||||
if (is_compound) {
|
||||
assert(mv1->as_mv.row < MV_UPP && mv1->as_mv.row > MV_LOW);
|
||||
assert(mv1->as_mv.col < MV_UPP && mv1->as_mv.col > MV_LOW);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -436,7 +436,6 @@ static void setup_segmentation(struct segmentation *seg,
|
||||
|
||||
static void setup_loopfilter(struct loopfilter *lf,
|
||||
struct vp9_read_bit_buffer *rb) {
|
||||
|
||||
lf->filter_level = vp9_rb_read_literal(rb, 6);
|
||||
lf->sharpness_level = vp9_rb_read_literal(rb, 3);
|
||||
|
||||
@@ -492,8 +491,7 @@ static INTERPOLATIONFILTERTYPE read_interp_filter_type(
|
||||
struct vp9_read_bit_buffer *rb) {
|
||||
const INTERPOLATIONFILTERTYPE literal_to_type[] = { EIGHTTAP_SMOOTH,
|
||||
EIGHTTAP,
|
||||
EIGHTTAP_SHARP,
|
||||
BILINEAR };
|
||||
EIGHTTAP_SHARP };
|
||||
return vp9_rb_read_bit(rb) ? SWITCHABLE
|
||||
: literal_to_type[vp9_rb_read_literal(rb, 2)];
|
||||
}
|
||||
@@ -936,6 +934,15 @@ void vp9_init_dequantizer(VP9_COMMON *cm) {
|
||||
}
|
||||
}
|
||||
|
||||
static void update_segmentation_map(VP9_COMMON *cm) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < cm->mi_rows; ++i)
|
||||
for (j = 0; j < cm->mi_cols; ++j)
|
||||
cm->last_frame_seg_map[i * cm->mi_cols + j] =
|
||||
cm->mi_grid_visible[i * cm->mode_info_stride + j]->mbmi.segment_id;
|
||||
}
|
||||
|
||||
int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
|
||||
int i;
|
||||
VP9_COMMON *const cm = &pbi->common;
|
||||
@@ -1015,5 +1022,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
|
||||
if (cm->refresh_frame_context)
|
||||
cm->frame_contexts[cm->frame_context_idx] = cm->fc;
|
||||
|
||||
update_segmentation_map(cm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@@ -105,9 +105,8 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
|
||||
const int16_t *scan, *nb;
|
||||
const uint8_t *band_translate;
|
||||
uint8_t token_cache[1024];
|
||||
int pt = get_entropy_context(xd, tx_size, type, block_idx, A, L,
|
||||
&scan, &band_translate);
|
||||
nb = vp9_get_coef_neighbors_handle(scan);
|
||||
int pt = get_entropy_context(tx_size, A, L);
|
||||
get_scan_and_band(xd, tx_size, type, block_idx, &scan, &nb, &band_translate);
|
||||
|
||||
while (1) {
|
||||
int val;
|
||||
@@ -122,7 +121,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
|
||||
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
|
||||
break;
|
||||
|
||||
SKIP_START:
|
||||
SKIP_START:
|
||||
if (c >= seg_eob)
|
||||
break;
|
||||
if (c)
|
||||
|
@@ -67,7 +67,6 @@ static int inv_remap_prob(int v, int m) {
|
||||
206, 207, 208, 209, 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, 221,
|
||||
222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
|
||||
238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
|
||||
|
||||
};
|
||||
// v = merge_index(v, MAX_PROBS - 1, MODULUS_PARAM);
|
||||
v = inv_map_table[v];
|
||||
|
@@ -8,37 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vp9/decoder/vp9_idct_blk.h"
|
||||
|
||||
static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride,
|
||||
int width, int height) {
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < height; r++) {
|
||||
for (c = 0; c < width; c++)
|
||||
dest[c] = clip_pixel(diff + dest[c]);
|
||||
|
||||
dest += stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest,
|
||||
int stride) {
|
||||
add_constant_residual(diff, dest, stride, 8, 8);
|
||||
}
|
||||
|
||||
void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest,
|
||||
int stride) {
|
||||
add_constant_residual(diff, dest, stride, 16, 16);
|
||||
}
|
||||
|
||||
void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest,
|
||||
int stride) {
|
||||
add_constant_residual(diff, dest, stride, 32, 32);
|
||||
}
|
||||
|
||||
void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
|
||||
int eob) {
|
||||
if (tx_type == DCT_DCT) {
|
||||
@@ -96,7 +69,7 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
vp9_short_idct8x8_1_add(input, dest, stride);
|
||||
input[0] = 0;
|
||||
} else if (eob <= 10) {
|
||||
vp9_short_idct10_8x8_add(input, dest, stride);
|
||||
vp9_short_idct8x8_10_add(input, dest, stride);
|
||||
vpx_memset(input, 0, 128);
|
||||
} else {
|
||||
vp9_short_idct8x8_add(input, dest, stride);
|
||||
@@ -126,7 +99,7 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
vp9_short_idct16x16_1_add(input, dest, stride);
|
||||
input[0] = 0;
|
||||
} else if (eob <= 10) {
|
||||
vp9_short_idct10_16x16_add(input, dest, stride);
|
||||
vp9_short_idct16x16_10_add(input, dest, stride);
|
||||
vpx_memset(input, 0, 512);
|
||||
} else {
|
||||
vp9_short_idct16x16_add(input, dest, stride);
|
||||
@@ -136,12 +109,9 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
}
|
||||
|
||||
void vp9_idct_add_32x32_c(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024);
|
||||
|
||||
if (eob) {
|
||||
if (eob == 1) {
|
||||
vp9_short_idct1_32x32(input, output);
|
||||
vp9_add_constant_residual_32x32(output[0], dest, stride);
|
||||
vp9_short_idct32x32_1_add(input, dest, stride);
|
||||
input[0] = 0;
|
||||
} else {
|
||||
vp9_short_idct32x32_add(input, dest, stride);
|
||||
|
@@ -14,17 +14,16 @@
|
||||
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
|
||||
void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest,
|
||||
int stride, int eob);
|
||||
|
||||
void vp9_idct_add_lossless_c(int16_t *input, unsigned char *dest, int stride,
|
||||
int eob);
|
||||
|
||||
void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
|
||||
void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
|
||||
int stride, int eob);
|
||||
|
||||
void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
|
||||
void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
|
||||
int stride, int eob);
|
||||
|
||||
void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
|
||||
void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
|
||||
int stride, int eob);
|
||||
|
||||
#endif // VP9_DECODER_VP9_IDCT_BLK_H_
|
||||
|
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP9_COMMON_VP9_ONYXD_H_
|
||||
#define VP9_COMMON_VP9_ONYXD_H_
|
||||
#ifndef VP9_DECODER_VP9_ONYXD_H_
|
||||
#define VP9_DECODER_VP9_ONYXD_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -40,7 +40,7 @@ typedef enum {
|
||||
void vp9_initialize_dec();
|
||||
|
||||
int vp9_receive_compressed_data(VP9D_PTR comp,
|
||||
uint64_t size, const uint8_t **dest,
|
||||
size_t size, const uint8_t **dest,
|
||||
int64_t time_stamp);
|
||||
|
||||
int vp9_get_raw_frame(VP9D_PTR comp, YV12_BUFFER_CONFIG *sd,
|
||||
@@ -66,4 +66,4 @@ void vp9_remove_decompressor(VP9D_PTR comp);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // VP9_COMMON_VP9_ONYXD_H_
|
||||
#endif // VP9_DECODER_VP9_ONYXD_H_
|
||||
|
@@ -65,13 +65,12 @@ static void recon_write_yuv_frame(const char *name,
|
||||
#endif
|
||||
#if WRITE_RECON_BUFFER == 2
|
||||
void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
|
||||
|
||||
// write the frame
|
||||
FILE *yframe;
|
||||
int i;
|
||||
char filename[255];
|
||||
|
||||
sprintf(filename, "dx\\y%04d.raw", this_frame);
|
||||
snprintf(filename, sizeof(filename)-1, "dx\\y%04d.raw", this_frame);
|
||||
yframe = fopen(filename, "wb");
|
||||
|
||||
for (i = 0; i < frame->y_height; i++)
|
||||
@@ -79,7 +78,7 @@ void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
|
||||
frame->y_width, 1, yframe);
|
||||
|
||||
fclose(yframe);
|
||||
sprintf(filename, "dx\\u%04d.raw", this_frame);
|
||||
snprintf(filename, sizeof(filename)-1, "dx\\u%04d.raw", this_frame);
|
||||
yframe = fopen(filename, "wb");
|
||||
|
||||
for (i = 0; i < frame->uv_height; i++)
|
||||
@@ -87,7 +86,7 @@ void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
|
||||
frame->uv_width, 1, yframe);
|
||||
|
||||
fclose(yframe);
|
||||
sprintf(filename, "dx\\v%04d.raw", this_frame);
|
||||
snprintf(filename, sizeof(filename)-1, "dx\\v%04d.raw", this_frame);
|
||||
yframe = fopen(filename, "wb");
|
||||
|
||||
for (i = 0; i < frame->uv_height; i++)
|
||||
@@ -214,13 +213,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag,
|
||||
* vpxenc --test-decode functionality working, and will be replaced in a
|
||||
* later commit that adds VP9-specific controls for this functionality.
|
||||
*/
|
||||
if (ref_frame_flag == VP9_LAST_FLAG)
|
||||
if (ref_frame_flag == VP9_LAST_FLAG) {
|
||||
ref_fb_ptr = &pbi->common.active_ref_idx[0];
|
||||
else if (ref_frame_flag == VP9_GOLD_FLAG)
|
||||
} else if (ref_frame_flag == VP9_GOLD_FLAG) {
|
||||
ref_fb_ptr = &pbi->common.active_ref_idx[1];
|
||||
else if (ref_frame_flag == VP9_ALT_FLAG)
|
||||
} else if (ref_frame_flag == VP9_ALT_FLAG) {
|
||||
ref_fb_ptr = &pbi->common.active_ref_idx[2];
|
||||
else {
|
||||
} else {
|
||||
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
|
||||
"Invalid reference frame");
|
||||
return pbi->common.error.error_code;
|
||||
@@ -277,7 +276,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) {
|
||||
}
|
||||
|
||||
int vp9_receive_compressed_data(VP9D_PTR ptr,
|
||||
uint64_t size, const uint8_t **psource,
|
||||
size_t size, const uint8_t **psource,
|
||||
int64_t time_stamp) {
|
||||
VP9D_COMP *pbi = (VP9D_COMP *) ptr;
|
||||
VP9_COMMON *cm = &pbi->common;
|
||||
|
@@ -25,7 +25,7 @@ typedef struct VP9Decompressor {
|
||||
VP9D_CONFIG oxcf;
|
||||
|
||||
const uint8_t *source;
|
||||
uint32_t source_sz;
|
||||
size_t source_sz;
|
||||
|
||||
int64_t last_time_stamp;
|
||||
int ready_for_new_data;
|
||||
@@ -41,4 +41,4 @@ typedef struct VP9Decompressor {
|
||||
VP9Worker lf_worker;
|
||||
} VP9D_COMP;
|
||||
|
||||
#endif // VP9_DECODER_VP9_TREEREADER_H_
|
||||
#endif // VP9_DECODER_VP9_ONYXD_INT_H_
|
||||
|
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP9_READ_BIT_BUFFER_
|
||||
#define VP9_READ_BIT_BUFFER_
|
||||
#ifndef VP9_DECODER_VP9_READ_BIT_BUFFER_H_
|
||||
#define VP9_DECODER_VP9_READ_BIT_BUFFER_H_
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
@@ -57,4 +57,4 @@ static int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb,
|
||||
return vp9_rb_read_bit(rb) ? -value : value;
|
||||
}
|
||||
|
||||
#endif // VP9_READ_BIT_BUFFER_
|
||||
#endif // VP9_DECODER_VP9_READ_BIT_BUFFER_H_
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#ifndef VP9_DECODER_VP9_THREAD_H_
|
||||
#define VP9_DECODER_VP9_THREAD_H_
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
@@ -27,7 +27,7 @@ extern "C" {
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#include <windows.h>
|
||||
#include <windows.h> // NOLINT
|
||||
typedef HANDLE pthread_t;
|
||||
typedef CRITICAL_SECTION pthread_mutex_t;
|
||||
typedef struct {
|
||||
@@ -38,7 +38,7 @@ typedef struct {
|
||||
|
||||
#else
|
||||
|
||||
#include <pthread.h>
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#endif /* _WIN32 */
|
||||
#endif /* CONFIG_MULTITHREAD */
|
||||
@@ -90,4 +90,4 @@ void vp9_worker_end(VP9Worker* const worker);
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif /* VP9_DECODER_VP9_THREAD_H_ */
|
||||
#endif // VP9_DECODER_VP9_THREAD_H_
|
||||
|
@@ -23,7 +23,8 @@ static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */
|
||||
const vp9_prob *const p) {
|
||||
register vp9_tree_index i = 0;
|
||||
|
||||
while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0);
|
||||
while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0)
|
||||
continue;
|
||||
|
||||
return -i;
|
||||
}
|
||||
|
@@ -1,220 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <emmintrin.h> // SSE2
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
|
||||
void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest,
|
||||
int stride) {
|
||||
uint8_t abs_diff;
|
||||
__m128i d;
|
||||
|
||||
// Prediction data.
|
||||
__m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride));
|
||||
__m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride));
|
||||
__m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride));
|
||||
__m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride));
|
||||
__m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride));
|
||||
__m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride));
|
||||
__m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride));
|
||||
__m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride));
|
||||
|
||||
p0 = _mm_unpacklo_epi64(p0, p1);
|
||||
p2 = _mm_unpacklo_epi64(p2, p3);
|
||||
p4 = _mm_unpacklo_epi64(p4, p5);
|
||||
p6 = _mm_unpacklo_epi64(p6, p7);
|
||||
|
||||
// Clip diff value to [0, 255] range. Then, do addition or subtraction
|
||||
// according to its sign.
|
||||
if (diff >= 0) {
|
||||
abs_diff = (diff > 255) ? 255 : diff;
|
||||
d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
|
||||
|
||||
p0 = _mm_adds_epu8(p0, d);
|
||||
p2 = _mm_adds_epu8(p2, d);
|
||||
p4 = _mm_adds_epu8(p4, d);
|
||||
p6 = _mm_adds_epu8(p6, d);
|
||||
} else {
|
||||
abs_diff = (diff < -255) ? 255 : -diff;
|
||||
d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
|
||||
|
||||
p0 = _mm_subs_epu8(p0, d);
|
||||
p2 = _mm_subs_epu8(p2, d);
|
||||
p4 = _mm_subs_epu8(p4, d);
|
||||
p6 = _mm_subs_epu8(p6, d);
|
||||
}
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dest + 0 * stride), p0);
|
||||
p0 = _mm_srli_si128(p0, 8);
|
||||
_mm_storel_epi64((__m128i *)(dest + 1 * stride), p0);
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dest + 2 * stride), p2);
|
||||
p2 = _mm_srli_si128(p2, 8);
|
||||
_mm_storel_epi64((__m128i *)(dest + 3 * stride), p2);
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dest + 4 * stride), p4);
|
||||
p4 = _mm_srli_si128(p4, 8);
|
||||
_mm_storel_epi64((__m128i *)(dest + 5 * stride), p4);
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dest + 6 * stride), p6);
|
||||
p6 = _mm_srli_si128(p6, 8);
|
||||
_mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
|
||||
}
|
||||
|
||||
void vp9_add_constant_residual_16x16_sse2(const int16_t diff, uint8_t *dest,
|
||||
int stride) {
|
||||
uint8_t abs_diff;
|
||||
__m128i d;
|
||||
|
||||
// Prediction data.
|
||||
__m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
|
||||
__m128i p1 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
|
||||
__m128i p2 = _mm_load_si128((const __m128i *)(dest + 2 * stride));
|
||||
__m128i p3 = _mm_load_si128((const __m128i *)(dest + 3 * stride));
|
||||
__m128i p4 = _mm_load_si128((const __m128i *)(dest + 4 * stride));
|
||||
__m128i p5 = _mm_load_si128((const __m128i *)(dest + 5 * stride));
|
||||
__m128i p6 = _mm_load_si128((const __m128i *)(dest + 6 * stride));
|
||||
__m128i p7 = _mm_load_si128((const __m128i *)(dest + 7 * stride));
|
||||
__m128i p8 = _mm_load_si128((const __m128i *)(dest + 8 * stride));
|
||||
__m128i p9 = _mm_load_si128((const __m128i *)(dest + 9 * stride));
|
||||
__m128i p10 = _mm_load_si128((const __m128i *)(dest + 10 * stride));
|
||||
__m128i p11 = _mm_load_si128((const __m128i *)(dest + 11 * stride));
|
||||
__m128i p12 = _mm_load_si128((const __m128i *)(dest + 12 * stride));
|
||||
__m128i p13 = _mm_load_si128((const __m128i *)(dest + 13 * stride));
|
||||
__m128i p14 = _mm_load_si128((const __m128i *)(dest + 14 * stride));
|
||||
__m128i p15 = _mm_load_si128((const __m128i *)(dest + 15 * stride));
|
||||
|
||||
// Clip diff value to [0, 255] range. Then, do addition or subtraction
|
||||
// according to its sign.
|
||||
if (diff >= 0) {
|
||||
abs_diff = (diff > 255) ? 255 : diff;
|
||||
d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
|
||||
|
||||
p0 = _mm_adds_epu8(p0, d);
|
||||
p1 = _mm_adds_epu8(p1, d);
|
||||
p2 = _mm_adds_epu8(p2, d);
|
||||
p3 = _mm_adds_epu8(p3, d);
|
||||
p4 = _mm_adds_epu8(p4, d);
|
||||
p5 = _mm_adds_epu8(p5, d);
|
||||
p6 = _mm_adds_epu8(p6, d);
|
||||
p7 = _mm_adds_epu8(p7, d);
|
||||
p8 = _mm_adds_epu8(p8, d);
|
||||
p9 = _mm_adds_epu8(p9, d);
|
||||
p10 = _mm_adds_epu8(p10, d);
|
||||
p11 = _mm_adds_epu8(p11, d);
|
||||
p12 = _mm_adds_epu8(p12, d);
|
||||
p13 = _mm_adds_epu8(p13, d);
|
||||
p14 = _mm_adds_epu8(p14, d);
|
||||
p15 = _mm_adds_epu8(p15, d);
|
||||
} else {
|
||||
abs_diff = (diff < -255) ? 255 : -diff;
|
||||
d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
|
||||
|
||||
p0 = _mm_subs_epu8(p0, d);
|
||||
p1 = _mm_subs_epu8(p1, d);
|
||||
p2 = _mm_subs_epu8(p2, d);
|
||||
p3 = _mm_subs_epu8(p3, d);
|
||||
p4 = _mm_subs_epu8(p4, d);
|
||||
p5 = _mm_subs_epu8(p5, d);
|
||||
p6 = _mm_subs_epu8(p6, d);
|
||||
p7 = _mm_subs_epu8(p7, d);
|
||||
p8 = _mm_subs_epu8(p8, d);
|
||||
p9 = _mm_subs_epu8(p9, d);
|
||||
p10 = _mm_subs_epu8(p10, d);
|
||||
p11 = _mm_subs_epu8(p11, d);
|
||||
p12 = _mm_subs_epu8(p12, d);
|
||||
p13 = _mm_subs_epu8(p13, d);
|
||||
p14 = _mm_subs_epu8(p14, d);
|
||||
p15 = _mm_subs_epu8(p15, d);
|
||||
}
|
||||
|
||||
// Store results
|
||||
_mm_store_si128((__m128i *)(dest + 0 * stride), p0);
|
||||
_mm_store_si128((__m128i *)(dest + 1 * stride), p1);
|
||||
_mm_store_si128((__m128i *)(dest + 2 * stride), p2);
|
||||
_mm_store_si128((__m128i *)(dest + 3 * stride), p3);
|
||||
_mm_store_si128((__m128i *)(dest + 4 * stride), p4);
|
||||
_mm_store_si128((__m128i *)(dest + 5 * stride), p5);
|
||||
_mm_store_si128((__m128i *)(dest + 6 * stride), p6);
|
||||
_mm_store_si128((__m128i *)(dest + 7 * stride), p7);
|
||||
_mm_store_si128((__m128i *)(dest + 8 * stride), p8);
|
||||
_mm_store_si128((__m128i *)(dest + 9 * stride), p9);
|
||||
_mm_store_si128((__m128i *)(dest + 10 * stride), p10);
|
||||
_mm_store_si128((__m128i *)(dest + 11 * stride), p11);
|
||||
_mm_store_si128((__m128i *)(dest + 12 * stride), p12);
|
||||
_mm_store_si128((__m128i *)(dest + 13 * stride), p13);
|
||||
_mm_store_si128((__m128i *)(dest + 14 * stride), p14);
|
||||
_mm_store_si128((__m128i *)(dest + 15 * stride), p15);
|
||||
}
|
||||
|
||||
void vp9_add_constant_residual_32x32_sse2(const int16_t diff, uint8_t *dest,
|
||||
int stride) {
|
||||
uint8_t abs_diff;
|
||||
__m128i d;
|
||||
int i = 8;
|
||||
|
||||
if (diff >= 0) {
|
||||
abs_diff = (diff > 255) ? 255 : diff;
|
||||
d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
|
||||
} else {
|
||||
abs_diff = (diff < -255) ? 255 : -diff;
|
||||
d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
|
||||
}
|
||||
|
||||
do {
|
||||
// Prediction data.
|
||||
__m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
|
||||
__m128i p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16));
|
||||
__m128i p2 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
|
||||
__m128i p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16));
|
||||
__m128i p4 = _mm_load_si128((const __m128i *)(dest + 2 * stride));
|
||||
__m128i p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride + 16));
|
||||
__m128i p6 = _mm_load_si128((const __m128i *)(dest + 3 * stride));
|
||||
__m128i p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride + 16));
|
||||
|
||||
// Clip diff value to [0, 255] range. Then, do addition or subtraction
|
||||
// according to its sign.
|
||||
if (diff >= 0) {
|
||||
p0 = _mm_adds_epu8(p0, d);
|
||||
p1 = _mm_adds_epu8(p1, d);
|
||||
p2 = _mm_adds_epu8(p2, d);
|
||||
p3 = _mm_adds_epu8(p3, d);
|
||||
p4 = _mm_adds_epu8(p4, d);
|
||||
p5 = _mm_adds_epu8(p5, d);
|
||||
p6 = _mm_adds_epu8(p6, d);
|
||||
p7 = _mm_adds_epu8(p7, d);
|
||||
} else {
|
||||
p0 = _mm_subs_epu8(p0, d);
|
||||
p1 = _mm_subs_epu8(p1, d);
|
||||
p2 = _mm_subs_epu8(p2, d);
|
||||
p3 = _mm_subs_epu8(p3, d);
|
||||
p4 = _mm_subs_epu8(p4, d);
|
||||
p5 = _mm_subs_epu8(p5, d);
|
||||
p6 = _mm_subs_epu8(p6, d);
|
||||
p7 = _mm_subs_epu8(p7, d);
|
||||
}
|
||||
|
||||
// Store results
|
||||
_mm_store_si128((__m128i *)(dest + 0 * stride), p0);
|
||||
_mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1);
|
||||
_mm_store_si128((__m128i *)(dest + 1 * stride), p2);
|
||||
_mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3);
|
||||
_mm_store_si128((__m128i *)(dest + 2 * stride), p4);
|
||||
_mm_store_si128((__m128i *)(dest + 2 * stride + 16), p5);
|
||||
_mm_store_si128((__m128i *)(dest + 3 * stride), p6);
|
||||
_mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7);
|
||||
|
||||
dest += 4 * stride;
|
||||
} while (--i);
|
||||
}
|
@@ -283,7 +283,7 @@ static void pack_mb_tokens(vp9_writer* const bc,
|
||||
const TOKENEXTRA *const stop) {
|
||||
TOKENEXTRA *p = *tp;
|
||||
|
||||
while (p < stop) {
|
||||
while (p < stop && p->token != EOSB_TOKEN) {
|
||||
const int t = p->token;
|
||||
const struct vp9_token *const a = vp9_coef_encodings + t;
|
||||
const vp9_extra_bit *const b = vp9_extra_bits + t;
|
||||
@@ -293,10 +293,6 @@ static void pack_mb_tokens(vp9_writer* const bc,
|
||||
int n = a->len;
|
||||
vp9_prob probs[ENTROPY_NODES];
|
||||
|
||||
if (t == EOSB_TOKEN) {
|
||||
++p;
|
||||
break;
|
||||
}
|
||||
if (t >= TWO_TOKEN) {
|
||||
vp9_model_to_full_probs(p->context_tree, probs);
|
||||
pp = probs;
|
||||
@@ -338,7 +334,7 @@ static void pack_mb_tokens(vp9_writer* const bc,
|
||||
++p;
|
||||
}
|
||||
|
||||
*tp = p;
|
||||
*tp = p + (p->token == EOSB_TOKEN);
|
||||
}
|
||||
|
||||
static void write_sb_mv_ref(vp9_writer *w, MB_PREDICTION_MODE mode,
|
||||
@@ -488,17 +484,13 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
|
||||
}
|
||||
|
||||
if (bsize < BLOCK_8X8) {
|
||||
int j;
|
||||
MB_PREDICTION_MODE blockmode;
|
||||
int_mv blockmv;
|
||||
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
|
||||
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
|
||||
int idx, idy;
|
||||
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
|
||||
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
|
||||
j = idy * 2 + idx;
|
||||
blockmode = x->partition_info->bmi[j].mode;
|
||||
blockmv = m->bmi[j].as_mv[0];
|
||||
const int j = idy * 2 + idx;
|
||||
const MB_PREDICTION_MODE blockmode = x->partition_info->bmi[j].mode;
|
||||
write_sb_mv_ref(bc, blockmode, mv_ref_p);
|
||||
++cm->counts.inter_mode[mi->mode_context[rf]]
|
||||
[inter_mode_offset(blockmode)];
|
||||
@@ -507,14 +499,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
|
||||
#ifdef ENTROPY_STATS
|
||||
active_section = 11;
|
||||
#endif
|
||||
vp9_encode_mv(cpi, bc, &blockmv.as_mv, &mi->best_mv.as_mv,
|
||||
nmvc, allow_hp);
|
||||
vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv,
|
||||
&mi->best_mv[0].as_mv, nmvc, allow_hp);
|
||||
|
||||
if (mi->ref_frame[1] > INTRA_FRAME)
|
||||
vp9_encode_mv(cpi, bc,
|
||||
&m->bmi[j].as_mv[1].as_mv,
|
||||
&mi->best_second_mv.as_mv,
|
||||
nmvc, allow_hp);
|
||||
if (has_second_ref(mi))
|
||||
vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv,
|
||||
&mi->best_mv[1].as_mv, nmvc, allow_hp);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -522,12 +512,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
|
||||
#ifdef ENTROPY_STATS
|
||||
active_section = 5;
|
||||
#endif
|
||||
vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv.as_mv,
|
||||
nmvc, allow_hp);
|
||||
vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv,
|
||||
&mi->best_mv[0].as_mv, nmvc, allow_hp);
|
||||
|
||||
if (mi->ref_frame[1] > INTRA_FRAME)
|
||||
vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv.as_mv,
|
||||
nmvc, allow_hp);
|
||||
if (has_second_ref(mi))
|
||||
vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv,
|
||||
&mi->best_mv[1].as_mv, nmvc, allow_hp);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1162,7 +1152,7 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
|
||||
|
||||
static void write_interp_filter_type(INTERPOLATIONFILTERTYPE type,
|
||||
struct vp9_write_bit_buffer *wb) {
|
||||
const int type_to_literal[] = { 1, 0, 2, 3 };
|
||||
const int type_to_literal[] = { 1, 0, 2 };
|
||||
|
||||
vp9_wb_write_bit(wb, type == SWITCHABLE);
|
||||
if (type != SWITCHABLE)
|
||||
|
@@ -30,10 +30,10 @@ typedef struct {
|
||||
} PARTITION_INFO;
|
||||
|
||||
// Structure to hold snapshot of coding context during the mode picking process
|
||||
// TODO Do we need all of these?
|
||||
typedef struct {
|
||||
MODE_INFO mic;
|
||||
PARTITION_INFO partition_info;
|
||||
unsigned char zcoeff_blk[256];
|
||||
int skip;
|
||||
int_mv best_ref_mv;
|
||||
int_mv second_best_ref_mv;
|
||||
@@ -136,6 +136,7 @@ struct macroblock {
|
||||
int mv_row_min;
|
||||
int mv_row_max;
|
||||
|
||||
unsigned char zcoeff_blk[TX_SIZES][256];
|
||||
int skip;
|
||||
|
||||
int encode_breakout;
|
||||
|
@@ -58,10 +58,10 @@ void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
|
||||
for (i = 0; i < 4; ++i) {
|
||||
// Load inputs.
|
||||
if (0 == pass) {
|
||||
input[0] = in[0 * stride] << 4;
|
||||
input[1] = in[1 * stride] << 4;
|
||||
input[2] = in[2 * stride] << 4;
|
||||
input[3] = in[3 * stride] << 4;
|
||||
input[0] = in[0 * stride] * 16;
|
||||
input[1] = in[1 * stride] * 16;
|
||||
input[2] = in[2 * stride] * 16;
|
||||
input[3] = in[3 * stride] * 16;
|
||||
if (i == 0 && input[0]) {
|
||||
input[0] += 1;
|
||||
}
|
||||
@@ -160,7 +160,7 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output,
|
||||
// Columns
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
temp_in[j] = input[j * pitch + i] << 4;
|
||||
temp_in[j] = input[j * pitch + i] * 16;
|
||||
if (i == 0 && temp_in[0])
|
||||
temp_in[0] += 1;
|
||||
ht.cols(temp_in, temp_out);
|
||||
@@ -250,14 +250,14 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) {
|
||||
int i;
|
||||
for (i = 0; i < 8; i++) {
|
||||
// stage 1
|
||||
s0 = (input[0 * stride] + input[7 * stride]) << 2;
|
||||
s1 = (input[1 * stride] + input[6 * stride]) << 2;
|
||||
s2 = (input[2 * stride] + input[5 * stride]) << 2;
|
||||
s3 = (input[3 * stride] + input[4 * stride]) << 2;
|
||||
s4 = (input[3 * stride] - input[4 * stride]) << 2;
|
||||
s5 = (input[2 * stride] - input[5 * stride]) << 2;
|
||||
s6 = (input[1 * stride] - input[6 * stride]) << 2;
|
||||
s7 = (input[0 * stride] - input[7 * stride]) << 2;
|
||||
s0 = (input[0 * stride] + input[7 * stride]) * 4;
|
||||
s1 = (input[1 * stride] + input[6 * stride]) * 4;
|
||||
s2 = (input[2 * stride] + input[5 * stride]) * 4;
|
||||
s3 = (input[3 * stride] + input[4 * stride]) * 4;
|
||||
s4 = (input[3 * stride] - input[4 * stride]) * 4;
|
||||
s5 = (input[2 * stride] - input[5 * stride]) * 4;
|
||||
s6 = (input[1 * stride] - input[6 * stride]) * 4;
|
||||
s7 = (input[0 * stride] - input[7 * stride]) * 4;
|
||||
|
||||
// fdct4_1d(step, step);
|
||||
x0 = s0 + s3;
|
||||
@@ -331,23 +331,23 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {
|
||||
for (i = 0; i < 16; i++) {
|
||||
if (0 == pass) {
|
||||
// Calculate input for the first 8 results.
|
||||
input[0] = (in[0 * stride] + in[15 * stride]) << 2;
|
||||
input[1] = (in[1 * stride] + in[14 * stride]) << 2;
|
||||
input[2] = (in[2 * stride] + in[13 * stride]) << 2;
|
||||
input[3] = (in[3 * stride] + in[12 * stride]) << 2;
|
||||
input[4] = (in[4 * stride] + in[11 * stride]) << 2;
|
||||
input[5] = (in[5 * stride] + in[10 * stride]) << 2;
|
||||
input[6] = (in[6 * stride] + in[ 9 * stride]) << 2;
|
||||
input[7] = (in[7 * stride] + in[ 8 * stride]) << 2;
|
||||
input[0] = (in[0 * stride] + in[15 * stride]) * 4;
|
||||
input[1] = (in[1 * stride] + in[14 * stride]) * 4;
|
||||
input[2] = (in[2 * stride] + in[13 * stride]) * 4;
|
||||
input[3] = (in[3 * stride] + in[12 * stride]) * 4;
|
||||
input[4] = (in[4 * stride] + in[11 * stride]) * 4;
|
||||
input[5] = (in[5 * stride] + in[10 * stride]) * 4;
|
||||
input[6] = (in[6 * stride] + in[ 9 * stride]) * 4;
|
||||
input[7] = (in[7 * stride] + in[ 8 * stride]) * 4;
|
||||
// Calculate input for the next 8 results.
|
||||
step1[0] = (in[7 * stride] - in[ 8 * stride]) << 2;
|
||||
step1[1] = (in[6 * stride] - in[ 9 * stride]) << 2;
|
||||
step1[2] = (in[5 * stride] - in[10 * stride]) << 2;
|
||||
step1[3] = (in[4 * stride] - in[11 * stride]) << 2;
|
||||
step1[4] = (in[3 * stride] - in[12 * stride]) << 2;
|
||||
step1[5] = (in[2 * stride] - in[13 * stride]) << 2;
|
||||
step1[6] = (in[1 * stride] - in[14 * stride]) << 2;
|
||||
step1[7] = (in[0 * stride] - in[15 * stride]) << 2;
|
||||
step1[0] = (in[7 * stride] - in[ 8 * stride]) * 4;
|
||||
step1[1] = (in[6 * stride] - in[ 9 * stride]) * 4;
|
||||
step1[2] = (in[5 * stride] - in[10 * stride]) * 4;
|
||||
step1[3] = (in[4 * stride] - in[11 * stride]) * 4;
|
||||
step1[4] = (in[3 * stride] - in[12 * stride]) * 4;
|
||||
step1[5] = (in[2 * stride] - in[13 * stride]) * 4;
|
||||
step1[6] = (in[1 * stride] - in[14 * stride]) * 4;
|
||||
step1[7] = (in[0 * stride] - in[15 * stride]) * 4;
|
||||
} else {
|
||||
// Calculate input for the first 8 results.
|
||||
input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
|
||||
@@ -575,7 +575,7 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output,
|
||||
// Columns
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j)
|
||||
temp_in[j] = input[j * pitch + i] << 2;
|
||||
temp_in[j] = input[j * pitch + i] * 4;
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 8; ++j)
|
||||
outptr[j * 8 + i] = temp_out[j];
|
||||
@@ -637,10 +637,10 @@ void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {
|
||||
c1 = e1 - c1;
|
||||
a1 -= c1;
|
||||
d1 += b1;
|
||||
op[0] = a1 << WHT_UPSCALE_FACTOR;
|
||||
op[1] = c1 << WHT_UPSCALE_FACTOR;
|
||||
op[2] = d1 << WHT_UPSCALE_FACTOR;
|
||||
op[3] = b1 << WHT_UPSCALE_FACTOR;
|
||||
op[0] = a1 * UNIT_QUANT_FACTOR;
|
||||
op[1] = c1 * UNIT_QUANT_FACTOR;
|
||||
op[2] = d1 * UNIT_QUANT_FACTOR;
|
||||
op[3] = b1 * UNIT_QUANT_FACTOR;
|
||||
|
||||
ip += 4;
|
||||
op += 4;
|
||||
@@ -975,7 +975,7 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output,
|
||||
// Columns
|
||||
for (i = 0; i < 16; ++i) {
|
||||
for (j = 0; j < 16; ++j)
|
||||
temp_in[j] = input[j * pitch + i] << 2;
|
||||
temp_in[j] = input[j * pitch + i] * 4;
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 16; ++j)
|
||||
outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
|
||||
@@ -1335,7 +1335,7 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
|
||||
for (i = 0; i < 32; ++i) {
|
||||
int temp_in[32], temp_out[32];
|
||||
for (j = 0; j < 32; ++j)
|
||||
temp_in[j] = input[j * shortpitch + i] << 2;
|
||||
temp_in[j] = input[j * shortpitch + i] * 4;
|
||||
dct32_1d(temp_in, temp_out, 0);
|
||||
for (j = 0; j < 32; ++j)
|
||||
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
|
||||
@@ -1364,7 +1364,7 @@ void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {
|
||||
for (i = 0; i < 32; ++i) {
|
||||
int temp_in[32], temp_out[32];
|
||||
for (j = 0; j < 32; ++j)
|
||||
temp_in[j] = input[j * shortpitch + i] << 2;
|
||||
temp_in[j] = input[j * shortpitch + i] * 4;
|
||||
dct32_1d(temp_in, temp_out, 0);
|
||||
for (j = 0; j < 32; ++j)
|
||||
// TODO(cd): see quality impact of only doing
|
||||
|
@@ -390,6 +390,9 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
|
||||
}
|
||||
|
||||
x->skip = ctx->skip;
|
||||
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
|
||||
sizeof(ctx->zcoeff_blk));
|
||||
|
||||
if (!output_enabled)
|
||||
return;
|
||||
|
||||
@@ -428,18 +431,19 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
|
||||
cpi->mode_chosen_counts[mb_mode_index]++;
|
||||
if (is_inter_block(mbmi)
|
||||
&& (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
|
||||
int_mv best_mv, best_second_mv;
|
||||
int_mv best_mv[2];
|
||||
const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
|
||||
const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1];
|
||||
best_mv.as_int = ctx->best_ref_mv.as_int;
|
||||
best_second_mv.as_int = ctx->second_best_ref_mv.as_int;
|
||||
best_mv[0].as_int = ctx->best_ref_mv.as_int;
|
||||
best_mv[1].as_int = ctx->second_best_ref_mv.as_int;
|
||||
if (mbmi->mode == NEWMV) {
|
||||
best_mv.as_int = mbmi->ref_mvs[rf1][0].as_int;
|
||||
best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int;
|
||||
best_mv[0].as_int = mbmi->ref_mvs[rf1][0].as_int;
|
||||
if (rf2 > 0)
|
||||
best_mv[1].as_int = mbmi->ref_mvs[rf2][0].as_int;
|
||||
}
|
||||
mbmi->best_mv.as_int = best_mv.as_int;
|
||||
mbmi->best_second_mv.as_int = best_second_mv.as_int;
|
||||
vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
|
||||
mbmi->best_mv[0].as_int = best_mv[0].as_int;
|
||||
mbmi->best_mv[1].as_int = best_mv[1].as_int;
|
||||
vp9_update_mv_count(cpi, x, best_mv);
|
||||
}
|
||||
|
||||
if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) {
|
||||
@@ -936,7 +940,7 @@ static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
|
||||
for (block_col = 0; block_col < 8; ++block_col) {
|
||||
MODE_INFO * prev_mi = prev_mi_8x8[block_row * mis + block_col];
|
||||
BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
|
||||
int offset;
|
||||
ptrdiff_t offset;
|
||||
|
||||
if (prev_mi) {
|
||||
offset = prev_mi - cm->prev_mi;
|
||||
@@ -947,323 +951,6 @@ static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
|
||||
}
|
||||
}
|
||||
|
||||
static void set_block_size(VP9_COMMON * const cm, MODE_INFO **mi_8x8,
|
||||
BLOCK_SIZE bsize, int mis, int mi_row,
|
||||
int mi_col) {
|
||||
int r, c;
|
||||
const int bs = MAX(num_8x8_blocks_wide_lookup[bsize],
|
||||
num_8x8_blocks_high_lookup[bsize]);
|
||||
const int idx_str = mis * mi_row + mi_col;
|
||||
MODE_INFO **const mi2 = &mi_8x8[idx_str];
|
||||
|
||||
mi2[0] = cm->mi + idx_str;
|
||||
mi2[0]->mbmi.sb_type = bsize;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (c = 0; c < bs; c++)
|
||||
if (mi_row + r < cm->mi_rows && mi_col + c < cm->mi_cols)
|
||||
mi2[r * mis + c] = mi2[0];
|
||||
}
|
||||
|
||||
// Error statistics accumulated over a set of samples, plus the variance
// derived from them by fill_variance().
typedef struct {
|
||||
// Sum of squared errors over the samples.
int64_t sum_square_error;
|
||||
// Sum of errors over the samples.
int64_t sum_error;
|
||||
// Number of samples the sums cover.
int count;
|
||||
// 256 * (sum_square_error - sum_error^2 / count) / count, or 0 when count
// is not positive.
int variance;
|
||||
} var;
|
||||
|
||||
// Statistics of one block for each way of partitioning it, as combined by
// fill_variance_tree() from the block's four children.
typedef struct {
|
||||
// Whole block (no split): vert[0] combined with vert[1].
var none;
|
||||
// horz[0] combines children 0 and 1, horz[1] combines children 2 and 3.
var horz[2];
|
||||
// vert[0] combines children 0 and 2, vert[1] combines children 1 and 3.
var vert[2];
|
||||
} partition_variance;
|
||||
|
||||
// Declares one level of the variance tree: a struct named TYPE holding the
// level's own partition_variance (vt) and four children of type BLOCKSIZE
// (split[4]).
#define VT(TYPE, BLOCKSIZE) \
|
||||
typedef struct { \
|
||||
partition_variance vt; \
|
||||
BLOCKSIZE split[4]; } TYPE;
|
||||
|
||||
// Tree levels from the smallest block up; each level nests four of the level
// below it, and the v8x8 level bottoms out in plain var leaves.
VT(v8x8, var)
|
||||
VT(v16x16, v8x8)
|
||||
VT(v32x32, v16x16)
|
||||
VT(v64x64, v32x32)
|
||||
|
||||
// Size-independent view of one variance-tree level, filled by tree_to_node().
typedef struct {
|
||||
// The level's own partition statistics.
partition_variance *vt;
|
||||
// Statistics of each of the four children (the child's vt.none, or the var
// leaf itself at the 8x8 level).
var *split[4];
|
||||
} vt_node;
|
||||
|
||||
// Identifiers for variance-tree levels.
// NOTE(review): not referenced by any code visible in this section; confirm
// whether it is still needed.
typedef enum {
|
||||
V16X16,
|
||||
V32X32,
|
||||
V64X64,
|
||||
} TREE_LEVEL;
|
||||
|
||||
// Builds a size-independent, pointer-based view of one variance-tree level.
//
// data  - the tree level to view; must really be a v64x64, v32x32, v16x16 or
//         v8x8, as selected by bsize.
// bsize - BLOCK_64X64, BLOCK_32X32, BLOCK_16X16 or BLOCK_8X8.
// node  - output. node->vt points at the level's own partition_variance and
//         node->split[i] at the statistics of child i: the child's vt.none
//         for the three larger sizes, the var leaf itself for BLOCK_8X8.
//
// Any other bsize is a caller error: node is cleared to null pointers and the
// assertion fires in builds where assertions are enabled.
static void tree_to_node(void *data, BLOCK_SIZE bsize, vt_node *node) {
  int i;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *) data;
      node->vt = &vt->vt;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].vt.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *) data;
      node->vt = &vt->vt;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].vt.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *) data;
      node->vt = &vt->vt;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].vt.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *) data;
      node->vt = &vt->vt;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i];
      break;
    }
    default:
      node->vt = NULL;
      for (i = 0; i < 4; i++)
        node->split[i] = NULL;
      // This used to be assert(-1); a non-zero argument always passes, so the
      // check could never fire. assert(0) is what actually flags the error.
      assert(0);
      break;
  }
}
|
||||
|
||||
// Set variance values given sum square error, sum error, count.
|
||||
static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
|
||||
v->sum_square_error = s2;
|
||||
v->sum_error = s;
|
||||
v->count = c;
|
||||
if (c > 0)
|
||||
v->variance = 256
|
||||
* (v->sum_square_error - v->sum_error * v->sum_error / v->count)
|
||||
/ v->count;
|
||||
else
|
||||
v->variance = 0;
|
||||
}
|
||||
|
||||
// Combine 2 variance structures by summing the sum_error, sum_square_error,
|
||||
// and counts and then calculating the new variance.
|
||||
void sum_2_variances(var *r, var *a, var*b) {
|
||||
fill_variance(r, a->sum_square_error + b->sum_square_error,
|
||||
a->sum_error + b->sum_error, a->count + b->count);
|
||||
}
|
||||
|
||||
// Fills in the partition statistics of one tree level from its four children.
//
// data  - the tree level (type selected by bsize, see tree_to_node()).
// bsize - block size of that level.
//
// Children 0..3 are paired as (0,1) and (2,3) for the two horizontal halves
// and as (0,2) and (1,3) for the two vertical halves; the whole-block entry
// is the sum of the two vertical halves, so it must be computed last.
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  vt_node node;
  partition_variance *stats;
  var *child0, *child1, *child2, *child3;

  tree_to_node(data, bsize, &node);
  stats = node.vt;
  child0 = node.split[0];
  child1 = node.split[1];
  child2 = node.split[2];
  child3 = node.split[3];

  sum_2_variances(&stats->horz[0], child0, child1);
  sum_2_variances(&stats->horz[1], child2, child3);
  sum_2_variances(&stats->vert[0], child0, child2);
  sum_2_variances(&stats->vert[1], child1, child3);
  sum_2_variances(&stats->none, &stats->vert[0], &stats->vert[1]);
}
|
||||
|
||||
#if PERFORM_RANDOM_PARTITIONING
|
||||
static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO *m,
|
||||
BLOCK_SIZE block_size, int mi_row,
|
||||
int mi_col, int mi_size) {
|
||||
VP9_COMMON * const cm = &cpi->common;
|
||||
vt_node vt;
|
||||
const int mis = cm->mode_info_stride;
|
||||
int64_t threshold = 4 * cpi->common.base_qindex * cpi->common.base_qindex;
|
||||
|
||||
tree_to_node(data, block_size, &vt);
|
||||
|
||||
// split none is available only if we have more than half a block size
|
||||
// in width and height inside the visible image
|
||||
if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows &&
|
||||
(rand() & 3) < 1) {
|
||||
set_block_size(cm, m, block_size, mis, mi_row, mi_col);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// vertical split is available on all but the bottom border
|
||||
if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
|
||||
&& (rand() & 3) < 1) {
|
||||
set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
|
||||
mi_col);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// horizontal split is available on all but the right border
|
||||
if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
|
||||
&& (rand() & 3) < 1) {
|
||||
set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
|
||||
mi_col);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else // !PERFORM_RANDOM_PARTITIONING
|
||||
|
||||
static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO **m,
|
||||
BLOCK_SIZE bsize, int mi_row,
|
||||
int mi_col, int mi_size) {
|
||||
VP9_COMMON * const cm = &cpi->common;
|
||||
vt_node vt;
|
||||
const int mis = cm->mode_info_stride;
|
||||
int64_t threshold = 50 * cpi->common.base_qindex;
|
||||
|
||||
tree_to_node(data, bsize, &vt);
|
||||
|
||||
// split none is available only if we have more than half a block size
|
||||
// in width and height inside the visible image
|
||||
if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows
|
||||
&& vt.vt->none.variance < threshold) {
|
||||
set_block_size(cm, m, bsize, mis, mi_row, mi_col);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// vertical split is available on all but the bottom border
|
||||
if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
|
||||
&& vt.vt->vert[1].variance < threshold) {
|
||||
set_block_size(cm, m, get_subsize(bsize, PARTITION_VERT), mis, mi_row,
|
||||
mi_col);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// horizontal split is available on all but the right border
|
||||
if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
|
||||
&& vt.vt->horz[1].variance < threshold) {
|
||||
set_block_size(cm, m, get_subsize(bsize, PARTITION_HORZ), mis, mi_row,
|
||||
mi_col);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif // PERFORM_RANDOM_PARTITIONING
|
||||
|
||||
// Chooses the partitioning of the 64x64 block at (mi_row, mi_col) from the
// variance of its prediction error.
//
// cpi     - encoder instance.
// mi_8x8  - base of the mode-info pointer grid; partition choices are
//           recorded in it through set_vt_partitioning() / set_block_size().
// mi_row, mi_col - position of the 64x64 block.
//
// Steps:
//   1. Pick the reference: a block of zeros on key frames, otherwise the
//      inter prediction built from LAST_FRAME with the nearest candidate mv.
//   2. Measure SSE and sum of (source - reference) on plane 0 for every 8x8
//      cell inside the visible area; cells outside keep zero statistics.
//   3. Combine the cells upwards into 16x16, 32x32 and 64x64 statistics.
//   4. Walk down from 64x64, stopping at the first size that
//      set_vt_partitioning() accepts, or at 8x8.
//
// The variance thresholds are applied inside set_vt_partitioning(); the
// threshold this function used to compute locally was never read and has
// been removed.
static void choose_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
                                int mi_row, int mi_col) {
  VP9_COMMON * const cm = &cpi->common;
  MACROBLOCK *x = &cpi->mb;
  MACROBLOCKD *xd = &cpi->mb.e_mbd;
  const int mis = cm->mode_info_stride;
  int i, j, k;
  v64x64 vt;
  unsigned char * s;
  int sp;
  const unsigned char * d;
  int dp;
  int pixels_wide = 64, pixels_high = 64;

  vp9_zero(vt);
  set_offsets(cpi, mi_row, mi_col, BLOCK_64X64);

  // The edge distances are negative when the block overhangs the frame;
  // >> 3 converts them to the pixel counts used below.
  if (xd->mb_to_right_edge < 0)
    pixels_wide += (xd->mb_to_right_edge >> 3);

  if (xd->mb_to_bottom_edge < 0)
    pixels_high += (xd->mb_to_bottom_edge >> 3);

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // Default reference: all zeros, so the statistics describe the source.
  d = vp9_64x64_zeros;
  dp = 64;
  if (cm->frame_type != KEY_FRAME) {
    int_mv nearest_mv, near_mv;
    const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)];
    YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
    YV12_BUFFER_CONFIG *second_ref_fb = NULL;

    setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
                     &xd->scale_factor[0]);
    setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
                     &xd->scale_factor[1]);

    xd->this_mi->mbmi.ref_frame[0] = LAST_FRAME;
    xd->this_mi->mbmi.sb_type = BLOCK_64X64;
    // NOTE(review): the candidate list is read through mi_8x8[0] while the
    // fields above are written through xd->this_mi; confirm both refer to
    // the same mode info for every caller.
    vp9_find_best_ref_mvs(xd,
                          mi_8x8[0]->mbmi.ref_mvs[mi_8x8[0]->mbmi.ref_frame[0]],
                          &nearest_mv, &near_mv);

    xd->this_mi->mbmi.mv[0] = nearest_mv;
    vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);

    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;
  }

  // Fill in the entire tree of 8x8 variances for splits. Bit 0 of each index
  // selects the column and bit 1 the row of the child within its parent.
  for (i = 0; i < 4; i++) {
    const int x32_idx = ((i & 1) << 5);
    const int y32_idx = ((i >> 1) << 5);
    for (j = 0; j < 4; j++) {
      const int x16_idx = x32_idx + ((j & 1) << 4);
      const int y16_idx = y32_idx + ((j >> 1) << 4);
      v16x16 *vst = &vt.split[i].split[j];
      for (k = 0; k < 4; k++) {
        int x_idx = x16_idx + ((k & 1) << 3);
        int y_idx = y16_idx + ((k >> 1) << 3);
        unsigned int sse = 0;
        int sum = 0;
        // Cells starting outside the visible area keep sse = sum = 0.
        if (x_idx < pixels_wide && y_idx < pixels_high)
          vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
                              d + y_idx * dp + x_idx, dp, &sse, &sum);
        fill_variance(&vst->split[k].vt.none, sse, sum, 64);
      }
    }
  }

  // Fill the rest of the variance tree by summing the split partition
  // values.
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
    }
    fill_variance_tree(&vt.split[i], BLOCK_32X32);
  }
  fill_variance_tree(&vt, BLOCK_64X64);

  // Now go through the entire structure, splitting every block size until
  // we get to one that's got a variance lower than our threshold, or we
  // hit 8x8. Offsets here are in 8x8 mode-info units.
  if (!set_vt_partitioning(cpi, &vt, mi_8x8, BLOCK_64X64, mi_row, mi_col,
                           4)) {
    for (i = 0; i < 4; ++i) {
      const int x32_idx = ((i & 1) << 2);
      const int y32_idx = ((i >> 1) << 2);
      if (!set_vt_partitioning(cpi, &vt.split[i], mi_8x8, BLOCK_32X32,
                               (mi_row + y32_idx), (mi_col + x32_idx), 2)) {
        for (j = 0; j < 4; ++j) {
          const int x16_idx = ((j & 1) << 1);
          const int y16_idx = ((j >> 1) << 1);
          if (!set_vt_partitioning(cpi, &vt.split[i].split[j], mi_8x8,
                                   BLOCK_16X16,
                                   (mi_row + y32_idx + y16_idx),
                                   (mi_col + x32_idx + x16_idx), 1)) {
            for (k = 0; k < 4; ++k) {
              const int x8_idx = (k & 1);
              const int y8_idx = (k >> 1);
              set_block_size(cm, mi_8x8, BLOCK_8X8, mis,
                             (mi_row + y32_idx + y16_idx + y8_idx),
                             (mi_col + x32_idx + x16_idx + x8_idx));
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8,
|
||||
TOKENEXTRA **tp, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize, int *rate, int64_t *dist,
|
||||
@@ -1881,12 +1568,12 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|
||||
best_dist = sum_dist;
|
||||
best_rd = sum_rd;
|
||||
*(get_sb_partitioning(x, bsize)) = subsize;
|
||||
} else {
|
||||
// skip rectangular partition test when larger block size
|
||||
// gives better rd cost
|
||||
if (cpi->sf.less_rectangular_check)
|
||||
do_rect &= !partition_none_allowed;
|
||||
}
|
||||
} else {
|
||||
// skip rectangular partition test when larger block size
|
||||
// gives better rd cost
|
||||
if (cpi->sf.less_rectangular_check)
|
||||
do_rect &= !partition_none_allowed;
|
||||
}
|
||||
partition_split_done = 1;
|
||||
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
|
||||
@@ -2056,7 +1743,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
|
||||
if (cpi->sf.reference_masking)
|
||||
rd_pick_reference_frame(cpi, mi_row, mi_col);
|
||||
|
||||
if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
|
||||
if (cpi->sf.use_lastframe_partitioning ||
|
||||
cpi->sf.use_one_partition_size_always ) {
|
||||
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
|
||||
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
|
||||
@@ -2068,10 +1755,6 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
|
||||
set_partitioning(cpi, mi_8x8, mi_row, mi_col);
|
||||
rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
|
||||
&dummy_rate, &dummy_dist, 1);
|
||||
} else if (cpi->sf.partition_by_variance) {
|
||||
choose_partitioning(cpi, cm->mi_grid_visible, mi_row, mi_col);
|
||||
rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
|
||||
&dummy_rate, &dummy_dist, 1);
|
||||
} else {
|
||||
if ((cpi->common.current_video_frame
|
||||
% cpi->sf.last_partitioning_redo_frequency) == 0
|
||||
@@ -2208,7 +1891,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
||||
cpi->inter_zz_count = 0;
|
||||
|
||||
vp9_zero(cm->counts.switchable_interp);
|
||||
vp9_zero(cpi->txfm_stepdown_count);
|
||||
vp9_zero(cpi->tx_stepdown_count);
|
||||
|
||||
xd->mi_8x8 = cm->mi_grid_visible;
|
||||
// required for vp9_frame_init_quantizer
|
||||
@@ -2347,18 +2030,19 @@ static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO **mi_8x8,
|
||||
int mis, TX_SIZE max_tx_size, int bw, int bh,
|
||||
int mi_row, int mi_col, BLOCK_SIZE bsize) {
|
||||
VP9_COMMON * const cm = &cpi->common;
|
||||
MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi;
|
||||
|
||||
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
|
||||
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
|
||||
return;
|
||||
} else {
|
||||
MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi;
|
||||
if (mbmi->tx_size > max_tx_size) {
|
||||
const int ymbs = MIN(bh, cm->mi_rows - mi_row);
|
||||
const int xmbs = MIN(bw, cm->mi_cols - mi_col);
|
||||
|
||||
if (mbmi->tx_size > max_tx_size) {
|
||||
const int ymbs = MIN(bh, cm->mi_rows - mi_row);
|
||||
const int xmbs = MIN(bw, cm->mi_cols - mi_col);
|
||||
|
||||
assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
|
||||
get_skip_flag(mi_8x8, mis, ymbs, xmbs));
|
||||
set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
|
||||
assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
|
||||
get_skip_flag(mi_8x8, mis, ymbs, xmbs));
|
||||
set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2453,9 +2137,9 @@ static void select_tx_mode(VP9_COMP *cpi) {
|
||||
unsigned int total = 0;
|
||||
int i;
|
||||
for (i = 0; i < TX_SIZES; ++i)
|
||||
total += cpi->txfm_stepdown_count[i];
|
||||
total += cpi->tx_stepdown_count[i];
|
||||
if (total) {
|
||||
double fraction = (double)cpi->txfm_stepdown_count[0] / total;
|
||||
double fraction = (double)cpi->tx_stepdown_count[0] / total;
|
||||
cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT;
|
||||
// printf("fraction = %f\n", fraction);
|
||||
} // else keep unchanged
|
||||
@@ -2627,7 +2311,6 @@ void vp9_encode_frame(VP9_COMP *cpi) {
|
||||
} else {
|
||||
encode_frame_internal(cpi);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void sum_intra_stats(VP9_COMP *cpi, const MODE_INFO *mi) {
|
||||
@@ -2732,7 +2415,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
|
||||
int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])];
|
||||
YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
|
||||
YV12_BUFFER_CONFIG *second_ref_fb = NULL;
|
||||
if (mbmi->ref_frame[1] > 0) {
|
||||
if (has_second_ref(mbmi)) {
|
||||
idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])];
|
||||
second_ref_fb = &cm->yv12_fb[idx];
|
||||
}
|
||||
@@ -2744,7 +2427,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
|
||||
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
|
||||
&xd->scale_factor[1]);
|
||||
|
||||
|
||||
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
|
||||
}
|
||||
|
||||
|
@@ -53,7 +53,7 @@ static void inverse_transform_b_8x8_add(int eob,
|
||||
if (eob <= 1)
|
||||
vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
|
||||
else if (eob <= 10)
|
||||
vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
|
||||
vp9_short_idct8x8_10_add(dqcoeff, dest, stride);
|
||||
else
|
||||
vp9_short_idct8x8_add(dqcoeff, dest, stride);
|
||||
}
|
||||
@@ -64,7 +64,7 @@ static void inverse_transform_b_16x16_add(int eob,
|
||||
if (eob <= 1)
|
||||
vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
|
||||
else if (eob <= 10)
|
||||
vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
|
||||
vp9_short_idct16x16_10_add(dqcoeff, dest, stride);
|
||||
else
|
||||
vp9_short_idct16x16_add(dqcoeff, dest, stride);
|
||||
}
|
||||
@@ -161,7 +161,7 @@ static void optimize_b(MACROBLOCK *mb,
|
||||
int best, band, pt;
|
||||
PLANE_TYPE type = pd->plane_type;
|
||||
int err_mult = plane_rd_mult[type];
|
||||
int default_eob;
|
||||
const int default_eob = 16 << (tx_size << 1);
|
||||
const int16_t *scan, *nb;
|
||||
const int mul = 1 + (tx_size == TX_32X32);
|
||||
uint8_t token_cache[1024];
|
||||
@@ -172,29 +172,7 @@ static void optimize_b(MACROBLOCK *mb,
|
||||
assert((!type && !plane) || (type && plane));
|
||||
dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
|
||||
qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
|
||||
switch (tx_size) {
|
||||
default:
|
||||
case TX_4X4:
|
||||
default_eob = 16;
|
||||
scan = get_scan_4x4(get_tx_type_4x4(type, xd, ib));
|
||||
band_translate = vp9_coefband_trans_4x4;
|
||||
break;
|
||||
case TX_8X8:
|
||||
scan = get_scan_8x8(get_tx_type_8x8(type, xd));
|
||||
default_eob = 64;
|
||||
band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
case TX_16X16:
|
||||
scan = get_scan_16x16(get_tx_type_16x16(type, xd));
|
||||
default_eob = 256;
|
||||
band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
case TX_32X32:
|
||||
scan = vp9_default_scan_32x32;
|
||||
default_eob = 1024;
|
||||
band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
}
|
||||
get_scan_and_band(xd, tx_size, type, ib, &scan, &nb, &band_translate);
|
||||
assert(eob <= default_eob);
|
||||
|
||||
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
|
||||
@@ -213,7 +191,6 @@ static void optimize_b(MACROBLOCK *mb,
|
||||
for (i = 0; i < eob; i++)
|
||||
token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
|
||||
qcoeff_ptr[scan[i]]].token];
|
||||
nb = vp9_get_coef_neighbors_handle(scan);
|
||||
|
||||
for (i = eob; i-- > i0;) {
|
||||
int base_bits, d2, dx;
|
||||
@@ -387,36 +364,10 @@ static void optimize_init_b(int plane, BLOCK_SIZE bsize,
|
||||
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
|
||||
const MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
|
||||
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
|
||||
int i;
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
vpx_memcpy(args->ctx->ta[plane], pd->above_context,
|
||||
sizeof(ENTROPY_CONTEXT) * num_4x4_w);
|
||||
vpx_memcpy(args->ctx->tl[plane], pd->left_context,
|
||||
sizeof(ENTROPY_CONTEXT) * num_4x4_h);
|
||||
break;
|
||||
case TX_8X8:
|
||||
for (i = 0; i < num_4x4_w; i += 2)
|
||||
args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i];
|
||||
for (i = 0; i < num_4x4_h; i += 2)
|
||||
args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i];
|
||||
break;
|
||||
case TX_16X16:
|
||||
for (i = 0; i < num_4x4_w; i += 4)
|
||||
args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i];
|
||||
for (i = 0; i < num_4x4_h; i += 4)
|
||||
args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i];
|
||||
break;
|
||||
case TX_32X32:
|
||||
for (i = 0; i < num_4x4_w; i += 8)
|
||||
args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i];
|
||||
for (i = 0; i < num_4x4_h; i += 8)
|
||||
args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i];
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane],
|
||||
pd->above_context, pd->left_context,
|
||||
num_4x4_w, num_4x4_h);
|
||||
}
|
||||
|
||||
void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
@@ -504,6 +455,14 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
||||
uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
|
||||
pd->dst.buf, pd->dst.stride);
|
||||
|
||||
// TODO(jingning): per transformed block zero forcing only enabled for
|
||||
// luma component. will integrate chroma components as well.
|
||||
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
|
||||
pd->eobs[block] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
|
||||
|
||||
if (x->optimize)
|
||||
|
@@ -314,44 +314,34 @@ void vp9_build_nmv_cost_table(int *mvjoint,
|
||||
build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
|
||||
}
|
||||
|
||||
void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int_mv *best_ref_mv, int_mv *second_best_ref_mv) {
|
||||
static void inc_mvs(int_mv mv[2], int_mv ref[2], int is_compound,
|
||||
nmv_context_counts *counts) {
|
||||
int i;
|
||||
for (i = 0; i < 1 + is_compound; ++i) {
|
||||
const MV diff = { mv[i].as_mv.row - ref[i].as_mv.row,
|
||||
mv[i].as_mv.col - ref[i].as_mv.col };
|
||||
vp9_inc_mv(&diff, counts);
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]) {
|
||||
MODE_INFO *mi = x->e_mbd.mi_8x8[0];
|
||||
MB_MODE_INFO *const mbmi = &mi->mbmi;
|
||||
MV diff;
|
||||
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
|
||||
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
|
||||
int idx, idy;
|
||||
const int is_compound = has_second_ref(mbmi);
|
||||
|
||||
if (mbmi->sb_type < BLOCK_8X8) {
|
||||
PARTITION_INFO *pi = x->partition_info;
|
||||
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
|
||||
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
|
||||
const int i = idy * 2 + idx;
|
||||
if (pi->bmi[i].mode == NEWMV) {
|
||||
diff.row = mi->bmi[i].as_mv[0].as_mv.row - best_ref_mv->as_mv.row;
|
||||
diff.col = mi->bmi[i].as_mv[0].as_mv.col - best_ref_mv->as_mv.col;
|
||||
vp9_inc_mv(&diff, &cpi->NMVcount);
|
||||
const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type];
|
||||
const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi->sb_type];
|
||||
int idx, idy;
|
||||
|
||||
if (mi->mbmi.ref_frame[1] > INTRA_FRAME) {
|
||||
diff.row = mi->bmi[i].as_mv[1].as_mv.row -
|
||||
second_best_ref_mv->as_mv.row;
|
||||
diff.col = mi->bmi[i].as_mv[1].as_mv.col -
|
||||
second_best_ref_mv->as_mv.col;
|
||||
vp9_inc_mv(&diff, &cpi->NMVcount);
|
||||
}
|
||||
}
|
||||
for (idy = 0; idy < 2; idy += num_4x4_h) {
|
||||
for (idx = 0; idx < 2; idx += num_4x4_w) {
|
||||
const int i = idy * 2 + idx;
|
||||
if (x->partition_info->bmi[i].mode == NEWMV)
|
||||
inc_mvs(mi->bmi[i].as_mv, best_ref_mv, is_compound, &cpi->NMVcount);
|
||||
}
|
||||
}
|
||||
} else if (mbmi->mode == NEWMV) {
|
||||
diff.row = mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row;
|
||||
diff.col = mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col;
|
||||
vp9_inc_mv(&diff, &cpi->NMVcount);
|
||||
|
||||
if (mbmi->ref_frame[1] > INTRA_FRAME) {
|
||||
diff.row = mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row;
|
||||
diff.col = mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col;
|
||||
vp9_inc_mv(&diff, &cpi->NMVcount);
|
||||
}
|
||||
inc_mvs(mbmi->mv, best_ref_mv, is_compound, &cpi->NMVcount);
|
||||
}
|
||||
}
|
||||
|
@@ -25,7 +25,7 @@ void vp9_build_nmv_cost_table(int *mvjoint,
|
||||
int usehp,
|
||||
int mvc_flag_v,
|
||||
int mvc_flag_h);
|
||||
void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int_mv *best_ref_mv, int_mv *second_best_ref_mv);
|
||||
|
||||
void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]);
|
||||
|
||||
#endif // VP9_ENCODER_VP9_ENCODEMV_H_
|
||||
|
@@ -534,10 +534,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
|
||||
recon_yoffset = (mb_row * recon_y_stride * 16);
|
||||
recon_uvoffset = (mb_row * recon_uv_stride * 8);
|
||||
|
||||
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
|
||||
x->mv_row_min = -((mb_row * 16) + (VP9BORDERINPIXELS - 8));
|
||||
// Set up limit values for motion vectors to prevent them extending
|
||||
// outside the UMV borders
|
||||
x->mv_row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
|
||||
+ (VP9BORDERINPIXELS - 8);
|
||||
+ BORDER_MV_PIXELS_B16;
|
||||
|
||||
// for each macroblock col in image
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
|
||||
@@ -583,9 +584,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
|
||||
intra_error += (int64_t)this_error;
|
||||
|
||||
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
|
||||
x->mv_col_min = -((mb_col * 16) + (VP9BORDERINPIXELS - 8));
|
||||
x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
|
||||
+ (VP9BORDERINPIXELS - 8);
|
||||
+ BORDER_MV_PIXELS_B16;
|
||||
|
||||
// Other than for the first frame do a motion search
|
||||
if (cm->current_video_frame > 0) {
|
||||
@@ -660,8 +661,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
|
||||
neutral_count++;
|
||||
}
|
||||
|
||||
mv.as_mv.row <<= 3;
|
||||
mv.as_mv.col <<= 3;
|
||||
mv.as_mv.row *= 8;
|
||||
mv.as_mv.col *= 8;
|
||||
this_error = motion_error;
|
||||
vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
|
||||
xd->this_mi->mbmi.tx_size = TX_4X4;
|
||||
@@ -2093,14 +2094,19 @@ void vp9_second_pass(VP9_COMP *cpi) {
|
||||
cpi->twopass.est_max_qcorrection_factor = 1.0;
|
||||
|
||||
// Set a cq_level in constrained quality mode.
|
||||
// Commenting this code out for now since it does not seem to be
|
||||
// working well.
|
||||
/*
|
||||
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
|
||||
int est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats,
|
||||
section_target_bandwidth);
|
||||
section_target_bandwidth);
|
||||
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
if (est_cq > cpi->cq_target_quality)
|
||||
cpi->cq_target_quality = est_cq;
|
||||
else
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
}
|
||||
*/
|
||||
|
||||
// guess at maxq needed in 2nd pass
|
||||
cpi->twopass.maxq_max_limit = cpi->worst_quality;
|
||||
|
@@ -10,14 +10,17 @@
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include <vpx_mem/vpx_mem.h>
|
||||
#include <vp9/encoder/vp9_encodeintra.h>
|
||||
#include <vp9/encoder/vp9_rdopt.h>
|
||||
#include <vp9/common/vp9_blockd.h>
|
||||
#include <vp9/common/vp9_reconinter.h>
|
||||
#include <vp9/common/vp9_reconintra.h>
|
||||
#include <vp9/common/vp9_systemdependent.h>
|
||||
#include <vp9/encoder/vp9_segmentation.h>
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vp9/encoder/vp9_encodeintra.h"
|
||||
#include "vp9/encoder/vp9_rdopt.h"
|
||||
#include "vp9/encoder/vp9_segmentation.h"
|
||||
#include "vp9/encoder/vp9_mcomp.h"
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vp9/common/vp9_reconinter.h"
|
||||
#include "vp9/common/vp9_reconintra.h"
|
||||
#include "vp9/common/vp9_systemdependent.h"
|
||||
|
||||
|
||||
|
||||
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
|
||||
int_mv *ref_mv,
|
||||
@@ -46,9 +49,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
|
||||
ref_full.as_mv.row = ref_mv->as_mv.row >> 3;
|
||||
|
||||
/*cpi->sf.search_method == HEX*/
|
||||
best_err = vp9_hex_search(x, &ref_full, step_param, x->errorperbit,
|
||||
best_err = vp9_hex_search(x, &ref_full.as_mv, step_param, x->errorperbit,
|
||||
0, &v_fn_ptr,
|
||||
0, ref_mv, dst_mv);
|
||||
0, &ref_mv->as_mv, &dst_mv->as_mv);
|
||||
|
||||
// Try sub-pixel MC
|
||||
// if (bestsme > error_thresh && bestsme < INT_MAX)
|
||||
@@ -57,7 +60,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
|
||||
unsigned int sse;
|
||||
best_err = cpi->find_fractional_mv_step(
|
||||
x,
|
||||
dst_mv, ref_mv,
|
||||
&dst_mv->as_mv, &ref_mv->as_mv,
|
||||
x->errorperbit, &v_fn_ptr,
|
||||
0, cpi->sf.subpel_iters_per_step, NULL, NULL,
|
||||
& distortion, &sse);
|
||||
@@ -246,9 +249,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
|
||||
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
|
||||
arf_top_mv.as_int = 0;
|
||||
gld_top_mv.as_int = 0;
|
||||
x->mv_row_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND);
|
||||
x->mv_row_max = (cm->mb_rows - 1) * 8 + VP9BORDERINPIXELS
|
||||
- 8 - VP9_INTERP_EXTEND;
|
||||
x->mv_row_min = -BORDER_MV_PIXELS_B16;
|
||||
x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
|
||||
xd->up_available = 0;
|
||||
xd->plane[0].dst.stride = buf->y_stride;
|
||||
xd->plane[0].pre[0].stride = buf->y_stride;
|
||||
@@ -267,9 +269,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
|
||||
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
|
||||
arf_left_mv.as_int = arf_top_mv.as_int;
|
||||
gld_left_mv.as_int = gld_top_mv.as_int;
|
||||
x->mv_col_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND);
|
||||
x->mv_col_max = (cm->mb_cols - 1) * 8 + VP9BORDERINPIXELS
|
||||
- 8 - VP9_INTERP_EXTEND;
|
||||
x->mv_col_min = -BORDER_MV_PIXELS_B16;
|
||||
x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
|
||||
xd->left_available = 0;
|
||||
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -22,10 +22,14 @@
|
||||
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)
|
||||
// Maximum size of the first step in full pel units
|
||||
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))
|
||||
// Allowed motion vector pixel distance outside image border
|
||||
// for Block_16x16
|
||||
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
|
||||
|
||||
|
||||
void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv);
|
||||
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
|
||||
int *mvcost[2], int weight);
|
||||
int vp9_mv_bit_cost(const MV *mv, const MV *ref,
|
||||
const int *mvjcost, int *mvcost[2], int weight);
|
||||
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
|
||||
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
|
||||
|
||||
@@ -40,37 +44,36 @@ int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int_mv *ref_mv, int_mv *dst_mv);
|
||||
|
||||
int vp9_hex_search(MACROBLOCK *x,
|
||||
int_mv *ref_mv,
|
||||
MV *ref_mv,
|
||||
int search_param,
|
||||
int error_per_bit,
|
||||
int do_init_search,
|
||||
const vp9_variance_fn_ptr_t *vf,
|
||||
int use_mvcost,
|
||||
int_mv *center_mv,
|
||||
int_mv *best_mv);
|
||||
const MV *center_mv,
|
||||
MV *best_mv);
|
||||
int vp9_bigdia_search(MACROBLOCK *x,
|
||||
int_mv *ref_mv,
|
||||
MV *ref_mv,
|
||||
int search_param,
|
||||
int error_per_bit,
|
||||
int do_init_search,
|
||||
const vp9_variance_fn_ptr_t *vf,
|
||||
int use_mvcost,
|
||||
int_mv *center_mv,
|
||||
int_mv *best_mv);
|
||||
const MV *center_mv,
|
||||
MV *best_mv);
|
||||
int vp9_square_search(MACROBLOCK *x,
|
||||
int_mv *ref_mv,
|
||||
MV *ref_mv,
|
||||
int search_param,
|
||||
int error_per_bit,
|
||||
int do_init_search,
|
||||
const vp9_variance_fn_ptr_t *vf,
|
||||
int use_mvcost,
|
||||
int_mv *center_mv,
|
||||
int_mv *best_mv);
|
||||
const MV *center_mv,
|
||||
MV *best_mv);
|
||||
|
||||
typedef int (fractional_mv_step_fp) (
|
||||
MACROBLOCK *x,
|
||||
int_mv *bestmv,
|
||||
int_mv *ref_mv,
|
||||
MV *bestmv, const MV *ref_mv,
|
||||
int error_per_bit,
|
||||
const vp9_variance_fn_ptr_t *vfp,
|
||||
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
|
||||
@@ -84,7 +87,7 @@ extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
|
||||
|
||||
typedef int (fractional_mv_step_comp_fp) (
|
||||
MACROBLOCK *x,
|
||||
int_mv *bestmv, int_mv *ref_mv,
|
||||
MV *bestmv, const MV *ref_mv,
|
||||
int error_per_bit,
|
||||
const vp9_variance_fn_ptr_t *vfp,
|
||||
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
|
||||
|
@@ -122,6 +122,8 @@ static int kf_high_motion_minq[QINDEX_RANGE];
|
||||
static int gf_low_motion_minq[QINDEX_RANGE];
|
||||
static int gf_high_motion_minq[QINDEX_RANGE];
|
||||
static int inter_minq[QINDEX_RANGE];
|
||||
static int afq_low_motion_minq[QINDEX_RANGE];
|
||||
static int afq_high_motion_minq[QINDEX_RANGE];
|
||||
|
||||
static INLINE void Scale2Ratio(int mode, int *hr, int *hs) {
|
||||
switch (mode) {
|
||||
@@ -193,22 +195,52 @@ static void init_minq_luts(void) {
|
||||
gf_low_motion_minq[i] = calculate_minq_index(maxq,
|
||||
0.0000015,
|
||||
-0.0009,
|
||||
0.33,
|
||||
0.32,
|
||||
0.0);
|
||||
gf_high_motion_minq[i] = calculate_minq_index(maxq,
|
||||
0.0000021,
|
||||
-0.00125,
|
||||
0.45,
|
||||
0.50,
|
||||
0.0);
|
||||
inter_minq[i] = calculate_minq_index(maxq,
|
||||
0.00000271,
|
||||
-0.00113,
|
||||
0.697,
|
||||
0.75,
|
||||
0.0);
|
||||
|
||||
afq_low_motion_minq[i] = calculate_minq_index(maxq,
|
||||
0.0000015,
|
||||
-0.0009,
|
||||
0.33,
|
||||
0.0);
|
||||
afq_high_motion_minq[i] = calculate_minq_index(maxq,
|
||||
0.0000021,
|
||||
-0.00125,
|
||||
0.55,
|
||||
0.0);
|
||||
}
|
||||
}
|
||||
|
||||
static int get_active_quality(int q,
|
||||
int gfu_boost,
|
||||
int low,
|
||||
int high,
|
||||
int *low_motion_minq,
|
||||
int *high_motion_minq) {
|
||||
int active_best_quality;
|
||||
if (gfu_boost > high) {
|
||||
active_best_quality = low_motion_minq[q];
|
||||
} else if (gfu_boost < low) {
|
||||
active_best_quality = high_motion_minq[q];
|
||||
} else {
|
||||
const int gap = high - low;
|
||||
const int offset = high - gfu_boost;
|
||||
const int qdiff = high_motion_minq[q] - low_motion_minq[q];
|
||||
const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
|
||||
active_best_quality = low_motion_minq[q] + adjustment;
|
||||
}
|
||||
return active_best_quality;
|
||||
}
|
||||
|
||||
static void set_mvcost(MACROBLOCK *mb) {
|
||||
if (mb->e_mbd.allow_high_precision_mv) {
|
||||
mb->mvcost = mb->nmvcost_hp;
|
||||
@@ -565,16 +597,16 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) {
|
||||
sf->thresh_mult[THR_NEARESTG] = 0;
|
||||
sf->thresh_mult[THR_NEARESTA] = 0;
|
||||
|
||||
sf->thresh_mult[THR_NEWMV] += 1000;
|
||||
sf->thresh_mult[THR_COMP_NEARESTLA] += 1000;
|
||||
sf->thresh_mult[THR_NEARMV] += 1000;
|
||||
sf->thresh_mult[THR_COMP_NEARESTGA] += 1000;
|
||||
|
||||
sf->thresh_mult[THR_DC] += 1000;
|
||||
|
||||
sf->thresh_mult[THR_NEWG] += 1000;
|
||||
sf->thresh_mult[THR_NEWMV] += 1000;
|
||||
sf->thresh_mult[THR_NEWA] += 1000;
|
||||
sf->thresh_mult[THR_NEWG] += 1000;
|
||||
|
||||
sf->thresh_mult[THR_NEARMV] += 1000;
|
||||
sf->thresh_mult[THR_NEARA] += 1000;
|
||||
sf->thresh_mult[THR_COMP_NEARESTLA] += 1000;
|
||||
sf->thresh_mult[THR_COMP_NEARESTGA] += 1000;
|
||||
|
||||
sf->thresh_mult[THR_TM] += 1000;
|
||||
|
||||
@@ -606,28 +638,6 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) {
|
||||
sf->thresh_mult[THR_D207_PRED] += 2500;
|
||||
sf->thresh_mult[THR_D63_PRED] += 2500;
|
||||
|
||||
if (cpi->sf.skip_lots_of_modes) {
|
||||
for (i = 0; i < MAX_MODES; ++i)
|
||||
sf->thresh_mult[i] = INT_MAX;
|
||||
|
||||
sf->thresh_mult[THR_DC] = 2000;
|
||||
sf->thresh_mult[THR_TM] = 2000;
|
||||
sf->thresh_mult[THR_NEWMV] = 4000;
|
||||
sf->thresh_mult[THR_NEWG] = 4000;
|
||||
sf->thresh_mult[THR_NEWA] = 4000;
|
||||
sf->thresh_mult[THR_NEARESTMV] = 0;
|
||||
sf->thresh_mult[THR_NEARESTG] = 0;
|
||||
sf->thresh_mult[THR_NEARESTA] = 0;
|
||||
sf->thresh_mult[THR_NEARMV] = 2000;
|
||||
sf->thresh_mult[THR_NEARG] = 2000;
|
||||
sf->thresh_mult[THR_NEARA] = 2000;
|
||||
sf->thresh_mult[THR_COMP_NEARESTLA] = 2000;
|
||||
sf->thresh_mult[THR_SPLITMV] = 2500;
|
||||
sf->thresh_mult[THR_SPLITG] = 2500;
|
||||
sf->thresh_mult[THR_SPLITA] = 2500;
|
||||
sf->recode_loop = 0;
|
||||
}
|
||||
|
||||
/* disable frame modes if flags not set */
|
||||
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
|
||||
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
|
||||
@@ -714,8 +724,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
||||
sf->adaptive_motion_search = 0;
|
||||
sf->use_avoid_tested_higherror = 0;
|
||||
sf->reference_masking = 0;
|
||||
sf->skip_lots_of_modes = 0;
|
||||
sf->partition_by_variance = 0;
|
||||
sf->use_one_partition_size_always = 0;
|
||||
sf->less_rectangular_check = 0;
|
||||
sf->use_square_partition_only = 0;
|
||||
@@ -796,7 +804,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
||||
sf->intra_y_mode_mask = INTRA_DC_TM_H_V;
|
||||
sf->intra_uv_mode_mask = INTRA_DC_TM_H_V;
|
||||
sf->use_fast_coef_updates = 1;
|
||||
sf->mode_skip_start = 9;
|
||||
sf->mode_skip_start = 11;
|
||||
}
|
||||
if (speed == 2) {
|
||||
sf->less_rectangular_check = 1;
|
||||
@@ -835,11 +843,15 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
||||
sf->disable_split_var_thresh = 32;
|
||||
sf->disable_filter_search_var_thresh = 32;
|
||||
sf->use_fast_coef_updates = 2;
|
||||
sf->mode_skip_start = 9;
|
||||
sf->mode_skip_start = 6;
|
||||
}
|
||||
if (speed == 3) {
|
||||
sf->less_rectangular_check = 1;
|
||||
sf->use_square_partition_only = 1;
|
||||
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
|
||||
sf->partition_by_variance = 1;
|
||||
sf->use_lastframe_partitioning = 1;
|
||||
sf->adjust_partitioning_from_last_frame = 1;
|
||||
sf->last_partitioning_redo_frequency = 3;
|
||||
sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
|
||||
cpi->common.intra_only ||
|
||||
cpi->common.show_frame == 0) ?
|
||||
@@ -851,19 +863,25 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
||||
FLAG_SKIP_COMP_REFMISMATCH |
|
||||
FLAG_SKIP_INTRA_LOWVAR |
|
||||
FLAG_EARLY_TERMINATE;
|
||||
sf->intra_y_mode_mask = INTRA_DC_ONLY;
|
||||
sf->intra_uv_mode_mask = INTRA_DC_ONLY;
|
||||
sf->use_uv_intra_rd_estimate = 1;
|
||||
sf->use_rd_breakout = 1;
|
||||
sf->skip_encode_sb = 1;
|
||||
sf->use_lp32x32fdct = 1;
|
||||
sf->adaptive_motion_search = 1;
|
||||
sf->using_small_partition_info = 0;
|
||||
sf->disable_splitmv = 1;
|
||||
sf->auto_mv_step_size = 1;
|
||||
sf->search_method = BIGDIA;
|
||||
sf->subpel_iters_per_step = 1;
|
||||
sf->use_fast_lpf_pick = 1;
|
||||
sf->auto_min_max_partition_size = 1;
|
||||
sf->auto_min_max_partition_interval = 2;
|
||||
sf->disable_split_var_thresh = 64;
|
||||
sf->disable_filter_search_var_thresh = 64;
|
||||
sf->intra_y_mode_mask = INTRA_DC_ONLY;
|
||||
sf->intra_uv_mode_mask = INTRA_DC_ONLY;
|
||||
sf->use_fast_coef_updates = 2;
|
||||
sf->mode_skip_start = 9;
|
||||
sf->mode_skip_start = 6;
|
||||
}
|
||||
if (speed == 4) {
|
||||
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
|
||||
@@ -895,7 +913,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
||||
sf->intra_y_mode_mask = INTRA_DC_ONLY;
|
||||
sf->intra_uv_mode_mask = INTRA_DC_ONLY;
|
||||
sf->use_fast_coef_updates = 2;
|
||||
sf->mode_skip_start = 9;
|
||||
sf->mode_skip_start = 6;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -2714,18 +2732,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
|
||||
double q_val;
|
||||
|
||||
// Baseline value derived from cpi->active_worst_quality and kf boost
|
||||
if (cpi->kf_boost > high) {
|
||||
cpi->active_best_quality = kf_low_motion_minq[q];
|
||||
} else if (cpi->kf_boost < low) {
|
||||
cpi->active_best_quality = kf_high_motion_minq[q];
|
||||
} else {
|
||||
const int gap = high - low;
|
||||
const int offset = high - cpi->kf_boost;
|
||||
const int qdiff = kf_high_motion_minq[q] - kf_low_motion_minq[q];
|
||||
const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
|
||||
|
||||
cpi->active_best_quality = kf_low_motion_minq[q] + adjustment;
|
||||
}
|
||||
cpi->active_best_quality = get_active_quality(q, cpi->kf_boost,
|
||||
low, high,
|
||||
kf_low_motion_minq,
|
||||
kf_high_motion_minq);
|
||||
|
||||
// Allow somewhat lower kf minq with small image formats.
|
||||
if ((cm->width * cm->height) <= (352 * 288)) {
|
||||
@@ -2760,47 +2770,48 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
|
||||
q = cpi->avg_frame_qindex;
|
||||
}
|
||||
// For constrained quality dont allow Q less than the cq level
|
||||
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
|
||||
q < cpi->cq_target_quality) {
|
||||
q = cpi->cq_target_quality;
|
||||
}
|
||||
if (cpi->gfu_boost > high) {
|
||||
cpi->active_best_quality = gf_low_motion_minq[q];
|
||||
} else if (cpi->gfu_boost < low) {
|
||||
cpi->active_best_quality = gf_high_motion_minq[q];
|
||||
} else {
|
||||
const int gap = high - low;
|
||||
const int offset = high - cpi->gfu_boost;
|
||||
const int qdiff = gf_high_motion_minq[q] - gf_low_motion_minq[q];
|
||||
const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
|
||||
|
||||
cpi->active_best_quality = gf_low_motion_minq[q] + adjustment;
|
||||
}
|
||||
|
||||
// Constrained quality use slightly lower active best.
|
||||
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
|
||||
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
|
||||
if (q < cpi->cq_target_quality)
|
||||
q = cpi->cq_target_quality;
|
||||
if (cpi->frames_since_key > 1) {
|
||||
cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
|
||||
low, high,
|
||||
afq_low_motion_minq,
|
||||
afq_high_motion_minq);
|
||||
} else {
|
||||
cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
|
||||
low, high,
|
||||
gf_low_motion_minq,
|
||||
gf_high_motion_minq);
|
||||
}
|
||||
// Constrained quality use slightly lower active best.
|
||||
cpi->active_best_quality = cpi->active_best_quality * 15 / 16;
|
||||
|
||||
// TODO(debargha): Refine the logic below
|
||||
if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
|
||||
} else if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
|
||||
if (!cpi->refresh_alt_ref_frame) {
|
||||
cpi->active_best_quality = cpi->cq_target_quality;
|
||||
} else {
|
||||
if (cpi->frames_since_key > 1) {
|
||||
if (cpi->gfu_boost > high) {
|
||||
cpi->active_best_quality = cpi->cq_target_quality * 6 / 16;
|
||||
} else if (cpi->gfu_boost < low) {
|
||||
cpi->active_best_quality = cpi->cq_target_quality * 11 / 16;
|
||||
} else {
|
||||
const int gap = high - low;
|
||||
const int offset = high - cpi->gfu_boost;
|
||||
const int qdiff = cpi->cq_target_quality * 5 / 16;
|
||||
const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
|
||||
cpi->active_best_quality = cpi->cq_target_quality * 6 / 16
|
||||
+ adjustment;
|
||||
}
|
||||
cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
|
||||
low, high,
|
||||
afq_low_motion_minq,
|
||||
afq_high_motion_minq);
|
||||
} else {
|
||||
cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
|
||||
low, high,
|
||||
gf_low_motion_minq,
|
||||
gf_high_motion_minq);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!cpi->refresh_alt_ref_frame) {
|
||||
cpi->active_best_quality = inter_minq[q];
|
||||
} else {
|
||||
cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
|
||||
low, high,
|
||||
gf_low_motion_minq,
|
||||
gf_high_motion_minq);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
|
||||
@@ -3285,7 +3296,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
|
||||
// in this frame.
|
||||
// update_base_skip_probs(cpi);
|
||||
|
||||
#if CONFIG_INTERNAL_STATS
|
||||
#if 0 // CONFIG_INTERNAL_STATS
|
||||
{
|
||||
FILE *f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
|
||||
int recon_err;
|
||||
|
@@ -61,16 +61,11 @@
|
||||
#define INTRA_ZBIN_BOOST 0
|
||||
|
||||
typedef struct {
|
||||
nmv_context nmvc;
|
||||
int nmvjointcost[MV_JOINTS];
|
||||
int nmvcosts[2][MV_VALS];
|
||||
int nmvcosts_hp[2][MV_VALS];
|
||||
|
||||
vp9_prob segment_pred_probs[PREDICTION_PROBS];
|
||||
vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
|
||||
vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS];
|
||||
vp9_prob single_ref_prob[REF_CONTEXTS][2];
|
||||
vp9_prob comp_ref_prob[REF_CONTEXTS];
|
||||
|
||||
unsigned char *last_frame_seg_map_copy;
|
||||
|
||||
@@ -79,20 +74,8 @@ typedef struct {
|
||||
// 0 = ZERO_MV, MV
|
||||
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
|
||||
|
||||
vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
|
||||
|
||||
vp9_prob y_mode_prob[4][INTRA_MODES - 1];
|
||||
vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
|
||||
vp9_prob partition_prob[2][NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
|
||||
|
||||
vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
|
||||
[SWITCHABLE_FILTERS - 1];
|
||||
|
||||
int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
|
||||
vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
|
||||
|
||||
struct tx_probs tx_probs;
|
||||
vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
|
||||
FRAME_CONTEXT fc;
|
||||
} CODING_CONTEXT;
|
||||
|
||||
typedef struct {
|
||||
@@ -267,8 +250,6 @@ typedef struct {
|
||||
TX_SIZE_SEARCH_METHOD tx_size_search_method;
|
||||
int use_lp32x32fdct;
|
||||
int use_avoid_tested_higherror;
|
||||
int skip_lots_of_modes;
|
||||
int partition_by_variance;
|
||||
int use_one_partition_size_always;
|
||||
int less_rectangular_check;
|
||||
int use_square_partition_only;
|
||||
@@ -339,13 +320,13 @@ typedef struct VP9_COMP {
|
||||
YV12_BUFFER_CONFIG scaled_source;
|
||||
|
||||
unsigned int frames_till_alt_ref_frame;
|
||||
int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref
|
||||
int source_alt_ref_active; // an alt ref frame has been encoded and is usable
|
||||
int source_alt_ref_pending;
|
||||
int source_alt_ref_active;
|
||||
|
||||
int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame
|
||||
int is_src_frame_alt_ref;
|
||||
|
||||
int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
|
||||
int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
|
||||
int gold_is_last; // gold same as last frame ( short circuit gold searches)
|
||||
int alt_is_last; // Alt same as last ( short circuit altref search)
|
||||
int gold_is_alt; // don't do both alt and gold search ( just do gold).
|
||||
|
||||
int scaled_ref_idx[3];
|
||||
@@ -393,8 +374,7 @@ typedef struct VP9_COMP {
|
||||
int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
|
||||
|
||||
int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES];
|
||||
// FIXME(rbultje) int64_t?
|
||||
int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
|
||||
int64_t rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
|
||||
unsigned int intra_inter_count[INTRA_INTER_CONTEXTS][2];
|
||||
unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2];
|
||||
unsigned int single_ref_count[REF_CONTEXTS][2][2];
|
||||
@@ -424,14 +404,14 @@ typedef struct VP9_COMP {
|
||||
double gf_rate_correction_factor;
|
||||
|
||||
unsigned int frames_since_golden;
|
||||
int frames_till_gf_update_due; // Count down till next GF
|
||||
int frames_till_gf_update_due; // Count down till next GF
|
||||
|
||||
int gf_overspend_bits; // Total bits overspent becasue of GF boost (cumulative)
|
||||
int gf_overspend_bits; // cumulative bits overspent because of GF boost
|
||||
|
||||
int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF
|
||||
int non_gf_bitrate_adjustment; // Following GF to recover extra bits spent
|
||||
|
||||
int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames
|
||||
int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame.
|
||||
int kf_overspend_bits; // Bits spent on key frames to be recovered on inters
|
||||
int kf_bitrate_adjustment; // number of bits to recover on each inter frame.
|
||||
int max_gf_interval;
|
||||
int baseline_gf_interval;
|
||||
int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
|
||||
@@ -439,9 +419,9 @@ typedef struct VP9_COMP {
|
||||
|
||||
int64_t key_frame_count;
|
||||
int prior_key_frame_distance[KEY_FRAME_CONTEXT];
|
||||
int per_frame_bandwidth; // Current section per frame bandwidth target
|
||||
int av_per_frame_bandwidth; // Average frame size target for clip
|
||||
int min_frame_bandwidth; // Minimum allocation that should be used for any frame
|
||||
int per_frame_bandwidth; // Current section per frame bandwidth target
|
||||
int av_per_frame_bandwidth; // Average frame size target for clip
|
||||
int min_frame_bandwidth; // Minimum allocation used for any frame
|
||||
int inter_frame_target;
|
||||
double output_framerate;
|
||||
int64_t last_time_stamp_seen;
|
||||
@@ -537,7 +517,8 @@ typedef struct VP9_COMP {
|
||||
unsigned int max_mv_magnitude;
|
||||
int mv_step_param;
|
||||
|
||||
// Data used for real time conferencing mode to help determine if it would be good to update the gf
|
||||
// Data used for real time conferencing mode to help determine if it
|
||||
// would be good to update the gf
|
||||
int inter_zz_count;
|
||||
int gf_bad_count;
|
||||
int gf_update_recommended;
|
||||
@@ -651,7 +632,7 @@ typedef struct VP9_COMP {
|
||||
unsigned int switchable_interp_count[SWITCHABLE_FILTERS + 1]
|
||||
[SWITCHABLE_FILTERS];
|
||||
|
||||
unsigned int txfm_stepdown_count[TX_SIZES];
|
||||
unsigned int tx_stepdown_count[TX_SIZES];
|
||||
|
||||
int initial_width;
|
||||
int initial_height;
|
||||
@@ -714,9 +695,8 @@ void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
|
||||
|
||||
void vp9_set_speed_features(VP9_COMP *cpi);
|
||||
|
||||
extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest);
|
||||
int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
|
||||
|
||||
extern void vp9_alloc_compressor_data(VP9_COMP *cpi);
|
||||
void vp9_alloc_compressor_data(VP9_COMP *cpi);
|
||||
|
||||
#endif // VP9_ENCODER_VP9_ONYX_INT_H_
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user