Compare commits

...

108 Commits

Author SHA1 Message Date
Jingning Han
ac50b75e50 Use balanced model for intra prediction mode coding
This commit replaces the previous table based intra mode model
coding with a more balanced entropy coding system. It reduces the
decoder lookup table size by 1K bytes. The key frame compression
performance is about even on average. There are a few points where
the compression performance is improved by over 5%. Most test
points are fairly close to the lookup table approach.

Change-Id: I47154276c0a6a22ae87de8845bc2d494681b95f6
2015-06-23 16:42:56 -07:00
Jingning Han
81c389e790 Make tx partition entropy coder account for block size
This commit allows the entropy coder for transform block partition
to account for its relative position with respect to the block size.

Change-Id: I2b5019c378bfb58c11b926fa50c0db1933f35852
2015-06-18 21:56:30 +00:00
Jingning Han
0a42a1efd4 Add max_tx_size to MB_MODE_INFO
Refactor the recursive transform block partition to reduce repeated
computation maximum transform block size per block.

Change-Id: Ib408c78dc6923fe7d337dc937e74f2701ac63859
2015-06-18 14:54:49 -07:00
Jingning Han
2aa2ef4094 Make loop filter support variable transform block size
This commit refactors the loop filter implementation to make it
support recursive transform block partition.

Change-Id: Ica2daa9cb54730cff7770ee2c2d7ffdb240ff418
2015-06-16 18:56:47 -07:00
Jingning Han
85c220b2c4 Turn on loop filter
Temporarily use univariate transform size for loop filter.
As compared to VP9 master branch with loop filter turned on, the
compression gains are:

derf  0.671%
mr    0.749%
stdhd 0.886%
hr    1.394%

The encoding speed currently is about 1.3X that of speed 0.

Change-Id: I64788f894e70fde14c5be3159501bedf836e5998
2015-06-16 08:49:13 -07:00
Jingning Han
7cbea06386 Update transform block partition information for intra blocks
If a block is coded in the intra modes, update the transform block
partition information as maximum block size.

Change-Id: I5ea440c700fc887ff2fe84fabde77a9d896d16f4
2015-06-15 15:53:19 -07:00
Jingning Han
a4fd58a761 Refactor tx_block_rd_b() to compute per block rd cost
This commit makes the tx_block_rd_b() compute the rate and
distortion cost per transform block, instead of accumulating these
costs.

Change-Id: Iff5adc4c27cc54f8e6eb3abd95f8d88ba00f462c
2015-06-15 09:08:00 -07:00
Jingning Han
e272e5b8fb Skip redundant flag reset
If the skip flag is already on, there is no need to further check
the all zero block case. This improves encoding speed at no coding
statistics change.

Change-Id: Icab997ca2977e650351a47ff1314def5ac4ecb1d
2015-06-12 11:44:01 -07:00
Jingning Han
5180368403 Allow encoder to force all zero coefficient block
This commit allows the encoder to force all zero quantized
coefficient block per transform block, if that provides better
rate-distortion trade-off.

Change-Id: I5b57b28cccd257ebfaf7c1749dda7be482abc834
2015-06-12 09:18:10 -07:00
Jingning Han
63c0d8df9f Assign largest transform block size to skip block
If a block has all coefficients quantized to zero, the codec will
assume that it uses largest transform block size.

Change-Id: I1a32527e50026e8e4759ad8de474189cd20e89c8
2015-06-11 11:01:44 -07:00
Jingning Han
9ce132ac37 Refactor transform block partition entropy coding
This commit refactors the transform block partition entropy
coding process to improve the encoding speed. There is no change
in the compression statistics.

Change-Id: I237466fd95c1b888df432babfa36e01f74240eef
2015-06-11 09:41:20 -07:00
Jingning Han
9692042493 Refactor transform block partition update process
Unify transform block partition update process used in rate
distortion optimization and encoding stage.

Change-Id: I4e5f2b6d2482c53ceadb7c8743435158f229a82c
2015-06-10 10:01:31 -07:00
Jingning Han
87a0d5436b Account for context information for partition rate estimate
This commit allows the encoder to account for the boundary block
information to estimate the transform block partition rate cost
in the rate-distortion optimization scheme.

Change-Id: Idb79cf936d96cdd15bcba27e47318295413a5f5d
2015-06-09 15:53:55 -07:00
Jingning Han
948c6d882e Enable transform block partition entropy coding
Select the probability model for transform block partition coding
conditioned on the neighbor transform block sizes.

Change-Id: Ib701296e59009bad97dbd21d8dcd58bc5e552f39
2015-06-09 12:30:52 -07:00
Jingning Han
79d6b8fc85 Properly handle boundary block rate distortion computation
This commit makes the encoder properly compute the rate
distortion cost for blocks that partially cover extended pixels.

Change-Id: I44529af6f76925cdc0f6b24a5d190b51b0813983
2015-06-09 11:14:24 -07:00
Jingning Han
b54dd00f53 Align the intra and inter mode cost measurement
This commit aligns the measurement method used to evaluate both
intra and inter modes.

Change-Id: I8071584ce87fa3c5401800363daa0e670de29af5
2015-06-05 11:37:21 -07:00
Jingning Han
3239e22a42 Conditionally use recursive transform block partition search
If the frame header sets to use fixed transform block size, use
the univariate transform block partition search flow.

Change-Id: Ic422ecb6565642cd8ddb96dc67a37109ef3ce90f
2015-06-03 11:14:26 -07:00
Jingning Han
a96f2ca319 Rework the rate and distortion computation pipeline
This allows the encoder to use more precise rate and distortion
costs for mode decision.

Change-Id: I7cfd676a88531a194b9a509375feea8365e5ef12
2015-06-02 23:15:09 -07:00
Jingning Han
0207dcde4a Fix rate estimate issue in transform block partition coding
This commit fixes the over count issue in the recursive transform
block partition rate cost estimation. It improves the compression
performance by about 0.45%.

Change-Id: I01ccda954ed0e120263977472c1c759c3c67170c
2015-06-02 18:51:03 -07:00
Jingning Han
33f05e90fe Enable rate-distortion optimization for transform partition
This commit enables the rate-distortion optimization for recursive
transform block partition for inter mode blocks based on luma
component. The chroma component infers the transform block size
decision from those of luma component.

Change-Id: I907cc52af888a606b718e087e717b189fa505748
2015-06-01 16:50:36 -07:00
Jingning Han
0451c6b6dd Refactor per block rate distortion estimate
Move the rate-distortion estimate function outside the recursion
as an individual operating module.

Change-Id: I662199223c256664bcd312084b3aebffb8a8034b
2015-06-01 12:41:45 -07:00
Jingning Han
d4b8dd76c4 Make chroma component RD estimate support transform partition
This commit makes the rate-distortion estimation of the chroma
components support the recursive transform block partition
inferred from the luma component mode decisions.

Change-Id: I2e038bebf558da406e966015952ad1058bdf4766
2015-06-01 11:15:15 -07:00
Jingning Han
cd4aca5959 Add decoder support to recursive transform block partition
It allows the decoder to recursively parse and use the transform
block size for inter coded blocks.

Change-Id: I12ceea48ab35501ac1a3447142deb2a334eff3b8
2015-05-22 16:45:34 -07:00
Jingning Han
64f3820f80 Refactor bit-stream syntax support to transform partition
Make the bit-stream syntax element coding ready to support
variable transform coding block sizes.

Change-Id: I07ae4ab62d1ecd46c4a5ae45702fc14bd1d4b07d
2015-05-22 12:13:29 -07:00
Jingning Han
6fc13b5cc2 Inter block transform coding partition syntax elements
Allocate memory buffer to store the transform coding partition
information of inter prediction mode blocks.

Change-Id: I428b1dd0b26e8eaf24030a833554ceb4479c5551
2015-05-22 10:57:36 -07:00
Jingning Han
df2042dc1e Synchronize encoding process and tokenization handle
The encoding and tokenization process support the recursive
transform block partition coding scheme.

Change-Id: I47283cc6ee9c383059950623ece60a0fcce82e00
2015-05-21 18:51:27 -07:00
Jingning Han
a15cf9a5b7 Synchronize tokenization and detokenization process
Make the encoder and decoder synchronized for recursive
tokenization coding.

Change-Id: I84c5f3dfc3ee9982ab57e658ffe6cb17a949eda2
2015-05-22 01:45:31 +00:00
Jingning Han
bf99a00340 Arrange tokenization order to support recursive txfm block coding
Make the encoder packetize transform block in a recursive order.
Note that the block index with respect to the coding block remains
identical.

Change-Id: I07c6d2017f4f150274aff46c05388a7fd47cd920
2015-05-21 18:43:37 -07:00
Jingning Han
5f6fe83ac5 Syntax coding support for transform block coding
This commit re-designs the bitstream syntax to support recursive
transform block partition. It disables the decoder vector unit
tests.

Change-Id: I6cac24c4f1e44f29ffcc9b87ba1167eeb32d1b69
2015-05-18 15:43:02 -07:00
Jingning Han
208aa6158b Remove get_nonrd_var_based_fixed_partition function
This function has been replaced by other approaches and is not
in use now.

Change-Id: I387f45b5607d202539e482468ccc70e6c0f9341f
2015-04-09 09:49:55 -07:00
Jingning Han
25206e7b7f Compute prediction filter type cost only when needed
Skip redundant prediction filter type cost in filter search loop,
if the rate value will be reset in Hadamard transform based rate
distortion estimate.

Change-Id: Ie5221f4bc8da9461c449df367251aeeac52c6e5d
2015-04-07 12:41:46 -07:00
Jingning Han
9922e4344a Enable Hadamard transform based cost estimate for all block sizes
This commit turns on the Hadamard transform based rate distortion
estimate for all block sizes in RTC coding mode. It conditionally
skips the rate distortion estimation if all zero block flag is set
on. No significant encoding speed change is observed. The
compression performance of speed -6 is improved by 1.7% over using
it only for block sizes of 32x32 and below.

Change-Id: I768145e6f05c737b05b5b5f1ee674e929532cafb
2015-04-04 09:58:45 -07:00
Jingning Han
60e01c6530 Account for eob cost in the RTC mode decision process
This commit accounts for the transform block end of coefficient flag
cost in the RTC mode decision process. This allows a more precise
rate estimate. It also turns on the model to block sizes up to 32x32.
The test sequences shows about 3% - 5% speed penalty for speed -6.
The average compression performance improvement for speed -6 is
1.58% in PSNR. The compression gains for hard clips like jimredvga,
mmmoving, and tacomascmv at low bit-rate range are 1.8%, 2.1%, and
3.2%, respectively.

Change-Id: Ic2ae211888e25a93979eac56b274c6e5ebcc21fb
2015-04-03 10:31:51 -07:00
Jingning Han
657cabe0f7 Tune SSSE3 assembly implementation to improve quantization speed
Change-Id: If0ca8b25b4800d4336e6cbc97194cd9b01c5b5a3
2015-04-01 15:28:01 -07:00
Yaowu Xu
fff4654d36 Merge "Simplify bsize calculation" 2015-04-01 15:06:55 -07:00
Jingning Han
cf4447339e Merge "Optimize quantization simd implementation" 2015-04-01 14:55:18 -07:00
Jingning Han
a4364e5146 Merge "Simplify effective src_diff address computation" 2015-04-01 14:55:03 -07:00
Jingning Han
7acb2a8795 Merge "Refactor block_yrd function for RTC coding mode" 2015-04-01 14:54:24 -07:00
Yaowu Xu
ba91b54d7c Simplify bsize calculation
Change-Id: Ibc514684def9914c66f04cb7931f773e2b79c168
2015-04-01 12:15:06 -07:00
Jingning Han
19da916716 Simplify effective src_diff address computation
Remove redundant offset calculation for effective src_diff address.

Change-Id: I4aab241a36abcef7fd8adf74aed5e12b8b88e0ef
2015-04-01 12:07:47 -07:00
Jingning Han
1470529f62 Refactor block_yrd function for RTC coding mode
This commit separates Hadamard transform/quantization operations
from rate and distortion computation in block_yrd. This allows one
to skip SATD computation when all transform blocks are quantized
to zero. It also uses a new block error function that skips
repeated computation of sum of squared residuals. It reduces the
CPU cycles spent on block error calculation in block_yrd by 40%.

Change-Id: I726acb2454b44af1c3bd95385abecac209959b10
2015-04-01 12:00:43 -07:00
Jingning Han
eed1badedd Optimize quantization simd implementation
This commit allows the quantizer to compare the AC coefficients to
the quantization step size to determine if further multiplication
operations are needed. It makes the quantization process 20% faster
without coding statistics change.

Change-Id: I735aaf6a9c0874c82175bb565b20e131464db64a
2015-04-01 11:47:09 -07:00
Yunqing Wang
a0043c6d30 Enhance the transform skipping decision-making in non-rd mode
For large partition blocks(block_size > 32x32), the variance
calculation is modified so that every 8x8 block's variance
is stored during the calculation, which is used in the
following transform skipping test. Also, the variance for
every tx block is calculated. The skipping test checks all tx
blocks in the partition, and sets the skip flag only if all tx
blocks are skippable. If the skip flag of Y plane is 1, a
quick evaluation is done on UV planes. If the current partition
block is skippable in YUV planes, the mode search checks fewer
inter modes and doesn't check intra modes.

The rtc set borg test(at speed 6) showed that:
Overall psnr: -0.527%; Avg psnr: -0.510%; ssim: -0.573%.
Average single-thread speedup on rtc set was 3.5%.
For 720p clips, more speedups were seen.
gipsrecmotion: 13%
gipsrestat: 12%
vidyo: 5 - 9%
dark: 15%
niklas: 6%

Change-Id: I8d8ebec0cb305f1de016516400bf007c3042666e
2015-04-01 09:43:40 -07:00
Yunqing Wang
fc98114761 Merge "Rename vbp thresholds" 2015-03-31 16:33:30 -07:00
Vignesh Venkatasubramanian
639955f66e Merge "webmdec: Fix read_frame return value for calls after EOS" 2015-03-31 16:11:56 -07:00
Marco
c2b8218eba Merge "Set postproc flags in decoder_get_frame." 2015-03-31 15:22:14 -07:00
Yunqing Wang
c28ff1a9de Rename vbp thresholds
Code refactoring

Change-Id: I410fcce1bc6d95c62c474445f4c97ea8469f1e79
2015-03-31 15:14:44 -07:00
Jingning Han
502ac72233 Merge "Tuning SATD rate calculation for speed" 2015-03-31 14:24:26 -07:00
Jingning Han
1c39c5b96f Merge "Use aligned copy in 8x8 Hadamard transform SSE2" 2015-03-31 12:16:47 -07:00
Jingning Han
fa4289522e Merge "Allow block skip coding option in RTC mode" 2015-03-31 12:16:36 -07:00
Jingning Han
1638d7dc96 Merge "Fix 8x8 Hadamard SSE2 implementation" 2015-03-31 12:16:27 -07:00
Alex Converse
9670d766ab Merge "VP9E_GET_ACTIVE_MAP API function." 2015-03-31 11:52:56 -07:00
Jingning Han
531468a07a Tuning SATD rate calculation for speed
This commit allows the encoder to check the eob per transform
block to decide how to compute the SATD rate cost. If the entire
block is quantized to zero, there is no need to add anything; if
only the DC coefficient is non-zero, add its absolute value;
otherwise, sum over the block. This reduces the CPU cycles spent
on vp9_satd_sse2 to one third.

Change-Id: I0d56044b793b286efc0875fafc0b8bf2d2047e32
2015-03-31 11:02:20 -07:00
hui su
d4f2f1dd5b Merge "Move vp9_coef_con_tree to common/" 2015-03-31 10:51:10 -07:00
Jingning Han
014fa45298 Use aligned copy in 8x8 Hadamard transform SSE2
This reduces the 8x8 Hadamard transform cycles by 20%.

Change-Id: If34c5e02f3afa42244c6efabe121f7cf5d2df41b
2015-03-31 10:21:52 -07:00
Jingning Han
db5ec37edc Merge "Enable 16x16 Hadamard transform in SATD based mode decision" 2015-03-31 09:55:41 -07:00
Jingning Han
8c5670bb6f Merge "Use SATD based mode decision for block sizes below 16x16" 2015-03-31 09:47:47 -07:00
Jingning Han
ebe1be9186 Allow block skip coding option in RTC mode
When the estimated rate-distortion cost of skip coding mode is
lower than that of sending quantized coefficients, allow the
encoder to drop these coefficients. This improves the compression
performance of speed -6 by 0.268% and makes the encoding speed
slightly faster.

Change-Id: Idff2d7ba59f27ead33dd5a0e9f68746ed3c2ab68
2015-03-31 09:32:53 -07:00
hui su
302e24cb3e Move vp9_coef_con_tree to common/
This tree should be defined in common/, as it is needed for
both encoder and decoder.

Change-Id: I4f5cbc80025cf2ced14182c98f7c82dc7d0f87db
2015-03-31 09:20:46 -07:00
Marco
385ca8f741 Set postproc flags in decoder_get_frame.
The postproc settings were not set in decoder_get_frame().

Change-Id: I20d23de3ea18f6df061a53d691d4095d5c62532a
2015-03-30 16:15:57 -07:00
Jingning Han
9b99eb2e12 Merge "Reuse inter prediction pixel block for Hadamard transform" 2015-03-30 16:09:38 -07:00
Jingning Han
34a996ac1e Fix 8x8 Hadamard SSE2 implementation
This commit fixes the SSE2 version 8x8 Hadamard transform
alignment and makes it consistent with the C version.

Change-Id: I1304e5f97e0e5ef2d798fe38081609c39f5bfe74
2015-03-30 15:54:08 -07:00
Jingning Han
26d3d3af6a Enable 16x16 Hadamard transform in SATD based mode decision
This commit replaces the 16x16 2D-DCT transform with Hadamard
transform for RTC coding mode. It reduces the CPU cycles cost
on 16x16 transform by 5X. Overall it makes the speed -6 encoding
speed 1.5% faster without compromise on compression performance.

Change-Id: If6c993831dc4c678d841edc804ff395ed37f2a1b
2015-03-30 15:43:31 -07:00
Jingning Han
f0ac5aaa08 Merge "Hadamard transform based coding mode decision process" 2015-03-30 15:43:15 -07:00
Jingning Han
b4b5af6acd Use SATD based mode decision for block sizes below 16x16
This commit makes the encoder to select between SATD/variance as
metric for mode decision. It also allows to account chroma
component costs for mode decision as well. The overall encoding
time increase as compared to variance based mode selection is about
15% for speed -6. The compression performance is on average 2.2%
better than variance based approach, with about 5% compression
performance gains for hard clips (e.g., jimredvga, nikas720p, and
mmmoving) at lower bit-rate range.

Change-Id: I4d04a31d36f4fcb3f5f491dacd6e7fe44cb9d815
2015-03-30 15:20:07 -07:00
Jingning Han
8a927a1b7a Reuse inter prediction pixel block for Hadamard transform
It saves one unnecessary motion compensated prediction constructed
by using 8-tap filter.

Change-Id: I101215131e6f38621d5935885f94cc74de6a5377
2015-03-30 15:04:33 -07:00
Jingning Han
8c411f74e0 Hadamard transform based coding mode decision process
This commit uses Hadamard transform based rate-distortion cost
estimate for rtc coding mode decision. It improves the compression
performance of speed -6 for many hard clips at lower bit-rates.
For example, 5.5% for jimredvga, 6.7% for mmmoving, 6.1% for
niklas720p. This will introduce extra encoding cycle costs at
this point.

Change-Id: Iaf70634fa2417a705ee29f2456175b981db3d375
2015-03-30 14:46:05 -07:00
Vignesh Venkatasubramanian
1f05b19e69 webmdec: Fix read_frame return value for calls after EOS
webm_read_frame assumes that it won't be called once end of file
is reached. But for frame parallel mode that turns out to be not
true. This patch fixes that behavior by checking for EOS and
returning the appropriate value for subsequent calls.

Change-Id: Ie2fddbe00493a0f96c4172c67be1eb719f0fe8ed
2015-03-30 12:58:26 -07:00
Alex Converse
bf7def9a43 Merge "Simplify skip check." 2015-03-30 11:31:45 -07:00
jackychen
b38b32a794 Merge "vp9_postproc.c: eliminate -Wshadow build warnings." 2015-03-30 10:29:39 -07:00
jackychen
68610ae568 vp9_postproc.c: eliminate -Wshadow build warnings.
Change-Id: I6df525a9ad1ae3cfbba8710d21db8fee76e64dbb
2015-03-27 20:27:30 -07:00
Marco
fa20a60f0d Speed 5: use non-rd mode for key frame coding.
Metrics on RTC set go down by ~1.5% on average.
Key frame encoding time goes down by factor of ~5.

Change-Id: Ia83acc55848613870e5ac6efe7f3d904d877febb
2015-03-27 16:19:26 -07:00
hkuang
0c85718954 Merge "Fix the issue that --limit is not working in --frame-parallel mode." 2015-03-27 10:12:45 -07:00
Adrian Grange
553792cee2 Merge "Remove 8-bit array in HBD" 2015-03-26 16:31:27 -07:00
Adrian Grange
300d428ecd Merge "Replace heap with stack memory allocation" 2015-03-26 16:31:06 -07:00
Adrian Grange
9931110971 Merge "Fix use of scaling in joint motion search" 2015-03-26 16:30:35 -07:00
hkuang
ffafcd6281 Fix the issue that --limit is not working in --frame-parallel mode.
The reason is due to early break out before outputting all the frames inside
decoder.

Change-Id: I4a138fba08d12935c39bd7602c95f8c18b474e29
2015-03-26 15:36:22 -07:00
Johann
46ce6954cc Remove duplicate code from merge
Change-Id: I5e2a1270001b7e29f3f198d57ea40e1efccef367
2015-03-26 14:56:24 -07:00
Adrian Grange
ad18b2b641 Remove 8-bit array in HBD
Creating both 8- and 16-bit arrays and then only using one
of them is wasteful.

Change-Id: Ic5b397c283efaff7bcfff2d2413838ba3e065561
2015-03-25 15:37:03 -07:00
Adrian Grange
65df3d138a Replace heap with stack memory allocation
Replaced the dynamic memory allocation of the
second_pred buffer with an allocation on the stack.

Change-Id: I2716c46b71e8587714ca5733a99eca2c68419b23
2015-03-25 15:36:43 -07:00
Adrian Grange
8d8d7bfde5 Fix use of scaling in joint motion search
To enable us to use the scale-invariant motion estimation
code during mode selection, each of the reference
buffers is scaled to match the size of the frame
being encoded.

This fix ensures that a unit scaling factor is used in
this case rather than the one calculated assuming that
the reference frame is not scaled.

Change-Id: Id9a5c85dad402f3a7cc7ea9f30f204edad080ebf
2015-03-25 15:35:29 -07:00
Johann
ba13ff8501 Parall -> Parallel
Change-Id: I565fef382fa17a00d5ae54e980ef14d9f0ad4f55
2015-03-25 12:45:36 -07:00
James Zern
e865be95bf Merge "fix static analysis warnings related to CHECK_MEM_ERROR" 2015-03-24 23:56:04 -07:00
Parag Salasakar
84ec68d21a mips msa configuration patch for MIPS SIMD Arch (MSA) P5600 and I6400
For P5600:
CROSS=$MTI/bin/mips-mti-linux-gnu- CFLAGS='-EL' CXXFLAGS='-EL' LDFLAGS='-EL'\
 ../configure --target=mips32-linux-gcc --cpu=p5600 --enable-msa

For I6400:
CROSS=$IMG/bin/mips-img-linux-gnu- CFLAGS='-EL' CXXFLAGS='-EL' LDFLAGS='-EL'\
 ../configure --target=mips64-linux-gcc --cpu=i6400 --enable-msa

Change-Id: Id25f721ea1f1991d5116e04dba713aebd7378f05
2015-03-24 15:18:38 -07:00
paulwilkins
ab788c5380 Merge "Enable group adaptive max q by default." 2015-03-24 15:00:12 -07:00
Alex Converse
4dcb839607 VP9E_GET_ACTIVE_MAP API function.
This is useful when aq mode 3 (cyclic refresh) reactivates segments for refresh.

Change-Id: I3ad1d9410b899ede393d82bb8db14e2da4d84eca
2015-03-24 11:19:47 -07:00
Alex Converse
a1e20ec58f Refactor fast loop filter code to handle 444.
Change-Id: I921b1ebabdf617049f8fa26fbe462c3ff115c1ce
2015-03-24 11:17:50 -07:00
Yaowu Xu
c77d4dcb35 Merge "vp9_pred_mv(): misc fixes and optimizations" 2015-03-24 10:36:51 -07:00
Alex Converse
02697e35dc Merge "A tiny cyclic refresh / active map fix." 2015-03-24 09:43:24 -07:00
paulwilkins
8ea7bafdaa Merge "Revised rd adjustment for variance." 2015-03-24 03:12:56 -07:00
paulwilkins
c0b71cf82f Merge "Experimental rd bias based on source vs recon variance." 2015-03-24 03:12:41 -07:00
Alex Converse
31f1563a92 A tiny cyclic refresh / active map fix.
Change-Id: I198727461455c8c198a0c892d02ed3cb1673aa50
2015-03-23 18:51:00 -07:00
James Zern
7cc3e70394 Merge "vp8cx.h: vpx/vpx_encoder.h -> ./vpx_encoder.h" 2015-03-23 17:19:52 -07:00
hkuang
9f4f98fdbd Merge "Optimize the intra frame decode to skip some unnecessary copy." 2015-03-23 16:50:37 -07:00
hkuang
cd1d40ff5d Merge "Safely free all the frame buffers after all the workers finish the work." 2015-03-23 16:50:15 -07:00
James Zern
7999c07697 vp8cx.h: vpx/vpx_encoder.h -> ./vpx_encoder.h
this matches the other includes and simplifies include paths in builds
from source

Change-Id: I344902c84f688ef93c9f3a53e7c06c30db49d8d3
2015-03-23 16:07:21 -07:00
Alex Converse
b7605a9d70 Simplify skip check.
SEG_LVL_SKIP implies skip. This is enforced by skip = write_skip().

Change-Id: I61c79581c9c53deae36685c2bcf388cb4d8827d3
2015-03-23 10:53:31 -07:00
hkuang
85107641a4 Optimize the intra frame decode to skip some unnecessary copy.
This speeds up a normal YT style 1080P clip decode by ~1% on nexus 7.

Change-Id: Ied7fa0d8bc941b2adb4db9382f549ee4d5654f3a
2015-03-23 10:11:49 -07:00
Alex Converse
f7bcce91af Merge "Don't apply active map on key frames." 2015-03-23 10:04:39 -07:00
Alex Converse
03177cb7fa Merge "Set loop filter level to zero on inactive segment." 2015-03-23 10:04:29 -07:00
paulwilkins
691ec45b4e Enable group adaptive max q by default.
Set the GF group adaptive max Q compile flag to 1 by default.

This change has a quite big visual impact in some clips and also
contributes to tighter rate control.

For short test clips that have consistent content the impact is
quite small on metrics but for more varied long form clips there is
a drop in overall psnr but a sharp rise in average psnr caused by
greater expenditure on some easier sections and tighter rate clipping
in hard sections.

In chunked encodes some of the effect will already be present due
to the independent rate control in each chunk but this change takes
the control down to a smaller scale.

yt hd +10.67%, - 3.77%, -1.56%
yt +9.654%, - 3.6%, - 1.82%
std hd +0.25%, -0.85%, -0.42%
derf +0.25%, - 1.1%. - 0.87%

Change-Id: Ibbc39b800d99d053939f4c6712d715124082843e
2015-03-23 15:57:09 +00:00
Yaowu Xu
9fd8abc541 vp9_pred_mv(): misc fixes and optimizations
1. skip near if it is same as nearest
2. correct rounding for converting mv to fullpel position
3. update pred_mv_sad after new mv search.

Overall .1%~.25% compression gains on rtc set for speed 5, 6, 7, 8.

Change-Id: Ic300ca53f7da18073771f1bb993c58cde9deee89
2015-03-20 17:17:04 -07:00
Alex Converse
6d6ef8eb3c Don't apply active map on key frames.
This allows applications to be KF oblivious.

Change-Id: Ic02712eae6ad8d6b3eaec26548299d24ca0d5cc0
2015-03-20 14:57:24 -07:00
Alex Converse
e032fc7b9e Set loop filter level to zero on inactive segment.
Change-Id: I6022a79351882a72a219aee13563bf21bcd70383
2015-03-20 14:43:06 -07:00
paulwilkins
7e234b9228 Revised rd adjustment for variance.
Revised adjustment for rd based on source complexity.
Two cases:

1) Bias against low variance intra predictors
when the actual source variance is higher.

2) When the source variance is very low to give a slight
bias against predictors that might introduce false texture
or features.

The impact on metrics of this change across the test sets is
small and mixed.

derf -0.073%, -0.049%, -0.291%
std hd -0.093%, -0.1%, -0.557%
yt  +0.186%, +0.04%, - 0.074%
ythd +0.625%, + 0.563%, +0.584%

Medium to strong psycho-visual improvements in some
problem clips.

This feature and intra weight on GF group length now
turned on by default.

Change-Id: Idefc8b633a7b7bc56c42dbe19f6b2f872d73851e
2015-03-20 11:59:39 +00:00
paulwilkins
9a1ce7be7d Experimental rd bias based on source vs recon variance.
This experiment biases the rd decision based on the impact
a mode decision has on the relative spatial complexity of the
reconstruction vs the source.

The aim is to better retain a semblance of texture even if it
is slightly misaligned / wrong, rather than use a simple rd
measure that tends to favor use of a flat predictor if a perfect
match can't be found.

This improves the appearance of texture and visual quality
on specific test clips but is hidden under a flag and currently
off by default pending visual quality testing on a wider Yt set.

Change-Id: Idf6e754a8949bf39ed9d314c6f2daaa20c888aad
2015-03-20 11:57:36 +00:00
hkuang
b88dac8938 Safely free all the frame buffers after all the workers finish the work.
Issue: 978

Change-Id: Ia7aa809095008f6819a44d7ecb0329def79b1117
2015-03-19 12:21:00 -07:00
James Zern
3ab1c0227a fix static analysis warnings related to CHECK_MEM_ERROR
mark vpx_internal_error as noreturn under the analyzer

Change-Id: If214a0e740aab9b82cc04f4492eb77a7a07ef7ab
2015-03-18 14:35:49 -07:00
61 changed files with 3342 additions and 932 deletions

View File

@@ -1041,6 +1041,31 @@ EOF
check_add_cflags -mips32r2 -mdspr2 check_add_cflags -mips32r2 -mdspr2
disable_feature fast_unaligned disable_feature fast_unaligned
fi fi
if [ -n "${tune_cpu}" ]; then
case ${tune_cpu} in
p5600)
add_cflags -mips32r5 -funroll-loops -mload-store-pairs
add_cflags -msched-weight -mhard-float
add_asflags -mips32r5 -mhard-float
;;
i6400)
add_cflags -mips64r6 -mabi=64 -funroll-loops -mload-store-pairs
add_cflags -msched-weight -mhard-float
add_asflags -mips64r6 -mabi=64 -mhard-float
add_ldflags -mips64r6 -mabi=64
;;
esac
if enabled msa; then
add_cflags -mmsa -mfp64 -flax-vector-conversions
add_asflags -mmsa -mfp64 -flax-vector-conversions
add_ldflags -mmsa -mfp64 -flax-vector-conversions
disable_feature fast_unaligned
fi
fi
check_add_cflags -march=${tgt_isa} check_add_cflags -march=${tgt_isa}
check_add_asflags -march=${tgt_isa} check_add_asflags -march=${tgt_isa}
check_add_asflags -KPIC check_add_asflags -KPIC

View File

@@ -376,6 +376,10 @@ if ($opts{arch} eq 'x86') {
@ALL_ARCHS = filter("$opts{arch}", qw/dspr2/); @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/);
last; last;
} }
if (/HAVE_MSA=yes/) {
@ALL_ARCHS = filter("$opts{arch}", qw/msa/);
last;
}
} }
close CONFIG_FILE; close CONFIG_FILE;
mips; mips;

2
configure vendored
View File

@@ -258,7 +258,7 @@ ARCH_EXT_LIST="
mips32 mips32
dspr2 dspr2
msa
mips64 mips64
mmx mmx

View File

@@ -21,13 +21,13 @@
namespace { namespace {
const int kLegacyByteAlignment = 0; //const int kLegacyByteAlignment = 0;
const int kLegacyYPlaneByteAlignment = 32; //const int kLegacyYPlaneByteAlignment = 32;
const int kNumPlanesToCheck = 3; //const int kNumPlanesToCheck = 3;
const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm"; //const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5"; //const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5";
#if CONFIG_WEBM_IO #if CONFIG_WEBM_IO && 0
struct ByteAlignmentTestParam { struct ByteAlignmentTestParam {
int byte_alignment; int byte_alignment;

View File

@@ -398,7 +398,7 @@ TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) {
delete video; delete video;
} }
#if CONFIG_WEBM_IO #if CONFIG_WEBM_IO && 0
TEST_F(ExternalFrameBufferTest, MinFrameBuffers) { TEST_F(ExternalFrameBufferTest, MinFrameBuffers) {
// Minimum number of external frame buffers for VP9 is // Minimum number of external frame buffers for VP9 is
// #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS. // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS.
@@ -481,8 +481,8 @@ TEST_F(ExternalFrameBufferTest, SetAfterDecode) {
} }
#endif // CONFIG_WEBM_IO #endif // CONFIG_WEBM_IO
VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test, //VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test,
::testing::ValuesIn(libvpx_test::kVP9TestVectors, // ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
libvpx_test::kVP9TestVectors + // libvpx_test::kVP9TestVectors +
libvpx_test::kNumVP9TestVectors)); // libvpx_test::kNumVP9TestVectors));
} // namespace } // namespace

View File

@@ -110,23 +110,23 @@ TEST_P(InvalidFileTest, ReturnCode) {
RunTest(); RunTest();
} }
const DecodeParam kVP9InvalidFileTests[] = { //const DecodeParam kVP9InvalidFileTests[] = {
{1, "invalid-vp90-02-v2.webm"}, // {1, "invalid-vp90-02-v2.webm"},
{1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"}, // {1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
{1, "invalid-vp90-03-v3.webm"}, // {1, "invalid-vp90-03-v3.webm"},
{1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"}, // {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
{1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"}, // {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
{1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf"}, // {1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf"},
{1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf"}, // {1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf"},
{1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf"}, // {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf"},
{1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"}, // {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
{1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf"}, // {1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf"},
{1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf"}, // {1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf"},
{1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf"}, // {1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf"},
}; //};
VP9_INSTANTIATE_TEST_CASE(InvalidFileTest, //VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
::testing::ValuesIn(kVP9InvalidFileTests)); // ::testing::ValuesIn(kVP9InvalidFileTests));
// This class will include test vectors that are expected to fail // This class will include test vectors that are expected to fail
// peek. However they are still expected to have no fatal failures. // peek. However they are still expected to have no fatal failures.
@@ -142,26 +142,26 @@ TEST_P(InvalidFileInvalidPeekTest, ReturnCode) {
RunTest(); RunTest();
} }
const DecodeParam kVP9InvalidFileInvalidPeekTests[] = { //const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
{1, "invalid-vp90-01-v2.webm"}, // {1, "invalid-vp90-01-v2.webm"},
}; //};
VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest, //VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
::testing::ValuesIn(kVP9InvalidFileInvalidPeekTests)); // ::testing::ValuesIn(kVP9InvalidFileInvalidPeekTests));
const DecodeParam kMultiThreadedVP9InvalidFileTests[] = { //const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
{4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"}, // {4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"},
{4, "invalid-" // {4, "invalid-"
"vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"}, // "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"},
{4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf"}, // {4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf"},
{2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"}, // {2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"},
{4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"}, // {4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"},
}; //};
INSTANTIATE_TEST_CASE_P( //INSTANTIATE_TEST_CASE_P(
VP9MultiThreaded, InvalidFileTest, // VP9MultiThreaded, InvalidFileTest,
::testing::Combine( // ::testing::Combine(
::testing::Values( // ::testing::Values(
static_cast<const libvpx_test::CodecFactory*>(&libvpx_test::kVP9)), // static_cast<const libvpx_test::CodecFactory*>(&libvpx_test::kVP9)),
::testing::ValuesIn(kMultiThreadedVP9InvalidFileTests))); // ::testing::ValuesIn(kMultiThreadedVP9InvalidFileTests)));
} // namespace } // namespace

View File

@@ -29,7 +29,7 @@ namespace {
enum DecodeMode { enum DecodeMode {
kSerialMode, kSerialMode,
kFrameParallMode kFrameParallelMode
}; };
const int kDecodeMode = 0; const int kDecodeMode = 0;
@@ -95,7 +95,7 @@ TEST_P(TestVectorTest, MD5Match) {
vpx_codec_dec_cfg_t cfg = {0}; vpx_codec_dec_cfg_t cfg = {0};
char str[256]; char str[256];
if (mode == kFrameParallMode) { if (mode == kFrameParallelMode) {
flags |= VPX_CODEC_USE_FRAME_THREADING; flags |= VPX_CODEC_USE_FRAME_THREADING;
} }
@@ -145,28 +145,28 @@ VP8_INSTANTIATE_TEST_CASE(
libvpx_test::kNumVP8TestVectors))); libvpx_test::kNumVP8TestVectors)));
// Test VP9 decode in serial mode with single thread. // Test VP9 decode in serial mode with single thread.
VP9_INSTANTIATE_TEST_CASE( //VP9_INSTANTIATE_TEST_CASE(
TestVectorTest, // TestVectorTest,
::testing::Combine( // ::testing::Combine(
::testing::Values(0), // Serial Mode. // ::testing::Values(0), // Serial Mode.
::testing::Values(1), // Single thread. // ::testing::Values(1), // Single thread.
::testing::ValuesIn(libvpx_test::kVP9TestVectors, // ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
libvpx_test::kVP9TestVectors + // libvpx_test::kVP9TestVectors +
libvpx_test::kNumVP9TestVectors))); // libvpx_test::kNumVP9TestVectors)));
#if CONFIG_VP9_DECODER //#if CONFIG_VP9_DECODER
// Test VP9 decode in frame parallel mode with different number of threads. //// Test VP9 decode in frame parallel mode with different number of threads.
INSTANTIATE_TEST_CASE_P( //INSTANTIATE_TEST_CASE_P(
VP9MultiThreadedFrameParallel, TestVectorTest, // VP9MultiThreadedFrameParallel, TestVectorTest,
::testing::Combine( // ::testing::Combine(
::testing::Values( // ::testing::Values(
static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)), // static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
::testing::Combine( // ::testing::Combine(
::testing::Values(1), // Frame Parallel mode. // ::testing::Values(1), // Frame Parallel mode.
::testing::Range(2, 9), // With 2 ~ 8 threads. // ::testing::Range(2, 9), // With 2 ~ 8 threads.
::testing::ValuesIn(libvpx_test::kVP9TestVectors, // ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
libvpx_test::kVP9TestVectors + // libvpx_test::kVP9TestVectors +
libvpx_test::kNumVP9TestVectors)))); // libvpx_test::kNumVP9TestVectors))));
#endif //#endif
} // namespace } // namespace

View File

@@ -30,7 +30,7 @@ namespace {
using std::string; using std::string;
using libvpx_test::ACMRandom; using libvpx_test::ACMRandom;
#if CONFIG_WEBM_IO #if CONFIG_WEBM_IO && 0
void CheckUserPrivateData(void *user_priv, int *target) { void CheckUserPrivateData(void *user_priv, int *target) {
// actual pointer value should be the same as expected. // actual pointer value should be the same as expected.

View File

@@ -43,29 +43,29 @@ void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
namespace libvpx_test { namespace libvpx_test {
TEST(TestDecrypt, DecryptWorksVp9) { //TEST(TestDecrypt, DecryptWorksVp9) {
libvpx_test::IVFVideoSource video("vp90-2-05-resize.ivf"); // libvpx_test::IVFVideoSource video("vp90-2-05-resize.ivf");
video.Init(); // video.Init();
//
vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t(); // vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
VP9Decoder decoder(dec_cfg, 0); // VP9Decoder decoder(dec_cfg, 0);
//
video.Begin(); // video.Begin();
//
// no decryption // // no decryption
vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size()); // vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size());
ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); // ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
//
// decrypt frame // // decrypt frame
video.Next(); // video.Next();
//
std::vector<uint8_t> encrypted(video.frame_size()); // std::vector<uint8_t> encrypted(video.frame_size());
encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0); // encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0);
vpx_decrypt_init di = { test_decrypt_cb, &encrypted[0] }; // vpx_decrypt_init di = { test_decrypt_cb, &encrypted[0] };
decoder.Control(VPXD_SET_DECRYPTOR, &di); // decoder.Control(VPXD_SET_DECRYPTOR, &di);
//
res = decoder.DecodeFrame(&encrypted[0], encrypted.size()); // res = decoder.DecodeFrame(&encrypted[0], encrypted.size());
ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); // ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
} //}
} // namespace libvpx_test } // namespace libvpx_test

View File

@@ -27,7 +27,7 @@ namespace {
using std::string; using std::string;
#if CONFIG_WEBM_IO #if CONFIG_WEBM_IO && 0
struct FileList { struct FileList {
const char *name; const char *name;

View File

@@ -152,7 +152,7 @@ TEST(VP9WorkerThreadTest, TestInterfaceAPI) {
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Multi-threaded decode tests // Multi-threaded decode tests
#if CONFIG_WEBM_IO #if CONFIG_WEBM_IO && 0
struct FileList { struct FileList {
const char *name; const char *name;
const char *expected_md5; const char *expected_md5;

View File

@@ -858,9 +858,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
{ {
vpx_codec_err_t res = VPX_CODEC_OK; vpx_codec_err_t res = VPX_CODEC_OK;
if (!ctx->cfg.rc_target_bitrate)
return res;
if (!ctx->cfg.rc_target_bitrate) if (!ctx->cfg.rc_target_bitrate)
return res; return res;

View File

@@ -83,8 +83,7 @@ static void free_seg_map(VP9_COMMON *cm) {
} }
} }
void vp9_free_ref_frame_buffers(VP9_COMMON *cm) { void vp9_free_ref_frame_buffers(BufferPool *pool) {
BufferPool *const pool = cm->buffer_pool;
int i; int i;
for (i = 0; i < FRAME_BUFFERS; ++i) { for (i = 0; i < FRAME_BUFFERS; ++i) {
@@ -97,10 +96,14 @@ void vp9_free_ref_frame_buffers(VP9_COMMON *cm) {
pool->frame_bufs[i].mvs = NULL; pool->frame_bufs[i].mvs = NULL;
vp9_free_frame_buffer(&pool->frame_bufs[i].buf); vp9_free_frame_buffer(&pool->frame_bufs[i].buf);
} }
}
void vp9_free_postproc_buffers(VP9_COMMON *cm) {
#if CONFIG_VP9_POSTPROC #if CONFIG_VP9_POSTPROC
vp9_free_frame_buffer(&cm->post_proc_buffer); vp9_free_frame_buffer(&cm->post_proc_buffer);
vp9_free_frame_buffer(&cm->post_proc_buffer_int); vp9_free_frame_buffer(&cm->post_proc_buffer_int);
#else
(void)cm;
#endif #endif
} }
@@ -111,6 +114,8 @@ void vp9_free_context_buffers(VP9_COMMON *cm) {
cm->above_context = NULL; cm->above_context = NULL;
vpx_free(cm->above_seg_context); vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL; cm->above_seg_context = NULL;
vpx_free(cm->above_txfm_context);
cm->above_txfm_context = NULL;
} }
int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
@@ -134,6 +139,10 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context));
if (!cm->above_seg_context) goto fail; if (!cm->above_seg_context) goto fail;
cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc(
mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_txfm_context));
if (!cm->above_txfm_context) goto fail;
return 0; return 0;
fail: fail:
@@ -142,7 +151,6 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
} }
void vp9_remove_common(VP9_COMMON *cm) { void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_ref_frame_buffers(cm);
vp9_free_context_buffers(cm); vp9_free_context_buffers(cm);
vpx_free(cm->fc); vpx_free(cm->fc);

View File

@@ -19,6 +19,7 @@ extern "C" {
#endif #endif
struct VP9Common; struct VP9Common;
struct BufferPool;
void vp9_remove_common(struct VP9Common *cm); void vp9_remove_common(struct VP9Common *cm);
@@ -26,7 +27,8 @@ int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);
void vp9_init_context_buffers(struct VP9Common *cm); void vp9_init_context_buffers(struct VP9Common *cm);
void vp9_free_context_buffers(struct VP9Common *cm); void vp9_free_context_buffers(struct VP9Common *cm);
void vp9_free_ref_frame_buffers(struct VP9Common *cm); void vp9_free_ref_frame_buffers(struct BufferPool *pool);
void vp9_free_postproc_buffers(struct VP9Common *cm);
int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
void vp9_free_state_buffers(struct VP9Common *cm); void vp9_free_state_buffers(struct VP9Common *cm);

View File

@@ -29,6 +29,7 @@ extern "C" {
#define BLOCK_SIZE_GROUPS 4 #define BLOCK_SIZE_GROUPS 4
#define SKIP_CONTEXTS 3 #define SKIP_CONTEXTS 3
#define INTER_MODE_CONTEXTS 7 #define INTER_MODE_CONTEXTS 7
#define TXFM_PARTITION_CONTEXTS 12
/* Segment Feature Masks */ /* Segment Feature Masks */
#define MAX_MV_REF_CANDIDATES 2 #define MAX_MV_REF_CANDIDATES 2
@@ -46,6 +47,7 @@ typedef enum {
#define MAX_MB_PLANE 3 #define MAX_MB_PLANE 3
typedef char ENTROPY_CONTEXT; typedef char ENTROPY_CONTEXT;
typedef TX_SIZE TXFM_CONTEXT;
static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
ENTROPY_CONTEXT b) { ENTROPY_CONTEXT b) {
@@ -113,6 +115,8 @@ typedef struct {
BLOCK_SIZE sb_type; BLOCK_SIZE sb_type;
PREDICTION_MODE mode; PREDICTION_MODE mode;
TX_SIZE tx_size; TX_SIZE tx_size;
TX_SIZE inter_tx_size[64]; // Assume maximum of 64x64 block size.
TX_SIZE max_tx_size; // Maximum tx size allowed in current block.
int8_t skip; int8_t skip;
int8_t segment_id; int8_t segment_id;
int8_t seg_id_predicted; // valid only when temporal_update is enabled int8_t seg_id_predicted; // valid only when temporal_update is enabled
@@ -126,7 +130,6 @@ typedef struct {
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
uint8_t mode_context[MAX_REF_FRAMES]; uint8_t mode_context[MAX_REF_FRAMES];
INTERP_FILTER interp_filter; INTERP_FILTER interp_filter;
} MB_MODE_INFO; } MB_MODE_INFO;
typedef struct MODE_INFO { typedef struct MODE_INFO {
@@ -218,6 +221,10 @@ typedef struct macroblockd {
PARTITION_CONTEXT *above_seg_context; PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT left_seg_context[8]; PARTITION_CONTEXT left_seg_context[8];
TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT *left_txfm_context;
TXFM_CONTEXT left_txfm_context_buffer[8];
/* mc buffer */ /* mc buffer */
DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
@@ -265,13 +272,21 @@ static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, static TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize,
int xss, int yss) { int xss, int yss) {
if (bsize < BLOCK_8X8) { if (bsize < BLOCK_8X8) {
return TX_4X4; return TX_4X4;
} else { } else {
const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss]; const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss];
return MIN(y_tx_size, max_txsize_lookup[plane_bsize]); TX_SIZE uv_tx_size = TX_4X4;
if (y_tx_size == TX_32X32)
uv_tx_size = TX_16X16;
else if (y_tx_size == TX_16X16)
uv_tx_size = TX_8X8;
else if (y_tx_size == TX_8X8)
uv_tx_size = TX_4X4;
return MIN(uv_tx_size, max_txsize_lookup[plane_bsize]);
} }
} }
@@ -300,7 +315,7 @@ void vp9_foreach_transformed_block(
const MACROBLOCKD* const xd, BLOCK_SIZE bsize, const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
foreach_transformed_block_visitor visit, void *arg); foreach_transformed_block_visitor visit, void *arg);
static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, static void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, int block, TX_SIZE tx_size, int block,
int *x, int *y) { int *x, int *y) {
const int bwl = b_width_log2_lookup[plane_bsize]; const int bwl = b_width_log2_lookup[plane_bsize];

View File

@@ -15,6 +15,18 @@
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h" #include "vpx/vpx_integer.h"
// Unconstrained Node Tree
const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
2, 6, // 0 = LOW_VAL
-TWO_TOKEN, 4, // 1 = TWO
-THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE
8, 10, // 3 = HIGH_LOW
-CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE
12, 14, // 5 = CAT_THREEFOUR
-CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE
};
const vp9_prob vp9_cat1_prob[] = { 159 }; const vp9_prob vp9_cat1_prob[] = { 159 };
const vp9_prob vp9_cat2_prob[] = { 165, 145 }; const vp9_prob vp9_cat2_prob[] = { 165, 145 };
const vp9_prob vp9_cat3_prob[] = { 173, 148, 140 }; const vp9_prob vp9_cat3_prob[] = { 173, 148, 140 };

View File

@@ -173,6 +173,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
#define PIVOT_NODE 2 // which node is pivot #define PIVOT_NODE 2 // which node is pivot
#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) #define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
extern const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
extern const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; extern const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS]

View File

@@ -13,118 +13,12 @@
#include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_seg_common.h"
const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { const vp9_prob vp9_intra_mode_prob[INTRA_MODES] = {
{ // above = dc 227, 223, 219, 213, 204, 191, 170, 127
{ 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc };
{ 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v
{ 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h const vp9_prob vp9_intra_predictor_prob[3] = {
{ 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 170, 192, 170
{ 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135
{ 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117
{ 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153
{ 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207
{ 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63
{ 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm
}, { // above = v
{ 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc
{ 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v
{ 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h
{ 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45
{ 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135
{ 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117
{ 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153
{ 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207
{ 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63
{ 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm
}, { // above = h
{ 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc
{ 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v
{ 42, 26, 11, 199, 241, 228, 23, 15, 85 }, // left = h
{ 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45
{ 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135
{ 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117
{ 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153
{ 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207
{ 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63
{ 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm
}, { // above = d45
{ 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc
{ 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v
{ 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h
{ 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45
{ 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135
{ 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117
{ 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153
{ 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207
{ 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63
{ 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm
}, { // above = d135
{ 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc
{ 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v
{ 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h
{ 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45
{ 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135
{ 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117
{ 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153
{ 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207
{ 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63
{ 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm
}, { // above = d117
{ 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc
{ 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v
{ 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h
{ 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45
{ 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135
{ 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117
{ 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153
{ 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207
{ 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63
{ 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm
}, { // above = d153
{ 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc
{ 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v
{ 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h
{ 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45
{ 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135
{ 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117
{ 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153
{ 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207
{ 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63
{ 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm
}, { // above = d207
{ 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc
{ 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v
{ 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h
{ 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45
{ 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135
{ 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117
{ 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153
{ 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207
{ 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63
{ 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm
}, { // above = d63
{ 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc
{ 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v
{ 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h
{ 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45
{ 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135
{ 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117
{ 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153
{ 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // left = d207
{ 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63
{ 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm
}, { // above = tm
{ 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc
{ 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v
{ 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h
{ 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45
{ 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135
{ 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117
{ 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153
{ 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207
{ 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63
{ 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm
}
}; };
const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = { const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = {
@@ -302,6 +196,10 @@ void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
} }
static const vp9_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = {
141, 139, 175, 87, 196, 165, 177, 75, 220, 179, 205, 197
};
static const vp9_prob default_skip_probs[SKIP_CONTEXTS] = { static const vp9_prob default_skip_probs[SKIP_CONTEXTS] = {
192, 128, 64 192, 128, 64
}; };
@@ -324,6 +222,8 @@ void vp9_init_mode_probs(FRAME_CONTEXT *fc) {
vp9_copy(fc->comp_ref_prob, default_comp_ref_p); vp9_copy(fc->comp_ref_prob, default_comp_ref_p);
vp9_copy(fc->single_ref_prob, default_single_ref_p); vp9_copy(fc->single_ref_prob, default_single_ref_p);
fc->tx_probs = default_tx_probs; fc->tx_probs = default_tx_probs;
vp9_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
vp9_copy(fc->intra_predictor_prob, vp9_intra_predictor_prob);
vp9_copy(fc->skip_probs, default_skip_probs); vp9_copy(fc->skip_probs, default_skip_probs);
vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); vp9_copy(fc->inter_mode_probs, default_inter_mode_probs);
} }
@@ -402,6 +302,11 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
} }
} }
for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
fc->txfm_partition_prob[i] =
mode_mv_merge_probs(pre_fc->txfm_partition_prob[i],
counts->txfm_partition[i]);
for (i = 0; i < SKIP_CONTEXTS; ++i) for (i = 0; i < SKIP_CONTEXTS; ++i)
fc->skip_probs[i] = mode_mv_merge_probs( fc->skip_probs[i] = mode_mv_merge_probs(
pre_fc->skip_probs[i], counts->skip[i]); pre_fc->skip_probs[i], counts->skip[i]);

View File

@@ -49,6 +49,8 @@ typedef struct frame_contexts {
vp9_prob single_ref_prob[REF_CONTEXTS][2]; vp9_prob single_ref_prob[REF_CONTEXTS][2];
vp9_prob comp_ref_prob[REF_CONTEXTS]; vp9_prob comp_ref_prob[REF_CONTEXTS];
struct tx_probs tx_probs; struct tx_probs tx_probs;
vp9_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
vp9_prob intra_predictor_prob[3];
vp9_prob skip_probs[SKIP_CONTEXTS]; vp9_prob skip_probs[SKIP_CONTEXTS];
nmv_context nmvc; nmv_context nmvc;
int initialized; int initialized;
@@ -70,12 +72,13 @@ typedef struct FRAME_COUNTS {
unsigned int comp_ref[REF_CONTEXTS][2]; unsigned int comp_ref[REF_CONTEXTS][2];
struct tx_counts tx; struct tx_counts tx;
unsigned int skip[SKIP_CONTEXTS][2]; unsigned int skip[SKIP_CONTEXTS][2];
unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
unsigned int intra_predictor[2][2];
nmv_context_counts mv; nmv_context_counts mv;
} FRAME_COUNTS; } FRAME_COUNTS;
extern const vp9_prob vp9_intra_mode_prob[INTRA_MODES];
extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
[INTRA_MODES - 1];
extern const vp9_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] extern const vp9_prob vp9_kf_partition_probs[PARTITION_CONTEXTS]
[PARTITION_TYPES - 1]; [PARTITION_TYPES - 1];
extern const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; extern const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)];
@@ -97,15 +100,6 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]); unsigned int (*ct_8x8p)[2]);
static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi,
const MODE_INFO *above_mi,
const MODE_INFO *left_mi,
int block) {
const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block);
const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block);
return vp9_kf_y_mode_prob[above][left];
}
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
#endif #endif

View File

@@ -293,7 +293,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
} }
} }
static void filter_selectively_vert_row2(PLANE_TYPE plane_type, static void filter_selectively_vert_row2(int subsampling_factor,
uint8_t *s, int pitch, uint8_t *s, int pitch,
unsigned int mask_16x16_l, unsigned int mask_16x16_l,
unsigned int mask_8x8_l, unsigned int mask_8x8_l,
@@ -301,9 +301,9 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type,
unsigned int mask_4x4_int_l, unsigned int mask_4x4_int_l,
const loop_filter_info_n *lfi_n, const loop_filter_info_n *lfi_n,
const uint8_t *lfl) { const uint8_t *lfl) {
const int mask_shift = plane_type ? 4 : 8; const int mask_shift = subsampling_factor ? 4 : 8;
const int mask_cutoff = plane_type ? 0xf : 0xff; const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
const int lfl_forward = plane_type ? 4 : 8; const int lfl_forward = subsampling_factor ? 4 : 8;
unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
@@ -393,7 +393,7 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type,
} }
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type, static void highbd_filter_selectively_vert_row2(int subsampling_factor,
uint16_t *s, int pitch, uint16_t *s, int pitch,
unsigned int mask_16x16_l, unsigned int mask_16x16_l,
unsigned int mask_8x8_l, unsigned int mask_8x8_l,
@@ -401,9 +401,9 @@ static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type,
unsigned int mask_4x4_int_l, unsigned int mask_4x4_int_l,
const loop_filter_info_n *lfi_n, const loop_filter_info_n *lfi_n,
const uint8_t *lfl, int bd) { const uint8_t *lfl, int bd) {
const int mask_shift = plane_type ? 4 : 8; const int mask_shift = subsampling_factor ? 4 : 8;
const int mask_cutoff = plane_type ? 0xf : 0xff; const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
const int lfl_forward = plane_type ? 4 : 8; const int lfl_forward = subsampling_factor ? 4 : 8;
unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
@@ -1176,27 +1176,37 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
// Determine the vertical edges that need filtering // Determine the vertical edges that need filtering
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mi_8x8[c].src_mi; const MODE_INFO *mi = mi_8x8[c].src_mi;
const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; const MB_MODE_INFO *mbmi = &mi[0].mbmi;
const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); const BLOCK_SIZE sb_type = mbmi->sb_type;
const int skip_this = mbmi->skip && is_inter_block(mbmi);
const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
// left edge of current unit is block/partition edge -> no skip // left edge of current unit is block/partition edge -> no skip
const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
!(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; !blk_col : 1;
const int skip_this_c = skip_this && !block_edge_left; const int skip_this_c = skip_this && !block_edge_left;
// top edge of current unit is block/partition edge -> no skip // top edge of current unit is block/partition edge -> no skip
const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
!(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; !blk_row : 1;
const int skip_this_r = skip_this && !block_edge_above; const int skip_this_r = skip_this && !block_edge_above;
const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
? get_uv_tx_size(&mi[0].mbmi, plane) TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) ?
: mi[0].mbmi.tx_size; get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, ss_x, ss_y)
: mbmi->tx_size;
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
// Filter level can vary per MI // Filter level can vary per MI
if (!(lfl[(r << 3) + (c >> ss_x)] = if (!(lfl[(r << 3) + (c >> ss_x)] =
get_filter_level(&cm->lf_info, &mi[0].mbmi))) get_filter_level(&cm->lf_info, mbmi)))
continue; continue;
if (is_inter_block(mbmi) && !mbmi->skip)
tx_size = (plane->plane_type == PLANE_TYPE_UV) ?
get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row * 8 + blk_col],
mbmi->sb_type, ss_x, ss_y)
: mbmi->inter_tx_size[blk_row * 8 + blk_col];
// Build masks based on the transform size of each block // Build masks based on the transform size of each block
if (tx_size == TX_32X32) { if (tx_size == TX_32X32) {
if (!skip_this_c && ((c >> ss_x) & 3) == 0) { if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
@@ -1326,20 +1336,20 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
} }
} }
void vp9_filter_block_plane(VP9_COMMON *const cm, void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
struct macroblockd_plane *const plane, struct macroblockd_plane *const plane,
int mi_row, int mi_row,
LOOP_FILTER_MASK *lfm) { LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst; struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf; uint8_t *const dst0 = dst->buf;
int r, c; int r;
if (!plane->plane_type) {
uint64_t mask_16x16 = lfm->left_y[TX_16X16]; uint64_t mask_16x16 = lfm->left_y[TX_16X16];
uint64_t mask_8x8 = lfm->left_y[TX_8X8]; uint64_t mask_8x8 = lfm->left_y[TX_8X8];
uint64_t mask_4x4 = lfm->left_y[TX_4X4]; uint64_t mask_4x4 = lfm->left_y[TX_4X4];
uint64_t mask_4x4_int = lfm->int_4x4_y; uint64_t mask_4x4_int = lfm->int_4x4_y;
assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
// Vertical pass: do 2 rows at one time // Vertical pass: do 2 rows at one time
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
unsigned int mask_16x16_l = mask_16x16 & 0xffff; unsigned int mask_16x16_l = mask_16x16 & 0xffff;
@@ -1350,33 +1360,19 @@ void vp9_filter_block_plane(VP9_COMMON *const cm,
// Disable filtering on the leftmost column. // Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(plane->plane_type, highbd_filter_selectively_vert_row2(
CONVERT_TO_SHORTPTR(dst->buf), plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
mask_16x16_l, &lfm->lfl_y[r << 3], (int)cm->bit_depth);
mask_8x8_l,
mask_4x4_l,
mask_4x4_int_l,
&cm->lf_info, &lfm->lfl_y[r << 3],
(int)cm->bit_depth);
} else { } else {
filter_selectively_vert_row2(plane->plane_type, filter_selectively_vert_row2(
dst->buf, dst->stride, plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_16x16_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
mask_8x8_l,
mask_4x4_l,
mask_4x4_int_l,
&cm->lf_info,
&lfm->lfl_y[r << 3]);
} }
#else #else
filter_selectively_vert_row2(plane->plane_type, filter_selectively_vert_row2(
dst->buf, dst->stride, plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_16x16_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
mask_8x8_l,
mask_4x4_l,
mask_4x4_int_l,
&cm->lf_info, &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride; dst->buf += 16 * dst->stride;
mask_16x16 >>= 16; mask_16x16 >>= 16;
@@ -1409,31 +1405,18 @@ void vp9_filter_block_plane(VP9_COMMON *const cm,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), highbd_filter_selectively_horiz(
dst->stride, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
mask_16x16_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
mask_8x8_r,
mask_4x4_r,
mask_4x4_int & 0xff,
&cm->lf_info,
&lfm->lfl_y[r << 3],
(int)cm->bit_depth); (int)cm->bit_depth);
} else { } else {
filter_selectively_horiz(dst->buf, dst->stride, filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_16x16_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int & 0xff,
&cm->lf_info,
&lfm->lfl_y[r << 3]); &lfm->lfl_y[r << 3]);
} }
#else #else
filter_selectively_horiz(dst->buf, dst->stride, filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_16x16_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int & 0xff,
&cm->lf_info,
&lfm->lfl_y[r << 3]); &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -1443,19 +1426,29 @@ void vp9_filter_block_plane(VP9_COMMON *const cm,
mask_4x4 >>= 8; mask_4x4 >>= 8;
mask_4x4_int >>= 8; mask_4x4_int >>= 8;
} }
} else { }
void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
struct macroblockd_plane *const plane,
int mi_row,
LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r, c;
uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
uint16_t mask_4x4_int = lfm->int_4x4_uv; uint16_t mask_4x4_int = lfm->int_4x4_uv;
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
// Vertical pass: do 2 rows at one time // Vertical pass: do 2 rows at one time
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
if (plane->plane_type == 1) { if (plane->plane_type == 1) {
for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
(c << 1)];
} }
} }
@@ -1468,34 +1461,20 @@ void vp9_filter_block_plane(VP9_COMMON *const cm,
// Disable filtering on the leftmost column. // Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(plane->plane_type, highbd_filter_selectively_vert_row2(
CONVERT_TO_SHORTPTR(dst->buf), plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
dst->stride, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
mask_16x16_l, &lfm->lfl_uv[r << 1], (int)cm->bit_depth);
mask_8x8_l,
mask_4x4_l,
mask_4x4_int_l,
&cm->lf_info,
&lfm->lfl_uv[r << 1],
(int)cm->bit_depth);
} else { } else {
filter_selectively_vert_row2(plane->plane_type, filter_selectively_vert_row2(
dst->buf, dst->stride, plane->subsampling_x, dst->buf, dst->stride,
mask_16x16_l, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
mask_8x8_l,
mask_4x4_l,
mask_4x4_int_l,
&cm->lf_info,
&lfm->lfl_uv[r << 1]); &lfm->lfl_uv[r << 1]);
} }
#else #else
filter_selectively_vert_row2(plane->plane_type, filter_selectively_vert_row2(
dst->buf, dst->stride, plane->subsampling_x, dst->buf, dst->stride,
mask_16x16_l, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
mask_8x8_l,
mask_4x4_l,
mask_4x4_int_l,
&cm->lf_info,
&lfm->lfl_uv[r << 1]); &lfm->lfl_uv[r << 1]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -1516,8 +1495,8 @@ void vp9_filter_block_plane(VP9_COMMON *const cm,
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
const unsigned int mask_4x4_int_r = skip_border_4x4_r ? const unsigned int mask_4x4_int_r =
0 : (mask_4x4_int & 0xf); skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
unsigned int mask_16x16_r; unsigned int mask_16x16_r;
unsigned int mask_8x8_r; unsigned int mask_8x8_r;
unsigned int mask_4x4_r; unsigned int mask_4x4_r;
@@ -1535,30 +1514,17 @@ void vp9_filter_block_plane(VP9_COMMON *const cm,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, dst->stride, mask_16x16_r, mask_8x8_r,
mask_16x16_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
mask_8x8_r, &lfm->lfl_uv[r << 1], (int)cm->bit_depth);
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info,
&lfm->lfl_uv[r << 1],
(int)cm->bit_depth);
} else { } else {
filter_selectively_horiz(dst->buf, dst->stride, filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_16x16_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info,
&lfm->lfl_uv[r << 1]); &lfm->lfl_uv[r << 1]);
} }
#else #else
filter_selectively_horiz(dst->buf, dst->stride, filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_16x16_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info,
&lfm->lfl_uv[r << 1]); &lfm->lfl_uv[r << 1]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -1569,16 +1535,12 @@ void vp9_filter_block_plane(VP9_COMMON *const cm,
mask_4x4_int >>= 4; mask_4x4_int >>= 4;
} }
} }
}
void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
VP9_COMMON *cm, VP9_COMMON *cm,
struct macroblockd_plane planes[MAX_MB_PLANE], struct macroblockd_plane planes[MAX_MB_PLANE],
int start, int stop, int y_only) { int start, int stop, int y_only) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE; const int num_planes = y_only ? 1 : MAX_MB_PLANE;
const int use_420 = y_only || (planes[1].subsampling_y == 1 &&
planes[1].subsampling_x == 1);
LOOP_FILTER_MASK lfm;
int mi_row, mi_col; int mi_row, mi_col;
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
@@ -1589,21 +1551,12 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420. for (plane = 0; plane < num_planes; ++plane)
if (use_420)
vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
&lfm);
for (plane = 0; plane < num_planes; ++plane) {
if (use_420)
vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
else
vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col); mi_row, mi_col);
} }
} }
} }
}
void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
VP9_COMMON *cm, MACROBLOCKD *xd, VP9_COMMON *cm, MACROBLOCKD *xd,

View File

@@ -29,6 +29,12 @@ extern "C" {
#define MAX_REF_LF_DELTAS 4 #define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 2 #define MAX_MODE_LF_DELTAS 2
enum lf_path {
LF_PATH_420,
LF_PATH_444,
LF_PATH_SLOW,
};
struct loopfilter { struct loopfilter {
int filter_level; int filter_level;
@@ -92,7 +98,12 @@ void vp9_setup_mask(struct VP9Common *const cm,
MODE_INFO *mi_8x8, const int mode_info_stride, MODE_INFO *mi_8x8, const int mode_info_stride,
LOOP_FILTER_MASK *lfm); LOOP_FILTER_MASK *lfm);
void vp9_filter_block_plane(struct VP9Common *const cm, void vp9_filter_block_plane_ss00(struct VP9Common *const cm,
struct macroblockd_plane *const plane,
int mi_row,
LOOP_FILTER_MASK *lfm);
void vp9_filter_block_plane_ss11(struct VP9Common *const cm,
struct macroblockd_plane *const plane, struct macroblockd_plane *const plane,
int mi_row, int mi_row,
LOOP_FILTER_MASK *lfm); LOOP_FILTER_MASK *lfm);

View File

@@ -88,7 +88,7 @@ typedef struct {
int col; int col;
} RefCntBuffer; } RefCntBuffer;
typedef struct { typedef struct BufferPool {
// Protect BufferPool from being accessed by several FrameWorkers at // Protect BufferPool from being accessed by several FrameWorkers at
// the same time during frame parallel decode. // the same time during frame parallel decode.
// TODO(hkuang): Try to use atomic variable instead of locking the whole pool. // TODO(hkuang): Try to use atomic variable instead of locking the whole pool.
@@ -266,8 +266,9 @@ typedef struct VP9Common {
// External BufferPool passed from outside. // External BufferPool passed from outside.
BufferPool *buffer_pool; BufferPool *buffer_pool;
PARTITION_CONTEXT *above_seg_context;
ENTROPY_CONTEXT *above_context; ENTROPY_CONTEXT *above_context;
PARTITION_CONTEXT *above_seg_context;
TXFM_CONTEXT *above_txfm_context;
} VP9_COMMON; } VP9_COMMON;
// TODO(hkuang): Don't need to lock the whole pool after implementing atomic // TODO(hkuang): Don't need to lock the whole pool after implementing atomic
@@ -328,6 +329,7 @@ static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) {
} }
xd->above_seg_context = cm->above_seg_context; xd->above_seg_context = cm->above_seg_context;
xd->above_txfm_context = cm->above_txfm_context;
xd->mi_stride = cm->mi_stride; xd->mi_stride = cm->mi_stride;
xd->error_info = &cm->error; xd->error_info = &cm->error;
} }
@@ -427,6 +429,30 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd,
return (left * 2 + above) + bsl * PARTITION_PLOFFSET; return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
} }
static void txfm_partition_update(TXFM_CONTEXT *above_ctx,
TXFM_CONTEXT *left_ctx,
TX_SIZE tx_size) {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bs = num_8x8_blocks_high_lookup[bsize];
int i;
for (i = 0; i < bs; ++i) {
above_ctx[i] = tx_size;
left_ctx[i] = tx_size;
}
}
static int max_tx_size_offset[TX_SIZES] = {0, 0, 2, 6};
static int txfm_partition_context(const TXFM_CONTEXT *above_ctx,
const TXFM_CONTEXT *left_ctx,
TX_SIZE max_tx_size,
TX_SIZE tx_size) {
int above = *above_ctx < tx_size;
int left = *left_ctx < tx_size;
return max_tx_size_offset[max_tx_size] +
2 * (max_tx_size - tx_size) + (above || left);
}
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
#endif #endif

View File

@@ -91,10 +91,7 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr,
int flimit) { int flimit) {
uint8_t const *p_src; uint8_t const *p_src;
uint8_t *p_dst; uint8_t *p_dst;
int row; int row, col, i, v, kernel;
int col;
int i;
int v;
int pitch = src_pixels_per_line; int pitch = src_pixels_per_line;
uint8_t d[8]; uint8_t d[8];
(void)dst_pixels_per_line; (void)dst_pixels_per_line;
@@ -105,8 +102,8 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr,
p_dst = dst_ptr; p_dst = dst_ptr;
for (col = 0; col < cols; col++) { for (col = 0; col < cols; col++) {
int kernel = 4; kernel = 4;
int v = p_src[col]; v = p_src[col];
for (i = -2; i <= 2; i++) { for (i = -2; i <= 2; i++) {
if (abs(v - p_src[col + i * pitch]) > flimit) if (abs(v - p_src[col + i * pitch]) > flimit)
@@ -128,7 +125,7 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr,
d[i] = p_src[i]; d[i] = p_src[i];
for (col = 0; col < cols; col++) { for (col = 0; col < cols; col++) {
int kernel = 4; kernel = 4;
v = p_src[col]; v = p_src[col];
d[col & 7] = v; d[col & 7] = v;
@@ -168,10 +165,7 @@ void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr,
int flimit) { int flimit) {
uint16_t const *p_src; uint16_t const *p_src;
uint16_t *p_dst; uint16_t *p_dst;
int row; int row, col, i, v, kernel;
int col;
int i;
int v;
int pitch = src_pixels_per_line; int pitch = src_pixels_per_line;
uint16_t d[8]; uint16_t d[8];
@@ -181,8 +175,8 @@ void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr,
p_dst = dst_ptr; p_dst = dst_ptr;
for (col = 0; col < cols; col++) { for (col = 0; col < cols; col++) {
int kernel = 4; kernel = 4;
int v = p_src[col]; v = p_src[col];
for (i = -2; i <= 2; i++) { for (i = -2; i <= 2; i++) {
if (abs(v - p_src[col + i * pitch]) > flimit) if (abs(v - p_src[col + i * pitch]) > flimit)
@@ -205,7 +199,7 @@ void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr,
d[i] = p_src[i]; d[i] = p_src[i];
for (col = 0; col < cols; col++) { for (col = 0; col < cols; col++) {
int kernel = 4; kernel = 4;
v = p_src[col]; v = p_src[col];
d[col & 7] = v; d[col & 7] = v;
@@ -518,22 +512,24 @@ void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
(dst->flags & YV12_FLAG_HIGHBITDEPTH)); (dst->flags & YV12_FLAG_HIGHBITDEPTH));
if (src->flags & YV12_FLAG_HIGHBITDEPTH) { if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
const uint16_t *const src = CONVERT_TO_SHORTPTR(srcs[i] + 2 * src_stride const uint16_t *const src_plane = CONVERT_TO_SHORTPTR(
+ 2); srcs[i] + 2 * src_stride + 2);
uint16_t *const dst = CONVERT_TO_SHORTPTR(dsts[i] + 2 * dst_stride + 2); uint16_t *const dst_plane = CONVERT_TO_SHORTPTR(
vp9_highbd_post_proc_down_and_across(src, dst, src_stride, dst_stride, dsts[i] + 2 * dst_stride + 2);
src_height, src_width, ppl); vp9_highbd_post_proc_down_and_across(src_plane, dst_plane, src_stride,
dst_stride, src_height, src_width,
ppl);
} else { } else {
const uint8_t *const src = srcs[i] + 2 * src_stride + 2; const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2;
uint8_t *const dst = dsts[i] + 2 * dst_stride + 2; uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2;
vp9_post_proc_down_and_across(src, dst, src_stride, dst_stride, vp9_post_proc_down_and_across(src_plane, dst_plane, src_stride,
src_height, src_width, ppl); dst_stride, src_height, src_width, ppl);
} }
#else #else
const uint8_t *const src = srcs[i] + 2 * src_stride + 2; const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2;
uint8_t *const dst = dsts[i] + 2 * dst_stride + 2; uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2;
vp9_post_proc_down_and_across(src, dst, src_stride, dst_stride, vp9_post_proc_down_and_across(src_plane, dst_plane, src_stride, dst_stride,
src_height, src_width, ppl); src_height, src_width, ppl);
#endif #endif
} }
@@ -558,16 +554,15 @@ static void fillrd(struct postproc_state *state, int q, int a) {
* a gaussian distribution with sigma determined by q. * a gaussian distribution with sigma determined by q.
*/ */
{ {
double i;
int next, j; int next, j;
next = 0; next = 0;
for (i = -32; i < 32; i++) { for (i = -32; i < 32; i++) {
int a = (int)(0.5 + 256 * gaussian(sigma, 0, i)); int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i));
if (a) { if (a_i) {
for (j = 0; j < a; j++) { for (j = 0; j < a_i; j++) {
char_dist[next + j] = (char) i; char_dist[next + j] = (char) i;
} }

View File

@@ -30,6 +30,25 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
ADST_ADST, // TM ADST_ADST, // TM
}; };
enum {
NEED_LEFT = 1 << 1,
NEED_ABOVE = 1 << 2,
NEED_ABOVERIGHT = 1 << 3,
};
static const uint8_t extend_modes[INTRA_MODES] = {
NEED_ABOVE | NEED_LEFT, // DC
NEED_ABOVE, // V
NEED_LEFT, // H
NEED_ABOVERIGHT, // D45
NEED_LEFT | NEED_ABOVE, // D135
NEED_LEFT | NEED_ABOVE, // D117
NEED_LEFT | NEED_ABOVE, // D153
NEED_LEFT, // D207
NEED_ABOVERIGHT, // D63
NEED_LEFT | NEED_ABOVE, // TM
};
// This serves as a wrapper function, so that all the prediction functions // This serves as a wrapper function, so that all the prediction functions
// can be unified and accessed as a pointer array. Note that the boundary // can be unified and accessed as a pointer array. Note that the boundary
// above and left are not necessarily used all the time. // above and left are not necessarily used all the time.
@@ -790,9 +809,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
vpx_memset(left_col, 129, 64); // NEED_LEFT
if (extend_modes[mode] & NEED_LEFT) {
// left
if (left_available) { if (left_available) {
if (xd->mb_to_bottom_edge < 0) { if (xd->mb_to_bottom_edge < 0) {
/* slower path if the block needs border extension */ /* slower path if the block needs border extension */
@@ -811,10 +829,42 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
for (i = 0; i < bs; ++i) for (i = 0; i < bs; ++i)
left_col[i] = ref[i * ref_stride - 1]; left_col[i] = ref[i * ref_stride - 1];
} }
} else {
vpx_memset(left_col, 129, bs);
}
} }
// TODO(hkuang) do not extend 2*bs pixels for all modes. // NEED_ABOVE
// above if (extend_modes[mode] & NEED_ABOVE) {
if (up_available) {
const uint8_t *above_ref = ref - ref_stride;
if (xd->mb_to_right_edge < 0) {
/* slower path if the block needs border extension */
if (x0 + bs <= frame_width) {
vpx_memcpy(above_row, above_ref, bs);
} else if (x0 <= frame_width) {
const int r = frame_width - x0;
vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + bs - frame_width);
}
} else {
/* faster path if the block does not need extension */
if (bs == 4 && right_available && left_available) {
const_above_row = above_ref;
} else {
vpx_memcpy(above_row, above_ref, bs);
}
}
above_row[-1] = left_available ? above_ref[-1] : 129;
} else {
vpx_memset(above_row, 127, bs);
above_row[-1] = 127;
}
}
// NEED_ABOVERIGHT
if (extend_modes[mode] & NEED_ABOVERIGHT) {
if (up_available) { if (up_available) {
const uint8_t *above_ref = ref - ref_stride; const uint8_t *above_ref = ref - ref_stride;
if (xd->mb_to_right_edge < 0) { if (xd->mb_to_right_edge < 0) {
@@ -842,7 +892,6 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
vpx_memset(above_row + r, above_row[r - 1], vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width); x0 + 2 * bs - frame_width);
} }
above_row[-1] = left_available ? above_ref[-1] : 129;
} else { } else {
/* faster path if the block does not need extension */ /* faster path if the block does not need extension */
if (bs == 4 && right_available && left_available) { if (bs == 4 && right_available && left_available) {
@@ -853,13 +902,14 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
vpx_memcpy(above_row + bs, above_ref + bs, bs); vpx_memcpy(above_row + bs, above_ref + bs, bs);
else else
vpx_memset(above_row + bs, above_row[bs - 1], bs); vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
}
above_row[-1] = left_available ? above_ref[-1] : 129; above_row[-1] = left_available ? above_ref[-1] : 129;
}
}
} else { } else {
vpx_memset(above_row, 127, bs * 2); vpx_memset(above_row, 127, bs * 2);
above_row[-1] = 127; above_row[-1] = 127;
} }
}
// predict // predict
if (mode == DC_PRED) { if (mode == DC_PRED) {

View File

@@ -1109,6 +1109,15 @@ specialize qw/vp9_avg_8x8 sse2 neon/;
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p"; add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_avg_4x4 sse2/; specialize qw/vp9_avg_4x4 sse2/;
add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vp9_hadamard_8x8 sse2/;
add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vp9_hadamard_16x16 sse2/;
add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
specialize qw/vp9_satd sse2/;
add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height"; add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
specialize qw/vp9_int_pro_row sse2/; specialize qw/vp9_int_pro_row sse2/;
@@ -1162,6 +1171,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp9_block_error avx2/, "$sse2_x86inc"; specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp9_block_error_fp sse2/;
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64"; specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";

View File

@@ -13,6 +13,7 @@
#include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_thread_common.h"
#include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_loopfilter.h"
#if CONFIG_MULTITHREAD #if CONFIG_MULTITHREAD
static INLINE void mutex_lock(pthread_mutex_t *const mutex) { static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
@@ -92,10 +93,17 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
int start, int stop, int y_only, int start, int stop, int y_only,
VP9LfSync *const lf_sync) { VP9LfSync *const lf_sync) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE; const int num_planes = y_only ? 1 : MAX_MB_PLANE;
const int use_420 = y_only || (planes[1].subsampling_y == 1 &&
planes[1].subsampling_x == 1);
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
int mi_row, mi_col; int mi_row, mi_col;
enum lf_path path;
if (y_only)
path = LF_PATH_444;
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
path = LF_PATH_420;
else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
path = LF_PATH_444;
else
path = LF_PATH_SLOW;
for (mi_row = start; mi_row < stop; for (mi_row = start; mi_row < stop;
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
@@ -112,18 +120,24 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420. // TODO(JBB): Make setup_mask work for non 420.
if (use_420)
vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
&lfm); &lfm);
for (plane = 0; plane < num_planes; ++plane) { vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
if (use_420) for (plane = 1; plane < num_planes; ++plane) {
vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm); switch (path) {
else case LF_PATH_420:
vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_444:
vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col); mi_row, mi_col);
break;
}
} }
sync_write(lf_sync, r, c, sb_cols); sync_write(lf_sync, r, c, sb_cols);
} }
} }
@@ -385,6 +399,10 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
for (i = 0; i < TX_SIZES; i++) for (i = 0; i < TX_SIZES; i++)
cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i]; cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
for (j = 0; j < 2; ++j)
cm->counts.txfm_partition[i][j] += counts->txfm_partition[i][j];
for (i = 0; i < SKIP_CONTEXTS; i++) for (i = 0; i < SKIP_CONTEXTS; i++)
for (j = 0; j < 2; j++) for (j = 0; j < 2; j++)
cm->counts.skip[i][j] += counts->skip[i][j]; cm->counts.skip[i][j] += counts->skip[i][j];

View File

@@ -338,6 +338,59 @@ struct inter_args {
const int16_t *const uv_dequant; const int16_t *const uv_dequant;
}; };
static void decode_reconstruct_tx(int blk_row, int blk_col,
int plane, int block,
TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
void *arg) {
struct inter_args *args = (struct inter_args *)arg;
VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
struct macroblockd_plane *const pd = &xd->plane[plane];
int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
(blk_col >> (1 - pd->subsampling_x));
TX_SIZE plane_tx_size = plane ?
get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], plane_bsize, 0, 0) :
mbmi->inter_tx_size[tx_idx];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
if (xd->mb_to_right_edge < 0)
max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
if (tx_size == plane_tx_size) {
const int16_t *const dequant = (plane == 0) ? args->y_dequant
: args->uv_dequant;
int eob;
eob = vp9_decode_block_tokens(cm, xd, args->counts, plane, block,
plane_bsize, blk_col, blk_row,
tx_size, args->r, dequant);
inverse_transform_block(xd, plane, block, tx_size,
&pd->dst.buf[4 * blk_row * pd->dst.stride +
4 * blk_col],
pd->dst.stride, eob);
*args->eobtotal += eob;
} else {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_high_lookup[bsize];
int step = 1 << (2 *(tx_size - 1));
int i;
for (i = 0; i < 4; ++i) {
int offsetr = (i >> 1) * bh / 2;
int offsetc = (i & 0x01) * bh / 2;
decode_reconstruct_tx(blk_row + offsetr, blk_col + offsetc,
plane, block + i * step, tx_size - 1,
plane_bsize, arg);
}
}
}
static void reconstruct_inter_block(int plane, int block, static void reconstruct_inter_block(int plane, int block,
BLOCK_SIZE plane_bsize, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) { TX_SIZE tx_size, void *arg) {
@@ -370,6 +423,7 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
xd->mi = cm->mi + offset; xd->mi = cm->mi + offset;
xd->mi[0].src_mi = &xd->mi[0]; // Point to self. xd->mi[0].src_mi = &xd->mi[0]; // Point to self.
xd->mi[0].mbmi.sb_type = bsize; xd->mi[0].mbmi.sb_type = bsize;
xd->mi[0].mbmi.max_tx_size = max_txsize_lookup[bsize];
for (y = 0; y < y_mis; ++y) for (y = 0; y < y_mis; ++y)
for (x = !y; x < x_mis; ++x) { for (x = !y; x < x_mis; ++x) {
@@ -425,12 +479,32 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
int eobtotal = 0; int eobtotal = 0;
struct inter_args arg = {cm, xd, r, counts, &eobtotal, y_dequant, struct inter_args arg = {cm, xd, r, counts, &eobtotal, y_dequant,
uv_dequant}; uv_dequant};
vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); int plane;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int idx, idy;
int block = 0;
int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bh) {
decode_reconstruct_tx(idy, idx, plane, block,
max_txsize_lookup[plane_bsize],
plane_bsize, &arg);
block += step;
}
}
}
if (!less8x8 && eobtotal == 0) if (!less8x8 && eobtotal == 0)
mbmi->skip = 1; // skip loopfilter mbmi->skip = 1; // skip loopfilter
} }
} }
xd->corrupted |= vp9_reader_has_error(r); xd->corrupted |= vp9_reader_has_error(r);
} }
@@ -943,9 +1017,10 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
// are allocated as part of the same buffer. // are allocated as part of the same buffer.
vpx_memset(cm->above_context, 0, vpx_memset(cm->above_context, 0,
sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols);
vpx_memset(cm->above_seg_context, 0, vpx_memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_cols); sizeof(*cm->above_seg_context) * aligned_cols);
vpx_memset(cm->above_txfm_context, 0,
sizeof(*cm->above_txfm_context) * aligned_cols);
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
@@ -988,6 +1063,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
vp9_tile_set_col(&tile, tile_data->cm, col); vp9_tile_set_col(&tile, tile_data->cm, col);
vp9_zero(tile_data->xd.left_context); vp9_zero(tile_data->xd.left_context);
vp9_zero(tile_data->xd.left_seg_context); vp9_zero(tile_data->xd.left_seg_context);
vp9_zero(tile_data->xd.left_txfm_context_buffer);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) { mi_col += MI_BLOCK_SIZE) {
decode_partition(pbi, &tile_data->xd, &cm->counts, &tile, mi_row, decode_partition(pbi, &tile_data->xd, &cm->counts, &tile, mi_row,
@@ -1061,6 +1137,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data,
mi_row += MI_BLOCK_SIZE) { mi_row += MI_BLOCK_SIZE) {
vp9_zero(tile_data->xd.left_context); vp9_zero(tile_data->xd.left_context);
vp9_zero(tile_data->xd.left_seg_context); vp9_zero(tile_data->xd.left_seg_context);
vp9_zero(tile_data->xd.left_txfm_context_buffer);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) { mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->pbi, &tile_data->xd, &tile_data->counts, decode_partition(tile_data->pbi, &tile_data->xd, &tile_data->counts,
@@ -1146,6 +1223,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
vpx_memset(cm->above_seg_context, 0, vpx_memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_mi_cols); sizeof(*cm->above_seg_context) * aligned_mi_cols);
vpx_memset(cm->above_txfm_context, 0,
sizeof(*cm->above_txfm_context) * aligned_mi_cols);
// Load tile data into tile_buffers // Load tile data into tile_buffers
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
@@ -1516,6 +1595,12 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data,
read_tx_mode_probs(&fc->tx_probs, &r); read_tx_mode_probs(&fc->tx_probs, &r);
read_coef_probs(fc, cm->tx_mode, &r); read_coef_probs(fc, cm->tx_mode, &r);
for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
vp9_diff_update_prob(&r, &fc->txfm_partition_prob[k]);
for (k = 0; k < 3; ++k)
vp9_diff_update_prob(&r, &fc->intra_predictor_prob[k]);
for (k = 0; k < SKIP_CONTEXTS; ++k) for (k = 0; k < SKIP_CONTEXTS; ++k)
vp9_diff_update_prob(&r, &fc->skip_probs[k]); vp9_diff_update_prob(&r, &fc->skip_probs[k]);

View File

@@ -23,6 +23,51 @@
#include "vp9/decoder/vp9_decodeframe.h" #include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_reader.h" #include "vp9/decoder/vp9_reader.h"
static PREDICTION_MODE read_intra_mode_exp(const VP9_COMMON *cm,
vp9_reader *r, const MODE_INFO *mi,
const MODE_INFO *above_mi,
const MODE_INFO *left_mi,
int block) {
const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block);
const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block);
PREDICTION_MODE i;
int count = 0;
if (above == left) {
if (vp9_read(r, cm->fc->intra_predictor_prob[0]))
return above;
for (i = DC_PRED; i < INTRA_MODES - 1; ++i) {
if (i == above)
continue;
if (vp9_read(r, vp9_intra_mode_prob[count]))
return i;
++count;
if (count == INTRA_MODES - 2)
return (i + 1) == above ? (i + 2) : (i + 1);
}
return (INTRA_MODES - 1);
} else {
if (vp9_read(r, cm->fc->intra_predictor_prob[1]))
return above;
if (vp9_read(r, cm->fc->intra_predictor_prob[2]))
return left;
for (i = DC_PRED; i < INTRA_MODES - 1; ++i) {
if (i == above || i == left)
continue;
if (vp9_read(r, vp9_intra_mode_prob[count + 1]))
return i;
++count;
if (count == INTRA_MODES - 3)
break;
}
for (++i; i <= INTRA_MODES - 1; ++i)
if (i != above && i != left)
return i;
return (INTRA_MODES - 1);
}
}
static PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { static PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
return (PREDICTION_MODE)vp9_read_tree(r, vp9_intra_mode_tree, p); return (PREDICTION_MODE)vp9_read_tree(r, vp9_intra_mode_tree, p);
} }
@@ -60,6 +105,62 @@ static int read_segment_id(vp9_reader *r, const struct segmentation *seg) {
return vp9_read_tree(r, vp9_segment_tree, seg->tree_probs); return vp9_read_tree(r, vp9_segment_tree, seg->tree_probs);
} }
static void read_tx_size_inter(VP9_COMMON *cm, MACROBLOCKD *xd,
FRAME_COUNTS *counts, TX_SIZE tx_size,
int blk_row, int blk_col, vp9_reader *r) {
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
int is_split = 0;
int tx_idx = (blk_row / 2) * 8 + (blk_col / 2);
int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_high_lookup[bsize];
int i, j;
int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2),
mbmi->max_tx_size,
tx_size);
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> 5;
if (xd->mb_to_right_edge < 0)
max_blocks_wide += xd->mb_to_right_edge >> 5;
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
is_split = vp9_read(r, cm->fc->txfm_partition_prob[ctx]);
if (!is_split) {
mbmi->inter_tx_size[tx_idx] = tx_size;
for (j = 0; j < bh / 2; ++j)
for (i = 0; i < bh / 2; ++i)
mbmi->inter_tx_size[tx_idx + j * 8 + i] = tx_size;
mbmi->tx_size = tx_size;
++counts->txfm_partition[ctx][0];
txfm_partition_update(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2), tx_size);
} else {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_high_lookup[bsize];
int i;
++counts->txfm_partition[ctx][1];
if (tx_size == TX_8X8) {
mbmi->inter_tx_size[tx_idx] = TX_4X4;
mbmi->tx_size = TX_4X4;
txfm_partition_update(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2), TX_4X4);
return;
}
for (i = 0; i < 4; ++i) {
int offsetr = (i >> 1) * bh / 2;
int offsetc = (i & 0x01) * bh / 2;
read_tx_size_inter(cm, xd, counts, tx_size - 1,
blk_row + offsetr, blk_col + offsetc, r);
}
}
}
static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
FRAME_COUNTS *counts, FRAME_COUNTS *counts,
TX_SIZE max_tx_size, vp9_reader *r) { TX_SIZE max_tx_size, vp9_reader *r) {
@@ -208,24 +309,24 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm,
case BLOCK_4X4: case BLOCK_4X4:
for (i = 0; i < 4; ++i) for (i = 0; i < 4; ++i)
mi->bmi[i].as_mode = mi->bmi[i].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); read_intra_mode_exp(cm, r, mi, above_mi, left_mi, i);
mbmi->mode = mi->bmi[3].as_mode; mbmi->mode = mi->bmi[3].as_mode;
break; break;
case BLOCK_4X8: case BLOCK_4X8:
mi->bmi[0].as_mode = mi->bmi[2].as_mode = mi->bmi[0].as_mode = mi->bmi[2].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); read_intra_mode_exp(cm, r, mi, above_mi, left_mi, 0);
mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); read_intra_mode_exp(cm, r, mi, above_mi, left_mi, 1);
break; break;
case BLOCK_8X4: case BLOCK_8X4:
mi->bmi[0].as_mode = mi->bmi[1].as_mode = mi->bmi[0].as_mode = mi->bmi[1].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); read_intra_mode_exp(cm, r, mi, above_mi, left_mi, 0);
mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2)); read_intra_mode_exp(cm, r, mi, above_mi, left_mi, 2);
break; break;
default: default:
mbmi->mode = read_intra_mode(r, mbmi->mode =
get_y_mode_probs(mi, above_mi, left_mi, 0)); read_intra_mode_exp(cm, r, mi, above_mi, left_mi, 0);
} }
mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]); mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]);
@@ -569,13 +670,63 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi,
MODE_INFO *const mi = xd->mi[0].src_mi; MODE_INFO *const mi = xd->mi[0].src_mi;
MB_MODE_INFO *const mbmi = &mi->mbmi; MB_MODE_INFO *const mbmi = &mi->mbmi;
int inter_block; int inter_block;
BLOCK_SIZE bsize = mbmi->sb_type;
mbmi->mv[0].as_int = 0; mbmi->mv[0].as_int = 0;
mbmi->mv[1].as_int = 0; mbmi->mv[1].as_int = 0;
mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
mbmi->skip = read_skip(cm, xd, counts, mbmi->segment_id, r); mbmi->skip = read_skip(cm, xd, counts, mbmi->segment_id, r);
inter_block = read_is_inter_block(cm, xd, counts, mbmi->segment_id, r); inter_block = read_is_inter_block(cm, xd, counts, mbmi->segment_id, r);
mbmi->tx_size = read_tx_size(cm, xd, counts, !mbmi->skip || !inter_block, r); xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
if (mbmi->sb_type >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
!mbmi->skip && inter_block) {
BLOCK_SIZE txb_size = txsize_to_bsize[mbmi->max_tx_size];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int width = num_4x4_blocks_wide_lookup[bsize];
int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bh)
read_tx_size_inter(cm, xd, counts, mbmi->max_tx_size,
idy, idx, r);
} else {
int i;
mbmi->tx_size = read_tx_size(cm, xd, counts,
!mbmi->skip || !inter_block, r);
for (i = 0; i < 64; ++i)
mbmi->inter_tx_size[i] = mbmi->tx_size;
}
if (mbmi->sb_type < BLOCK_8X8)
txfm_partition_update(xd->above_txfm_context, xd->left_txfm_context,
TX_4X4);
if (inter_block) {
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && mbmi->skip) {
BLOCK_SIZE txb_size = txsize_to_bsize[mbmi->max_tx_size];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int width = num_4x4_blocks_wide_lookup[bsize];
int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bh)
txfm_partition_update(xd->above_txfm_context + (idx / 2),
xd->left_txfm_context + (idy / 2),
mbmi->max_tx_size);
}
} else {
BLOCK_SIZE txb_size = txsize_to_bsize[mbmi->max_tx_size];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int width = num_4x4_blocks_wide_lookup[bsize];
int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bh)
txfm_partition_update(xd->above_txfm_context + (idx / 2),
xd->left_txfm_context + (idy / 2), mbmi->tx_size);
}
if (inter_block) if (inter_block)
read_inter_block_mode_info(pbi, xd, counts, tile, mi, mi_row, mi_col, r); read_inter_block_mode_info(pbi, xd, counts, tile, mi, mi_row, mi_col, r);

View File

@@ -45,17 +45,6 @@ static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) {
return val; return val;
} }
static const vp9_tree_index coeff_subtree_high[TREE_SIZE(ENTROPY_TOKENS)] = {
2, 6, /* 0 = LOW_VAL */
-TWO_TOKEN, 4, /* 1 = TWO */
-THREE_TOKEN, -FOUR_TOKEN, /* 2 = THREE */
8, 10, /* 3 = HIGH_LOW */
-CATEGORY1_TOKEN, -CATEGORY2_TOKEN, /* 4 = CAT_ONE */
12, 14, /* 5 = CAT_THREEFOUR */
-CATEGORY3_TOKEN, -CATEGORY4_TOKEN, /* 6 = CAT_THREE */
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN /* 7 = CAT_FIVE */
};
static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
FRAME_COUNTS *counts, PLANE_TYPE type, FRAME_COUNTS *counts, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
@@ -147,7 +136,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
val = 1; val = 1;
} else { } else {
INCREMENT_COUNT(TWO_TOKEN); INCREMENT_COUNT(TWO_TOKEN);
token = vp9_read_tree(r, coeff_subtree_high, token = vp9_read_tree(r, vp9_coef_con_tree,
vp9_pareto8_full[prob[PIVOT_NODE] - 1]); vp9_pareto8_full[prob[PIVOT_NODE] - 1]);
switch (token) { switch (token) {
case TWO_TOKEN: case TWO_TOKEN:

View File

@@ -28,6 +28,94 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
return (sum + 8) >> 4; return (sum + 8) >> 4;
} }
static void hadamard_col8(const int16_t *src_diff, int src_stride,
int16_t *coeff) {
int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride];
int16_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride];
int16_t b2 = src_diff[2 * src_stride] + src_diff[3 * src_stride];
int16_t b3 = src_diff[2 * src_stride] - src_diff[3 * src_stride];
int16_t b4 = src_diff[4 * src_stride] + src_diff[5 * src_stride];
int16_t b5 = src_diff[4 * src_stride] - src_diff[5 * src_stride];
int16_t b6 = src_diff[6 * src_stride] + src_diff[7 * src_stride];
int16_t b7 = src_diff[6 * src_stride] - src_diff[7 * src_stride];
int16_t c0 = b0 + b2;
int16_t c1 = b1 + b3;
int16_t c2 = b0 - b2;
int16_t c3 = b1 - b3;
int16_t c4 = b4 + b6;
int16_t c5 = b5 + b7;
int16_t c6 = b4 - b6;
int16_t c7 = b5 - b7;
coeff[0] = c0 + c4;
coeff[7] = c1 + c5;
coeff[3] = c2 + c6;
coeff[4] = c3 + c7;
coeff[2] = c0 - c4;
coeff[6] = c1 - c5;
coeff[1] = c2 - c6;
coeff[5] = c3 - c7;
}
void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride,
int16_t *coeff) {
int idx;
int16_t buffer[64];
int16_t *tmp_buf = &buffer[0];
for (idx = 0; idx < 8; ++idx) {
hadamard_col8(src_diff, src_stride, tmp_buf);
tmp_buf += 8;
++src_diff;
}
tmp_buf = &buffer[0];
for (idx = 0; idx < 8; ++idx) {
hadamard_col8(tmp_buf, 8, coeff);
coeff += 8;
++tmp_buf;
}
}
// In place 16x16 2D Hadamard transform
void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
int16_t *coeff) {
int idx;
for (idx = 0; idx < 4; ++idx) {
int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
+ (idx & 0x01) * 8;
vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
}
for (idx = 0; idx < 64; ++idx) {
int16_t a0 = coeff[0];
int16_t a1 = coeff[64];
int16_t a2 = coeff[128];
int16_t a3 = coeff[192];
int16_t b0 = a0 + a1;
int16_t b1 = a0 - a1;
int16_t b2 = a2 + a3;
int16_t b3 = a2 - a3;
coeff[0] = (b0 + b2) >> 1;
coeff[64] = (b1 + b3) >> 1;
coeff[128] = (b0 - b2) >> 1;
coeff[192] = (b1 - b3) >> 1;
++coeff;
}
}
int16_t vp9_satd_c(const int16_t *coeff, int length) {
int i;
int satd = 0;
for (i = 0; i < length; ++i)
satd += abs(coeff[i]);
return (int16_t)satd;
}
// Integer projection onto row vectors. // Integer projection onto row vectors.
void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref, void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
const int ref_stride, const int height) { const int ref_stride, const int height) {

View File

@@ -44,6 +44,53 @@ static const struct vp9_token partition_encodings[PARTITION_TYPES] =
static const struct vp9_token inter_mode_encodings[INTER_MODES] = static const struct vp9_token inter_mode_encodings[INTER_MODES] =
{{2, 2}, {6, 3}, {0, 1}, {7, 3}}; {{2, 2}, {6, 3}, {0, 1}, {7, 3}};
static void write_intra_mode_exp(const VP9_COMMON *cm,
vp9_writer *w, const MODE_INFO *mi,
const MODE_INFO *above_mi,
const MODE_INFO *left_mi, int block,
PREDICTION_MODE mode) {
const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block);
const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block);
PREDICTION_MODE i;
int count = 0;
if (above == left) {
vp9_write(w, mode == above, cm->fc->intra_predictor_prob[0]);
if (mode == above)
return;
for (i = DC_PRED; i < INTRA_MODES - 1; ++i) {
if (i == above)
continue;
vp9_write(w, i == mode, vp9_intra_mode_prob[count]);
++count;
if (i == mode)
return;
if (count == INTRA_MODES - 2)
return;
}
} else {
// above and left reference modes differ
vp9_write(w, mode == above, cm->fc->intra_predictor_prob[1]);
if (mode == above)
return;
vp9_write(w, mode == left, cm->fc->intra_predictor_prob[2]);
if (mode == left)
return;
for (i = DC_PRED; i < INTRA_MODES - 1; ++i) {
if (i == above || i == left)
continue;
vp9_write(w, i == mode, vp9_intra_mode_prob[count + 1]);
++count;
if (i == mode)
return;
if (count == INTRA_MODES - 3)
return;
}
}
}
static void write_intra_mode(vp9_writer *w, PREDICTION_MODE mode, static void write_intra_mode(vp9_writer *w, PREDICTION_MODE mode,
const vp9_prob *probs) { const vp9_prob *probs) {
vp9_write_token(w, vp9_intra_mode_tree, probs, &intra_mode_encodings[mode]); vp9_write_token(w, vp9_intra_mode_tree, probs, &intra_mode_encodings[mode]);
@@ -76,6 +123,52 @@ static void prob_diff_update(const vp9_tree_index *tree,
vp9_cond_prob_diff_update(w, &probs[i], branch_ct[i]); vp9_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
} }
static void write_tx_size_inter(const VP9_COMMON *cm, MACROBLOCKD *xd,
TX_SIZE tx_size, int blk_row, int blk_col,
vp9_writer *w) {
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
int tx_idx = (blk_row / 2) * 8 + (blk_col / 2);
int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2),
mbmi->max_tx_size,
tx_size);
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> 5;
if (xd->mb_to_right_edge < 0)
max_blocks_wide += xd->mb_to_right_edge >> 5;
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
// TODO(jingning): this assumes support of the possible 64x64 transform.
if (tx_size == mbmi->inter_tx_size[tx_idx]) {
vp9_write(w, 0, cm->fc->txfm_partition_prob[ctx]);
txfm_partition_update(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2), tx_size);
} else { // further split
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_high_lookup[bsize];
int i;
vp9_write(w, 1, cm->fc->txfm_partition_prob[ctx]);
if (tx_size == TX_8X8) {
txfm_partition_update(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2), TX_4X4);
return;
}
for (i = 0; i < 4; ++i) {
int offsetr = (i >> 1) * bh / 2;
int offsetc = (i & 0x01) * bh / 2;
write_tx_size_inter(cm, xd, tx_size - 1,
blk_row + offsetr, blk_col + offsetc, w);
}
}
}
static void write_selected_tx_size(const VP9_COMMON *cm, static void write_selected_tx_size(const VP9_COMMON *cm,
const MACROBLOCKD *xd, vp9_writer *w) { const MACROBLOCKD *xd, vp9_writer *w) {
TX_SIZE tx_size = xd->mi[0].src_mi->mbmi.tx_size; TX_SIZE tx_size = xd->mi[0].src_mi->mbmi.tx_size;
@@ -91,6 +184,22 @@ static void write_selected_tx_size(const VP9_COMMON *cm,
} }
} }
static void update_txfm_partition_probs(VP9_COMMON *cm, vp9_writer *w,
FRAME_COUNTS *counts) {
int k;
for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
vp9_cond_prob_diff_update(w, &cm->fc->txfm_partition_prob[k],
counts->txfm_partition[k]);
}
static void update_intra_predictor_probs(VP9_COMMON *cm, vp9_writer *w,
FRAME_COUNTS *counts) {
int k;
for (k = 0; k < 3; ++k)
vp9_cond_prob_diff_update(w, &cm->fc->intra_predictor_prob[k],
counts->intra_predictor[k]);
}
static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd, static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, const MODE_INFO *mi, vp9_writer *w) { int segment_id, const MODE_INFO *mi, vp9_writer *w) {
if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
@@ -238,8 +347,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
vp9_writer *w) { vp9_writer *w) {
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
const nmv_context *nmvc = &cm->fc->nmvc; const nmv_context *nmvc = &cm->fc->nmvc;
const MACROBLOCK *const x = &cpi->td.mb; MACROBLOCK *x = &cpi->td.mb;
const MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *xd = &x->e_mbd;
const struct segmentation *const seg = &cm->seg; const struct segmentation *const seg = &cm->seg;
const MB_MODE_INFO *const mbmi = &mi->mbmi; const MB_MODE_INFO *const mbmi = &mi->mbmi;
const PREDICTION_MODE mode = mbmi->mode; const PREDICTION_MODE mode = mbmi->mode;
@@ -268,9 +377,48 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd)); vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
!(is_inter && !(is_inter && skip)) {
(skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) { if (!is_inter) {
write_selected_tx_size(cm, xd, w); write_selected_tx_size(cm, xd, w);
} else {
BLOCK_SIZE txb_size = txsize_to_bsize[mbmi->max_tx_size];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int width = num_4x4_blocks_wide_lookup[bsize];
int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bh)
write_tx_size_inter(cm, xd, mbmi->max_tx_size, idy, idx, w);
}
}
if (bsize < BLOCK_8X8)
txfm_partition_update(xd->above_txfm_context,
xd->left_txfm_context, TX_4X4);
if (is_inter) {
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && skip) {
BLOCK_SIZE txb_size = txsize_to_bsize[mbmi->max_tx_size];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int width = num_4x4_blocks_wide_lookup[bsize];
int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bh)
txfm_partition_update(xd->above_txfm_context + (idx / 2),
xd->left_txfm_context + (idy / 2),
mbmi->max_tx_size);
}
} else {
BLOCK_SIZE txb_size = txsize_to_bsize[mbmi->max_tx_size];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int width = num_4x4_blocks_wide_lookup[bsize];
int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bh)
txfm_partition_update(xd->above_txfm_context + (idx / 2),
xd->left_txfm_context + (idy / 2), mbmi->tx_size);
} }
if (!is_inter) { if (!is_inter) {
@@ -356,7 +504,7 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
write_selected_tx_size(cm, xd, w); write_selected_tx_size(cm, xd, w);
if (bsize >= BLOCK_8X8) { if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0)); write_intra_mode_exp(cm, w, mi, above_mi, left_mi, 0, mbmi->mode);
} else { } else {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -365,8 +513,8 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
for (idy = 0; idy < 2; idy += num_4x4_h) { for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) { for (idx = 0; idx < 2; idx += num_4x4_w) {
const int block = idy * 2 + idx; const int block = idy * 2 + idx;
write_intra_mode(w, mi->bmi[block].as_mode, write_intra_mode_exp(cm, w, mi, above_mi, left_mi, block,
get_y_mode_probs(mi, above_mi, left_mi, block)); mi->bmi[block].as_mode);
} }
} }
} }
@@ -392,6 +540,8 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
if (frame_is_intra_only(cm)) { if (frame_is_intra_only(cm)) {
write_mb_modes_kf(cm, xd, xd->mi, w); write_mb_modes_kf(cm, xd, xd->mi, w);
} else { } else {
xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
pack_inter_mode_mvs(cpi, m, w); pack_inter_mode_mvs(cpi, m, w);
} }
@@ -488,6 +638,7 @@ static void write_modes(VP9_COMP *cpi,
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) { mi_row += MI_BLOCK_SIZE) {
vp9_zero(xd->left_seg_context); vp9_zero(xd->left_seg_context);
vp9_zero(xd->left_txfm_context_buffer);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) mi_col += MI_BLOCK_SIZE)
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col,
@@ -931,6 +1082,8 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) *
mi_cols_aligned_to_sb(cm->mi_cols)); mi_cols_aligned_to_sb(cm->mi_cols));
vpx_memset(cm->above_txfm_context, 0, sizeof(*cm->above_txfm_context) *
mi_cols_aligned_to_sb(cm->mi_cols));
for (tile_row = 0; tile_row < tile_rows; tile_row++) { for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) { for (tile_col = 0; tile_col < tile_cols; tile_col++) {
@@ -1159,6 +1312,8 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
encode_txfm_probs(cm, &header_bc, counts); encode_txfm_probs(cm, &header_bc, counts);
update_coef_probs(cpi, &header_bc); update_coef_probs(cpi, &header_bc);
update_txfm_partition_probs(cm, &header_bc, counts);
update_intra_predictor_probs(cm, &header_bc, counts);
update_skip_probs(cm, &header_bc, counts); update_skip_probs(cm, &header_bc, counts);
if (!frame_is_intra_only(cm)) { if (!frame_is_intra_only(cm)) {

View File

@@ -95,6 +95,7 @@ struct macroblock {
uint8_t zcoeff_blk[TX_SIZES][256]; uint8_t zcoeff_blk[TX_SIZES][256];
int skip; int skip;
uint8_t blk_skip[MAX_MB_PLANE][256];
int encode_breakout; int encode_breakout;

View File

@@ -28,6 +28,8 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t))); vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) { for (i = 0; i < MAX_MB_PLANE; ++i) {
CHECK_MEM_ERROR(cm, ctx->blk_skip[i],
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
for (k = 0; k < 3; ++k) { for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k], CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k]))); vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
@@ -50,6 +52,8 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
vpx_free(ctx->zcoeff_blk); vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0; ctx->zcoeff_blk = 0;
for (i = 0; i < MAX_MB_PLANE; ++i) { for (i = 0; i < MAX_MB_PLANE; ++i) {
vpx_free(ctx->blk_skip[i]);
ctx->blk_skip[i] = 0;
for (k = 0; k < 3; ++k) { for (k = 0; k < 3; ++k) {
vpx_free(ctx->coeff[i][k]); vpx_free(ctx->coeff[i][k]);
ctx->coeff[i][k] = 0; ctx->coeff[i][k] = 0;

View File

@@ -21,6 +21,7 @@ struct ThreadData;
typedef struct { typedef struct {
MODE_INFO mic; MODE_INFO mic;
uint8_t *zcoeff_blk; uint8_t *zcoeff_blk;
uint8_t *blk_skip[MAX_MB_PLANE];
tran_low_t *coeff[MAX_MB_PLANE][3]; tran_low_t *coeff[MAX_MB_PLANE][3];
tran_low_t *qcoeff[MAX_MB_PLANE][3]; tran_low_t *qcoeff[MAX_MB_PLANE][3];
tran_low_t *dqcoeff[MAX_MB_PLANE][3]; tran_low_t *dqcoeff[MAX_MB_PLANE][3];

View File

@@ -99,7 +99,7 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
}; };
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref, const struct buf_2d *ref,
BLOCK_SIZE bs) { BLOCK_SIZE bs) {
unsigned int sse; unsigned int sse;
@@ -109,7 +109,7 @@ static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
} }
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
static unsigned int high_get_sby_perpixel_variance( unsigned int vp9_high_get_sby_perpixel_variance(
VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) { VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) {
unsigned int var, sse; unsigned int var, sse;
switch (bd) { switch (bd) {
@@ -165,21 +165,6 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
return BLOCK_8X8; return BLOCK_8X8;
} }
static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi,
MACROBLOCK *x,
int mi_row,
int mi_col) {
unsigned int var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src,
mi_row, mi_col,
BLOCK_64X64);
if (var < 4)
return BLOCK_64X64;
else if (var < 10)
return BLOCK_32X32;
else
return BLOCK_16X16;
}
// Lighter version of set_offsets that only sets the mode info // Lighter version of set_offsets that only sets the mode info
// pointers. // pointers.
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
@@ -482,9 +467,9 @@ void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) {
} else { } else {
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
const int is_key_frame = (cm->frame_type == KEY_FRAME); const int is_key_frame = (cm->frame_type == KEY_FRAME);
const int threshold_multiplier = is_key_frame ? 80 : 4; const int threshold_multiplier = is_key_frame ? 20 : 1;
const int64_t threshold_base = (int64_t)(threshold_multiplier * const int64_t threshold_base = (int64_t)(threshold_multiplier *
vp9_convert_qindex_to_q(q, cm->bit_depth)); cpi->y_dequant[q][1]);
// TODO(marpan): Allow 4x4 partitions for inter-frames. // TODO(marpan): Allow 4x4 partitions for inter-frames.
// use_4x4_partition = (variance4x4downsample[i2 + j] == 1); // use_4x4_partition = (variance4x4downsample[i2 + j] == 1);
@@ -492,21 +477,20 @@ void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) {
// if variance of 16x16 block is very high, so use larger threshold // if variance of 16x16 block is very high, so use larger threshold
// for 16x16 (threshold_bsize_min) in that case. // for 16x16 (threshold_bsize_min) in that case.
if (is_key_frame) { if (is_key_frame) {
cpi->vbp_threshold = threshold_base >> 2; cpi->vbp_threshold_64x64 = threshold_base;
cpi->vbp_threshold_bsize_max = threshold_base; cpi->vbp_threshold_32x32 = threshold_base >> 2;
cpi->vbp_threshold_bsize_min = threshold_base << 2; cpi->vbp_threshold_16x16 = threshold_base >> 2;
cpi->vbp_threshold_16x16 = cpi->vbp_threshold; cpi->vbp_threshold_8x8 = threshold_base << 2;
cpi->vbp_bsize_min = BLOCK_8X8; cpi->vbp_bsize_min = BLOCK_8X8;
} else { } else {
cpi->vbp_threshold = threshold_base; cpi->vbp_threshold_32x32 = threshold_base;
if (cm->width <= 352 && cm->height <= 288) { if (cm->width <= 352 && cm->height <= 288) {
cpi->vbp_threshold_bsize_max = threshold_base >> 2; cpi->vbp_threshold_64x64 = threshold_base >> 2;
cpi->vbp_threshold_bsize_min = threshold_base << 3; cpi->vbp_threshold_16x16 = threshold_base << 3;
} else { } else {
cpi->vbp_threshold_bsize_max = threshold_base; cpi->vbp_threshold_64x64 = threshold_base;
cpi->vbp_threshold_bsize_min = threshold_base << cpi->oxcf.speed; cpi->vbp_threshold_16x16 = threshold_base << cpi->oxcf.speed;
} }
cpi->vbp_threshold_16x16 = cpi->vbp_threshold_bsize_min;
cpi->vbp_bsize_min = BLOCK_16X16; cpi->vbp_bsize_min = BLOCK_16X16;
} }
} }
@@ -560,18 +544,10 @@ static void choose_partitioning(VP9_COMP *cpi,
const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
unsigned int y_sad, y_sad_g; unsigned int y_sad, y_sad_g;
BLOCK_SIZE bsize; const BLOCK_SIZE bsize = BLOCK_32X32
if (mi_row + 4 < cm->mi_rows && mi_col + 4 < cm->mi_cols) + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
bsize = BLOCK_64X64;
else if (mi_row + 4 < cm->mi_rows && mi_col + 4 >= cm->mi_cols)
bsize = BLOCK_32X64;
else if (mi_row + 4 >= cm->mi_rows && mi_col + 4 < cm->mi_cols)
bsize = BLOCK_64X32;
else
bsize = BLOCK_32X32;
assert(yv12 != NULL); assert(yv12 != NULL);
if (yv12_g && yv12_g != yv12) { if (yv12_g && yv12_g != yv12) {
vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
&cm->frame_refs[GOLDEN_FRAME - 1].sf); &cm->frame_refs[GOLDEN_FRAME - 1].sf);
@@ -692,7 +668,7 @@ static void choose_partitioning(VP9_COMP *cpi,
} }
if (is_key_frame || (low_res && if (is_key_frame || (low_res &&
vt.split[i].split[j].part_variances.none.variance > vt.split[i].split[j].part_variances.none.variance >
(cpi->vbp_threshold << 1))) { (cpi->vbp_threshold_32x32 << 1))) {
// Go down to 4x4 down-sampling for variance. // Go down to 4x4 down-sampling for variance.
variance4x4downsample[i2 + j] = 1; variance4x4downsample[i2 + j] = 1;
for (k = 0; k < 4; k++) { for (k = 0; k < 4; k++) {
@@ -757,7 +733,7 @@ static void choose_partitioning(VP9_COMP *cpi,
// If variance of this 32x32 block is above the threshold, force the block // If variance of this 32x32 block is above the threshold, force the block
// to split. This also forces a split on the upper (64x64) level. // to split. This also forces a split on the upper (64x64) level.
get_variance(&vt.split[i].part_variances.none); get_variance(&vt.split[i].part_variances.none);
if (vt.split[i].part_variances.none.variance > cpi->vbp_threshold) { if (vt.split[i].part_variances.none.variance > cpi->vbp_threshold_32x32) {
force_split[i + 1] = 1; force_split[i + 1] = 1;
force_split[0] = 1; force_split[0] = 1;
} }
@@ -769,7 +745,7 @@ static void choose_partitioning(VP9_COMP *cpi,
// we get to one that's got a variance lower than our threshold. // we get to one that's got a variance lower than our threshold.
if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
!set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col, !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col,
cpi->vbp_threshold_bsize_max, BLOCK_16X16, cpi->vbp_threshold_64x64, BLOCK_16X16,
force_split[0])) { force_split[0])) {
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2); const int x32_idx = ((i & 1) << 2);
@@ -777,7 +753,7 @@ static void choose_partitioning(VP9_COMP *cpi,
const int i2 = i << 2; const int i2 = i << 2;
if (!set_vt_partitioning(cpi, xd, &vt.split[i], BLOCK_32X32, if (!set_vt_partitioning(cpi, xd, &vt.split[i], BLOCK_32X32,
(mi_row + y32_idx), (mi_col + x32_idx), (mi_row + y32_idx), (mi_col + x32_idx),
cpi->vbp_threshold, cpi->vbp_threshold_32x32,
BLOCK_16X16, force_split[i + 1])) { BLOCK_16X16, force_split[i + 1])) {
for (j = 0; j < 4; ++j) { for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1); const int x16_idx = ((j & 1) << 1);
@@ -801,7 +777,7 @@ static void choose_partitioning(VP9_COMP *cpi,
BLOCK_8X8, BLOCK_8X8,
mi_row + y32_idx + y16_idx + y8_idx, mi_row + y32_idx + y16_idx + y8_idx,
mi_col + x32_idx + x16_idx + x8_idx, mi_col + x32_idx + x16_idx + x8_idx,
cpi->vbp_threshold_bsize_min, cpi->vbp_threshold_8x8,
BLOCK_8X8, 0)) { BLOCK_8X8, 0)) {
set_block_size(cpi, xd, set_block_size(cpi, xd,
(mi_row + y32_idx + y16_idx + y8_idx), (mi_row + y32_idx + y16_idx + y8_idx),
@@ -917,6 +893,10 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
sizeof(uint8_t) * ctx->num_4x4_blk); sizeof(uint8_t) * ctx->num_4x4_blk);
for (i = 0; i < MAX_MB_PLANE; ++i)
vpx_memcpy(x->blk_skip[i], ctx->blk_skip[i],
sizeof(uint8_t) * ctx->num_4x4_blk);
if (!output_enabled) if (!output_enabled)
return; return;
@@ -1007,6 +987,7 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
mbmi->mode = ZEROMV; mbmi->mode = ZEROMV;
mbmi->tx_size = MIN(max_txsize_lookup[bsize], mbmi->tx_size = MIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[tx_mode]); tx_mode_to_biggest_tx_size[tx_mode]);
mbmi->max_tx_size = max_txsize_lookup[bsize];
mbmi->skip = 1; mbmi->skip = 1;
mbmi->uv_mode = DC_PRED; mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[0] = LAST_FRAME;
@@ -1055,6 +1036,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mbmi = &xd->mi[0].src_mi->mbmi; mbmi = &xd->mi[0].src_mi->mbmi;
mbmi->sb_type = bsize; mbmi->sb_type = bsize;
mbmi->max_tx_size = max_txsize_lookup[bsize];
for (i = 0; i < MAX_MB_PLANE; ++i) { for (i = 0; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][0]; p[i].coeff = ctx->coeff_pbuf[i][0];
@@ -1073,13 +1055,15 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
x->source_variance = x->source_variance =
high_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize, xd->bd); vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
bsize, xd->bd);
} else { } else {
x->source_variance = x->source_variance =
get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
} }
#else #else
x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); x->source_variance =
vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
// Save rdmult before it might be changed, so it can be restored later. // Save rdmult before it might be changed, so it can be restored later.
@@ -1103,8 +1087,9 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
} else if (aq_mode == CYCLIC_REFRESH_AQ) { } else if (aq_mode == CYCLIC_REFRESH_AQ) {
const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map
: cm->last_frame_seg_map; : cm->last_frame_seg_map;
// If segment 1, use rdmult for that segment. // If segment is boosted, use rdmult for that segment.
if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)) if (cyclic_refresh_segment_id_boosted(
vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
} }
@@ -1208,6 +1193,7 @@ static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
BLOCK_SIZE bsize) { BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
int p; int p;
@@ -1232,12 +1218,17 @@ static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
sizeof(*xd->above_seg_context) * mi_width); sizeof(*xd->above_seg_context) * mi_width);
vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
sizeof(xd->left_seg_context[0]) * mi_height); sizeof(xd->left_seg_context[0]) * mi_height);
vpx_memcpy(xd->above_txfm_context, ta,
sizeof(*xd->above_txfm_context) * mi_width);
vpx_memcpy(xd->left_txfm_context, tl,
sizeof(*xd->left_txfm_context) * mi_height);
} }
static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
BLOCK_SIZE bsize) { BLOCK_SIZE bsize) {
const MACROBLOCKD *const xd = &x->e_mbd; const MACROBLOCKD *const xd = &x->e_mbd;
int p; int p;
@@ -1264,6 +1255,10 @@ static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
sizeof(*xd->above_seg_context) * mi_width); sizeof(*xd->above_seg_context) * mi_width);
vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
sizeof(xd->left_seg_context[0]) * mi_height); sizeof(xd->left_seg_context[0]) * mi_height);
vpx_memcpy(ta, xd->above_txfm_context,
sizeof(*xd->above_txfm_context) * mi_width);
vpx_memcpy(tl, xd->left_txfm_context,
sizeof(*xd->left_txfm_context) * mi_height);
} }
static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
@@ -1421,6 +1416,7 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
int index = block_row * mis + block_col; int index = block_row * mis + block_col;
mi_8x8[index].src_mi = mi_upper_left + index; mi_8x8[index].src_mi = mi_upper_left + index;
mi_8x8[index].src_mi->mbmi.sb_type = bsize; mi_8x8[index].src_mi->mbmi.sb_type = bsize;
mi_8x8[index].src_mi->mbmi.max_tx_size = max_txsize_lookup[bsize];
} }
} }
} else { } else {
@@ -1486,6 +1482,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
index = b_mi_row * mis + b_mi_col; index = b_mi_row * mis + b_mi_col;
mi_8x8[index].src_mi = mi_upper_left + index; mi_8x8[index].src_mi = mi_upper_left + index;
mi_8x8[index].src_mi->mbmi.sb_type = BLOCK_16X16; mi_8x8[index].src_mi->mbmi.sb_type = BLOCK_16X16;
mi_8x8[index].src_mi->mbmi.max_tx_size = max_txsize_lookup[BLOCK_16X16];
// TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
// size to further improve quality. // size to further improve quality.
@@ -1508,6 +1505,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col; index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col;
mi_8x8[index].src_mi = mi_upper_left + index; mi_8x8[index].src_mi = mi_upper_left + index;
mi_8x8[index].src_mi->mbmi.sb_type = BLOCK_32X32; mi_8x8[index].src_mi->mbmi.sb_type = BLOCK_32X32;
mi_8x8[index].src_mi->mbmi.max_tx_size = max_txsize_lookup[BLOCK_32X32];
} }
} }
@@ -1520,6 +1518,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
if (is_larger_better) { if (is_larger_better) {
mi_8x8[0].src_mi = mi_upper_left; mi_8x8[0].src_mi = mi_upper_left;
mi_8x8[0].src_mi->mbmi.sb_type = BLOCK_64X64; mi_8x8[0].src_mi->mbmi.sb_type = BLOCK_64X64;
mi_8x8[0].src_mi->mbmi.max_tx_size = max_txsize_lookup[BLOCK_64X64];
} }
} }
} else { // partial in-image SB64 } else { // partial in-image SB64
@@ -1714,6 +1713,7 @@ static void rd_use_partition(VP9_COMP *cpi,
BLOCK_SIZE subsize; BLOCK_SIZE subsize;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8]; PARTITION_CONTEXT sl[8], sa[8];
TXFM_CONTEXT tl[8], ta[8];
RD_COST last_part_rdc, none_rdc, chosen_rdc; RD_COST last_part_rdc, none_rdc, chosen_rdc;
BLOCK_SIZE sub_subsize = BLOCK_4X4; BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0; int splits_below = 0;
@@ -1735,7 +1735,9 @@ static void rd_use_partition(VP9_COMP *cpi,
subsize = get_subsize(bsize, partition); subsize = get_subsize(bsize, partition);
pc_tree->partitioning = partition; pc_tree->partitioning = partition;
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
save_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) { if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -1775,7 +1777,7 @@ static void rd_use_partition(VP9_COMP *cpi,
none_rdc.dist); none_rdc.dist);
} }
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
mi_8x8[0].src_mi->mbmi.sb_type = bs_type; mi_8x8[0].src_mi->mbmi.sb_type = bs_type;
pc_tree->partitioning = partition; pc_tree->partitioning = partition;
} }
@@ -1886,7 +1888,7 @@ static void rd_use_partition(VP9_COMP *cpi,
BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
chosen_rdc.rate = 0; chosen_rdc.rate = 0;
chosen_rdc.dist = 0; chosen_rdc.dist = 0;
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
pc_tree->partitioning = PARTITION_SPLIT; pc_tree->partitioning = PARTITION_SPLIT;
// Split partition. // Split partition.
@@ -1896,17 +1898,18 @@ static void rd_use_partition(VP9_COMP *cpi,
RD_COST tmp_rdc; RD_COST tmp_rdc;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8]; PARTITION_CONTEXT sl[8], sa[8];
TXFM_CONTEXT tl[8], ta[8];
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue; continue;
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); save_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
pc_tree->split[i]->partitioning = PARTITION_NONE; pc_tree->split[i]->partitioning = PARTITION_NONE;
rd_pick_sb_modes(cpi, tile_data, x, rd_pick_sb_modes(cpi, tile_data, x,
mi_row + y_idx, mi_col + x_idx, &tmp_rdc, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
split_subsize, &pc_tree->split[i]->none, INT64_MAX); split_subsize, &pc_tree->split[i]->none, INT64_MAX);
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp9_rd_cost_reset(&chosen_rdc); vp9_rd_cost_reset(&chosen_rdc);
@@ -1946,7 +1949,9 @@ static void rd_use_partition(VP9_COMP *cpi,
chosen_rdc = none_rdc; chosen_rdc = none_rdc;
} }
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
// We must have chosen a partitioning and encoding or we'll fail later on. // We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success. // No other opportunities for success.
@@ -2300,6 +2305,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8]; PARTITION_CONTEXT sl[8], sa[8];
TXFM_CONTEXT tl[8], ta[8];
TOKENEXTRA *tp_orig = *tp; TOKENEXTRA *tp_orig = *tp;
PICK_MODE_CONTEXT *ctx = &pc_tree->none; PICK_MODE_CONTEXT *ctx = &pc_tree->none;
int i, pl; int i, pl;
@@ -2365,7 +2371,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
partition_vert_allowed &= force_vert_split; partition_vert_allowed &= force_vert_split;
} }
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
save_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
#if CONFIG_FP_MB_STATS #if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) { if (cpi->use_fp_mb_stats) {
@@ -2511,7 +2519,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
#endif #endif
} }
} }
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
} }
// store estimated motion vector // store estimated motion vector
@@ -2576,7 +2584,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.less_rectangular_check) if (cpi->sf.less_rectangular_check)
do_rect &= !partition_none_allowed; do_rect &= !partition_none_allowed;
} }
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
} }
// PARTITION_HORZ // PARTITION_HORZ
@@ -2603,6 +2613,10 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
partition_none_allowed) partition_none_allowed)
pc_tree->horizontal[1].pred_interp_filter = pc_tree->horizontal[1].pred_interp_filter =
ctx->mic.mbmi.interp_filter; ctx->mic.mbmi.interp_filter;
xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer +
((mi_row + mi_step) & 0x07);
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col,
&this_rdc, subsize, &pc_tree->horizontal[1], &this_rdc, subsize, &pc_tree->horizontal[1],
best_rdc.rdcost - sum_rdc.rdcost); best_rdc.rdcost - sum_rdc.rdcost);
@@ -2624,7 +2638,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
pc_tree->partitioning = PARTITION_HORZ; pc_tree->partitioning = PARTITION_HORZ;
} }
} }
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
} }
// PARTITION_VERT // PARTITION_VERT
if (partition_vert_allowed && do_rect) { if (partition_vert_allowed && do_rect) {
@@ -2650,6 +2666,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
partition_none_allowed) partition_none_allowed)
pc_tree->vertical[1].pred_interp_filter = pc_tree->vertical[1].pred_interp_filter =
ctx->mic.mbmi.interp_filter; ctx->mic.mbmi.interp_filter;
xd->above_txfm_context = cm->above_txfm_context + mi_col + mi_step;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step,
&this_rdc, subsize, &this_rdc, subsize,
&pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost); &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost);
@@ -2672,7 +2690,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
pc_tree->partitioning = PARTITION_VERT; pc_tree->partitioning = PARTITION_VERT;
} }
} }
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
} }
// TODO(jbb): This code added so that we avoid static analysis // TODO(jbb): This code added so that we avoid static analysis
@@ -2714,6 +2734,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi,
// Initialize the left context for the new SB row // Initialize the left context for the new SB row
vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); vpx_memset(&xd->left_context, 0, sizeof(xd->left_context));
vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
vpx_memset(xd->left_txfm_context_buffer, 0,
sizeof(xd->left_txfm_context_buffer));
// Code each SB in the row // Code each SB in the row
for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end; for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
@@ -2802,6 +2824,8 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
2 * aligned_mi_cols * MAX_MB_PLANE); 2 * aligned_mi_cols * MAX_MB_PLANE);
vpx_memset(xd->above_seg_context, 0, vpx_memset(xd->above_seg_context, 0,
sizeof(*xd->above_seg_context) * aligned_mi_cols); sizeof(*xd->above_seg_context) * aligned_mi_cols);
vpx_memset(cm->above_txfm_context, 0,
sizeof(*xd->above_txfm_context) * aligned_mi_cols);
} }
static int check_dual_ref_flags(VP9_COMP *cpi) { static int check_dual_ref_flags(VP9_COMP *cpi) {
@@ -2842,6 +2866,9 @@ static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) { static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
if (xd->lossless) if (xd->lossless)
return ONLY_4X4; return ONLY_4X4;
return TX_MODE_SELECT;
if (cpi->common.frame_type == KEY_FRAME && if (cpi->common.frame_type == KEY_FRAME &&
cpi->sf.use_nonrd_pick_mode && cpi->sf.use_nonrd_pick_mode &&
cpi->sf.partition_search_type == VAR_BASED_PARTITION) cpi->sf.partition_search_type == VAR_BASED_PARTITION)
@@ -2875,9 +2902,10 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi,
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mbmi = &xd->mi[0].src_mi->mbmi; mbmi = &xd->mi[0].src_mi->mbmi;
mbmi->sb_type = bsize; mbmi->sb_type = bsize;
mbmi->max_tx_size = max_txsize_lookup[bsize];
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
if (mbmi->segment_id) if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
if (cm->frame_type == KEY_FRAME) if (cm->frame_type == KEY_FRAME)
@@ -3783,6 +3811,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_zero(rdc->filter_diff); vp9_zero(rdc->filter_diff);
vp9_zero(rdc->tx_select_diff); vp9_zero(rdc->tx_select_diff);
vp9_zero(rd_opt->tx_select_threshes); vp9_zero(rd_opt->tx_select_threshes);
vp9_zero(x->blk_skip);
xd->lossless = cm->base_qindex == 0 && xd->lossless = cm->base_qindex == 0 &&
cm->y_dc_delta_q == 0 && cm->y_dc_delta_q == 0 &&
@@ -3983,40 +4012,40 @@ void vp9_encode_frame(VP9_COMP *cpi) {
} }
} }
if (cm->tx_mode == TX_MODE_SELECT) { // if (cm->tx_mode == TX_MODE_SELECT) {
int count4x4 = 0; // int count4x4 = 0;
int count8x8_lp = 0, count8x8_8x8p = 0; // int count8x8_lp = 0, count8x8_8x8p = 0;
int count16x16_16x16p = 0, count16x16_lp = 0; // int count16x16_16x16p = 0, count16x16_lp = 0;
int count32x32 = 0; // int count32x32 = 0;
//
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { // for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
count4x4 += counts->tx.p32x32[i][TX_4X4]; // count4x4 += counts->tx.p32x32[i][TX_4X4];
count4x4 += counts->tx.p16x16[i][TX_4X4]; // count4x4 += counts->tx.p16x16[i][TX_4X4];
count4x4 += counts->tx.p8x8[i][TX_4X4]; // count4x4 += counts->tx.p8x8[i][TX_4X4];
//
count8x8_lp += counts->tx.p32x32[i][TX_8X8]; // count8x8_lp += counts->tx.p32x32[i][TX_8X8];
count8x8_lp += counts->tx.p16x16[i][TX_8X8]; // count8x8_lp += counts->tx.p16x16[i][TX_8X8];
count8x8_8x8p += counts->tx.p8x8[i][TX_8X8]; // count8x8_8x8p += counts->tx.p8x8[i][TX_8X8];
//
count16x16_16x16p += counts->tx.p16x16[i][TX_16X16]; // count16x16_16x16p += counts->tx.p16x16[i][TX_16X16];
count16x16_lp += counts->tx.p32x32[i][TX_16X16]; // count16x16_lp += counts->tx.p32x32[i][TX_16X16];
count32x32 += counts->tx.p32x32[i][TX_32X32]; // count32x32 += counts->tx.p32x32[i][TX_32X32];
} // }
if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && // if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
count32x32 == 0) { // count32x32 == 0) {
cm->tx_mode = ALLOW_8X8; // cm->tx_mode = ALLOW_8X8;
reset_skip_tx_size(cm, TX_8X8); // reset_skip_tx_size(cm, TX_8X8);
} else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && // } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { // count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
cm->tx_mode = ONLY_4X4; // cm->tx_mode = ONLY_4X4;
reset_skip_tx_size(cm, TX_4X4); // reset_skip_tx_size(cm, TX_4X4);
} else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { // } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
cm->tx_mode = ALLOW_32X32; // cm->tx_mode = ALLOW_32X32;
} else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { // } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
cm->tx_mode = ALLOW_16X16; // cm->tx_mode = ALLOW_16X16;
reset_skip_tx_size(cm, TX_16X16); // reset_skip_tx_size(cm, TX_16X16);
} // }
} // }
} else { } else {
cm->reference_mode = SINGLE_REFERENCE; cm->reference_mode = SINGLE_REFERENCE;
encode_frame_internal(cpi); encode_frame_internal(cpi);
@@ -4042,6 +4071,57 @@ static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
++counts->uv_mode[y_mode][uv_mode]; ++counts->uv_mode[y_mode][uv_mode];
} }
static void update_txfm_count(MACROBLOCKD *xd, FRAME_COUNTS *counts,
TX_SIZE tx_size, int blk_row, int blk_col,
int dry_run) {
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
int tx_idx = (blk_row / 2) * 8 + (blk_col / 2);
int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2),
mbmi->max_tx_size,
tx_size);
TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> 5;
if (xd->mb_to_right_edge < 0)
max_blocks_wide += xd->mb_to_right_edge >> 5;
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
if (tx_size == plane_tx_size) {
if (!dry_run)
++counts->txfm_partition[ctx][0];
mbmi->tx_size = tx_size;
txfm_partition_update(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2), tx_size);
} else {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_high_lookup[bsize];
int i;
if (!dry_run)
++counts->txfm_partition[ctx][1];
if (tx_size == TX_8X8) {
mbmi->inter_tx_size[tx_idx] = TX_4X4;
mbmi->tx_size = TX_4X4;
txfm_partition_update(xd->above_txfm_context + (blk_col / 2),
xd->left_txfm_context + (blk_row / 2), TX_4X4);
return;
}
for (i = 0; i < 4; ++i) {
int offsetr = (i >> 1) * bh / 2;
int offsetc = (i & 0x01) * bh / 2;
update_txfm_count(xd, counts, tx_size - 1,
blk_row + offsetr, blk_col + offsetc, dry_run);
}
}
}
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
TOKENEXTRA **t, int output_enabled, TOKENEXTRA **t, int output_enabled,
int mi_row, int mi_col, BLOCK_SIZE bsize, int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -4101,13 +4181,18 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
vp9_encode_sb(x, MAX(bsize, BLOCK_8X8)); vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
vp9_tokenize_sb(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8));
vp9_tokenize_sb_inter(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8));
} }
xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
if (output_enabled) { if (output_enabled) {
if (cm->tx_mode == TX_MODE_SELECT && if (cm->tx_mode == TX_MODE_SELECT &&
mbmi->sb_type >= BLOCK_8X8 && mbmi->sb_type >= BLOCK_8X8 &&
!(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) { !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
if (!is_inter_block(mbmi))
++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd), ++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd),
&td->counts->tx)[mbmi->tx_size]; &td->counts->tx)[mbmi->tx_size];
} else { } else {
@@ -4126,7 +4211,80 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
mi_8x8[mis * y + x].src_mi->mbmi.tx_size = tx_size; mi_8x8[mis * y + x].src_mi->mbmi.tx_size = tx_size;
} }
if (!is_inter_block(mbmi)) {
// TODO(jingning): refactor this code for speed improvement.
const MODE_INFO *above_mi = xd->mi[-cm->mi_stride].src_mi;
const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
if (bsize >= BLOCK_8X8) {
int idx, idy;
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int block = idy * 2 + idx;
const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi,
block);
const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi,
block);
if (above == left) {
++td->counts->intra_predictor[0][mi->bmi[block].as_mode == above];
} else {
++td->counts->intra_predictor[1][mi->bmi[block].as_mode == above];
if (mbmi->mode != above)
++td->counts->intra_predictor[1]
[mi->bmi[block].as_mode == left];
}
}
}
} else {
const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, 0);
const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, 0);
if (above == left) {
++td->counts->intra_predictor[0][mbmi->mode == above];
} else {
++td->counts->intra_predictor[1][mbmi->mode == above];
if (mbmi->mode != above)
++td->counts->intra_predictor[1][mbmi->mode == left];
}
}
}
++td->counts->tx.tx_totals[mbmi->tx_size]; ++td->counts->tx.tx_totals[mbmi->tx_size];
++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
} }
if (is_inter_block(mbmi)) {
if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8) {
BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[bsize]];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int width = num_4x4_blocks_wide_lookup[bsize];
int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bh)
if (mbmi->skip || seg_skip)
txfm_partition_update(xd->above_txfm_context + (idx / 2),
xd->left_txfm_context + (idy / 2),
max_txsize_lookup[bsize]);
else
update_txfm_count(xd, td->counts, max_txsize_lookup[mbmi->sb_type],
idy, idx, !output_enabled);
}
} else {
TX_SIZE max_tx_size = max_txsize_lookup[bsize];
BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int width = num_4x4_blocks_wide_lookup[bsize];
int height = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bh)
txfm_partition_update(xd->above_txfm_context + (idx / 2),
xd->left_txfm_context + (idy / 2), mbmi->tx_size);
}
if (mbmi->sb_type < BLOCK_8X8)
txfm_partition_update(xd->above_txfm_context,
xd->left_txfm_context, TX_4X4);
} }

View File

@@ -530,6 +530,94 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
} }
} }
void vp9_xform_quant_inter(MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *src_diff;
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff,
dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_16X16:
vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
}
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
case TX_32X32:
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
case TX_16X16:
vp9_fdct16x16(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
vp9_fdct8x8(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
break;
}
}
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
@@ -619,7 +707,8 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
} }
} }
static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, static void encode_block_b(int blk_row, int blk_col, int plane,
int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) { TX_SIZE tx_size, void *arg) {
struct encode_b_args *const args = arg; struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x; MACROBLOCK *const x = args->x;
@@ -628,61 +717,60 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
struct macroblock_plane *const p = &x->plane[plane]; struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
int i, j;
uint8_t *dst; uint8_t *dst;
ENTROPY_CONTEXT *a, *l; ENTROPY_CONTEXT *a, *l;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); const int block_stride = num_4x4_blocks_wide_lookup[plane_bsize];
dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; int i;
a = &ctx->ta[plane][i]; dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
l = &ctx->tl[plane][j]; a = &ctx->ta[plane][blk_col];
l = &ctx->tl[plane][blk_row];
// TODO(jingning): per transformed block zero forcing only enabled for // TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well. // luma component. will integrate chroma components as well.
if (x->zcoeff_blk[tx_size][block] && plane == 0) { // if (x->zcoeff_blk[tx_size][block] && plane == 0) {
p->eobs[block] = 0; // p->eobs[block] = 0;
*a = *l = 0; // *a = *l = 0;
return; // return;
} // }
if (!x->skip_recode) { if (x->blk_skip[plane][blk_row * block_stride + blk_col] == 0)
if (x->quant_fp) { vp9_xform_quant_inter(x, plane, block, blk_row, blk_col,
// Encoding process for rtc mode plane_bsize, tx_size);
if (x->skip_txfm[0] == 1 && plane == 0) { else
// skip forward transform
p->eobs[block] = 0; p->eobs[block] = 0;
*a = *l = 0;
return;
} else {
vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
}
} else {
if (max_txsize_lookup[plane_bsize] == tx_size) {
int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
if (x->skip_txfm[txfm_blk_index] == 0) {
// full forward transform and quantization
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
} else if (x->skip_txfm[txfm_blk_index]== 2) {
// fast path forward transform and quantization
vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
} else {
// skip forward transform
p->eobs[block] = 0;
*a = *l = 0;
return;
}
} else {
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
}
}
}
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { if (x->optimize) {
const int ctx = combine_entropy_contexts(*a, *l); int context;
*a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0; switch (tx_size) {
case TX_4X4:
break;
case TX_8X8:
a[0] = !!*(const uint16_t *)&a[0];
l[0] = !!*(const uint16_t *)&l[0];
break;
case TX_16X16:
a[0] = !!*(const uint32_t *)&a[0];
l[0] = !!*(const uint32_t *)&l[0];
break;
case TX_32X32:
a[0] = !!*(const uint64_t *)&a[0];
l[0] = !!*(const uint64_t *)&l[0];
break;
default:
assert(0 && "Invalid transform size.");
break;
}
context = combine_entropy_contexts(*a, *l);
*a = *l = optimize_b(x, plane, block, tx_size, context) > 0;
} else { } else {
*a = *l = p->eobs[block] > 0; *a = *l = p->eobs[block] > 0;
} }
for (i = 0; i < (1 << tx_size); ++i) {
a[i] = a[0];
l[i] = l[0];
}
if (p->eobs[block]) if (p->eobs[block])
*(args->skip) = 0; *(args->skip) = 0;
@@ -739,6 +827,46 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
} }
} }
static void encode_block_inter(int blk_row, int blk_col,
int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
struct macroblockd_plane *const pd = &xd->plane[plane];
int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
(blk_col >> (1 - pd->subsampling_x));
TX_SIZE plane_tx_size = plane ?
get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], plane_bsize, 0, 0) :
mbmi->inter_tx_size[tx_idx];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
if (xd->mb_to_right_edge < 0)
max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
if (tx_size == plane_tx_size) {
encode_block_b(blk_row, blk_col, plane, block, plane_bsize, tx_size, arg);
} else {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_high_lookup[bsize];
int step = 1 << (2 *(tx_size - 1));
int i;
for (i = 0; i < 4; ++i) {
int offsetr = (i >> 1) * bh / 2;
int offsetc = (i & 0x01) * bh / 2;
encode_block_inter(blk_row + offsetr, blk_col + offsetc,
plane, block + i * step, plane_bsize,
tx_size - 1, arg);
}
}
}
static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) { TX_SIZE tx_size, void *arg) {
MACROBLOCK *const x = (MACROBLOCK *)arg; MACROBLOCK *const x = (MACROBLOCK *)arg;
@@ -783,18 +911,27 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
return; return;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) { for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
if (!x->skip_recode) const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int idx, idy;
int block = 0;
int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
vp9_subtract_plane(x, bsize, plane); vp9_subtract_plane(x, bsize, plane);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { vp9_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]);
const struct macroblockd_plane* const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
vp9_get_entropy_contexts(bsize, tx_size, pd,
ctx.ta[plane], ctx.tl[plane]);
}
vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block, for (idy = 0; idy < mi_height; idy += bh) {
&arg); for (idx = 0; idx < mi_width; idx += bh) {
encode_block_inter(idy, idx, plane, block, plane_bsize,
max_txsize_lookup[plane_bsize], &arg);
block += step;
}
}
} }
} }
@@ -820,6 +957,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
const int src_stride = p->src.stride; const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride; const int dst_stride = pd->dst.stride;
int i, j; int i, j;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
dst = &pd->dst.buf[4 * (j * dst_stride + i)]; dst = &pd->dst.buf[4 * (j * dst_stride + i)];
src = &p->src.buf[4 * (j * src_stride + i)]; src = &p->src.buf[4 * (j * src_stride + i)];

View File

@@ -29,6 +29,9 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size); BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size); BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp9_xform_quant_inter(MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size); BLOCK_SIZE plane_bsize, TX_SIZE tx_size);

View File

@@ -126,14 +126,25 @@ void vp9_apply_active_map(VP9_COMP *cpi) {
assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE); assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
if (frame_is_intra_only(&cpi->common)) {
cpi->active_map.enabled = 0;
cpi->active_map.update = 1;
}
if (cpi->active_map.update) { if (cpi->active_map.update) {
if (cpi->active_map.enabled) { if (cpi->active_map.enabled) {
for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i) for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i]; if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
vp9_enable_segmentation(seg); vp9_enable_segmentation(seg);
vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP); vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
// Setting the data to -MAX_LOOP_FILTER will result in the computed loop
// filter level being zero regardless of the value of seg->abs_delta.
vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE,
SEG_LVL_ALT_LF, -MAX_LOOP_FILTER);
} else { } else {
vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP); vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
if (seg->enabled) { if (seg->enabled) {
seg->update_data = 1; seg->update_data = 1;
seg->update_map = 1; seg->update_map = 1;
@@ -172,6 +183,33 @@ int vp9_set_active_map(VP9_COMP* cpi,
} }
} }
int vp9_get_active_map(VP9_COMP* cpi,
unsigned char* new_map_16x16,
int rows,
int cols) {
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
new_map_16x16) {
unsigned char* const seg_map_8x8 = cpi->segmentation_map;
const int mi_rows = cpi->common.mi_rows;
const int mi_cols = cpi->common.mi_cols;
vpx_memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
if (cpi->active_map.enabled) {
int r, c;
for (r = 0; r < mi_rows; ++r) {
for (c = 0; c < mi_cols; ++c) {
// Cyclic refresh segments are considered active despite not having
// AM_SEGMENT_ID_ACTIVE
new_map_16x16[(r >> 1) * cols + (c >> 1)] |=
seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
}
}
}
return 0;
} else {
return -1;
}
}
void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
MACROBLOCK *const mb = &cpi->td.mb; MACROBLOCK *const mb = &cpi->td.mb;
cpi->common.allow_high_precision_mv = allow_high_precision_mv; cpi->common.allow_high_precision_mv = allow_high_precision_mv;
@@ -303,7 +341,10 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->active_map.map); vpx_free(cpi->active_map.map);
cpi->active_map.map = NULL; cpi->active_map.map = NULL;
vp9_free_ref_frame_buffers(cm); vp9_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
vp9_free_postproc_buffers(cm);
#endif
vp9_free_context_buffers(cm); vp9_free_context_buffers(cm);
vp9_free_frame_buffer(&cpi->last_frame_uf); vp9_free_frame_buffer(&cpi->last_frame_uf);
@@ -1908,6 +1949,10 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
#endif #endif
vp9_remove_common(cm); vp9_remove_common(cm);
vp9_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
vp9_free_postproc_buffers(cm);
#endif
vpx_free(cpi); vpx_free(cpi);
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
@@ -3408,6 +3453,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
MAX_MODES * sizeof(*cpi->mode_chosen_counts)); MAX_MODES * sizeof(*cpi->mode_chosen_counts));
#endif #endif
cpi->dummy_writing = 1;
if (cpi->sf.recode_loop == DISALLOW_RECODE) { if (cpi->sf.recode_loop == DISALLOW_RECODE) {
encode_without_recode_loop(cpi); encode_without_recode_loop(cpi);
} else { } else {
@@ -3453,6 +3499,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Pick the loop filter level for the frame. // Pick the loop filter level for the frame.
loopfilter_frame(cpi, cm); loopfilter_frame(cpi, cm);
cpi->dummy_writing = 0;
// build the bitstream // build the bitstream
vp9_pack_bitstream(cpi, dest, size); vp9_pack_bitstream(cpi, dest, size);

View File

@@ -417,6 +417,7 @@ typedef struct VP9_COMP {
int b_calculate_ssimg; int b_calculate_ssimg;
#endif #endif
int dummy_writing;
int b_calculate_psnr; int b_calculate_psnr;
int droppable; int droppable;
@@ -460,10 +461,10 @@ typedef struct VP9_COMP {
int resize_pending; int resize_pending;
// VAR_BASED_PARTITION thresholds // VAR_BASED_PARTITION thresholds
int64_t vbp_threshold; int64_t vbp_threshold_64x64;
int64_t vbp_threshold_bsize_min; int64_t vbp_threshold_32x32;
int64_t vbp_threshold_bsize_max;
int64_t vbp_threshold_16x16; int64_t vbp_threshold_16x16;
int64_t vbp_threshold_8x8;
BLOCK_SIZE vbp_bsize_min; BLOCK_SIZE vbp_bsize_min;
// Multi-threading // Multi-threading
@@ -508,6 +509,8 @@ int vp9_update_entropy(VP9_COMP *cpi, int update);
int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
int vp9_get_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
int vp9_set_internal_size(VP9_COMP *cpi, int vp9_set_internal_size(VP9_COMP *cpi,
VPX_SCALING horiz_mode, VPX_SCALING vert_mode); VPX_SCALING horiz_mode, VPX_SCALING vert_mode);

View File

@@ -38,7 +38,7 @@
#define OUTPUT_FPF 0 #define OUTPUT_FPF 0
#define ARF_STATS_OUTPUT 0 #define ARF_STATS_OUTPUT 0
#define GROUP_ADAPTIVE_MAXQ 0 #define GROUP_ADAPTIVE_MAXQ 1
#define BOOST_BREAKOUT 12.5 #define BOOST_BREAKOUT 12.5
#define BOOST_FACTOR 12.5 #define BOOST_FACTOR 12.5
@@ -61,12 +61,9 @@
#define RC_FACTOR_MAX 1.75 #define RC_FACTOR_MAX 1.75
#define INTRA_WEIGHT_EXPERIMENT 0
#if INTRA_WEIGHT_EXPERIMENT
#define NCOUNT_INTRA_THRESH 8192 #define NCOUNT_INTRA_THRESH 8192
#define NCOUNT_INTRA_FACTOR 3 #define NCOUNT_INTRA_FACTOR 3
#define NCOUNT_FRAME_II_THRESH 5.0 #define NCOUNT_FRAME_II_THRESH 5.0
#endif
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001) #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
@@ -832,7 +829,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
// Keep a count of cases where the inter and intra were very close // Keep a count of cases where the inter and intra were very close
// and very low. This helps with scene cut detection for example in // and very low. This helps with scene cut detection for example in
// cropped clips with black bars at the sides or top and bottom. // cropped clips with black bars at the sides or top and bottom.
#if INTRA_WEIGHT_EXPERIMENT
if (((this_error - intrapenalty) * 9 <= motion_error * 10) && if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
(this_error < (2 * intrapenalty))) { (this_error < (2 * intrapenalty))) {
neutral_count += 1.0; neutral_count += 1.0;
@@ -843,12 +839,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
neutral_count += (double)motion_error / neutral_count += (double)motion_error /
DOUBLE_DIVIDE_CHECK((double)this_error); DOUBLE_DIVIDE_CHECK((double)this_error);
} }
#else
if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
(this_error < (2 * intrapenalty))) {
neutral_count += 1.0;
}
#endif
mv.row *= 8; mv.row *= 8;
mv.col *= 8; mv.col *= 8;
@@ -1291,11 +1281,10 @@ static double get_sr_decay_rate(const VP9_COMP *cpi,
frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2); frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
modified_pct_inter = frame->pcnt_inter; modified_pct_inter = frame->pcnt_inter;
#if INTRA_WEIGHT_EXPERIMENT
if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) < if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
(double)NCOUNT_FRAME_II_THRESH) (double)NCOUNT_FRAME_II_THRESH) {
modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral; modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral;
#endif }
modified_pcnt_intra = 100 * (1.0 - modified_pct_inter); modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);

View File

@@ -20,9 +20,11 @@
#include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_reconintra.h"
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_pickmode.h" #include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_ratectrl.h"
@@ -188,6 +190,8 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
cond_cost_list(cpi, cost_list), cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref], NULL, 0, 0); &dis, &x->pred_sse[ref], NULL, 0, 0);
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
} }
if (scaled_ref_frame) { if (scaled_ref_frame) {
@@ -198,6 +202,247 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
return rv; return rv;
} }
static void block_variance(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
int w, int h, unsigned int *sse, int *sum,
int block_size, unsigned int *sse8x8,
int *sum8x8, unsigned int *var8x8) {
int i, j, k = 0;
*sse = 0;
*sum = 0;
for (i = 0; i < h; i += block_size) {
for (j = 0; j < w; j += block_size) {
vp9_get8x8var(src + src_stride * i + j, src_stride,
ref + ref_stride * i + j, ref_stride,
&sse8x8[k], &sum8x8[k]);
*sse += sse8x8[k];
*sum += sum8x8[k];
var8x8[k] = sse8x8[k] - (((unsigned int)sum8x8[k] * sum8x8[k]) >> 6);
k++;
}
}
}
static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
unsigned int *sse_i, int *sum_i,
unsigned int *var_o, unsigned int *sse_o,
int *sum_o) {
const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
int i, j, k = 0;
for (i = 0; i < nh; i += 2) {
for (j = 0; j < nw; j += 2) {
sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
var_o[k] = sse_o[k] - (((unsigned int)sum_o[k] * sum_o[k]) >>
(b_width_log2_lookup[unit_size] +
b_height_log2_lookup[unit_size] + 6));
k++;
}
}
}
static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
unsigned int *var_y, unsigned int *sse_y,
int mi_row, int mi_col, int *early_term) {
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
unsigned int sse;
int rate;
int64_t dist;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
const uint32_t dc_quant = pd->dequant[0];
const uint32_t ac_quant = pd->dequant[1];
const int64_t dc_thr = dc_quant * dc_quant >> 6;
const int64_t ac_thr = ac_quant * ac_quant >> 6;
unsigned int var;
int sum;
int skip_dc = 0;
const int bw = b_width_log2_lookup[bsize];
const int bh = b_height_log2_lookup[bsize];
const int num8x8 = 1 << (bw + bh - 2);
unsigned int sse8x8[64] = {0};
int sum8x8[64] = {0};
unsigned int var8x8[64] = {0};
TX_SIZE tx_size;
int i, k;
// Calculate variance for whole partition, and also save 8x8 blocks' variance
// to be used in following transform skipping test.
block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
4 << bw, 4 << bh, &sse, &sum, 8, sse8x8, sum8x8, var8x8);
var = sse - (((int64_t)sum * sum) >> (bw + bh + 4));
*var_y = var;
*sse_y = sse;
if (cpi->common.tx_mode == TX_MODE_SELECT) {
if (sse > (var << 2))
tx_size = MIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
else
tx_size = TX_8X8;
if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
cyclic_refresh_segment_id_boosted(xd->mi[0].src_mi->mbmi.segment_id))
tx_size = TX_8X8;
else if (tx_size > TX_16X16)
tx_size = TX_16X16;
}
} else {
tx_size = MIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
}
assert(tx_size >= TX_8X8);
xd->mi[0].src_mi->mbmi.tx_size = tx_size;
// Evaluate if the partition block is a skippable block in Y plane.
{
unsigned int sse16x16[16] = {0};
int sum16x16[16] = {0};
unsigned int var16x16[16] = {0};
const int num16x16 = num8x8 >> 2;
unsigned int sse32x32[4] = {0};
int sum32x32[4] = {0};
unsigned int var32x32[4] = {0};
const int num32x32 = num8x8 >> 4;
int ac_test = 1;
int dc_test = 1;
const int num = (tx_size == TX_8X8) ? num8x8 :
((tx_size == TX_16X16) ? num16x16 : num32x32);
const unsigned int *sse_tx = (tx_size == TX_8X8) ? sse8x8 :
((tx_size == TX_16X16) ? sse16x16 : sse32x32);
const unsigned int *var_tx = (tx_size == TX_8X8) ? var8x8 :
((tx_size == TX_16X16) ? var16x16 : var32x32);
// Calculate variance if tx_size > TX_8X8
if (tx_size >= TX_16X16)
calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
sum16x16);
if (tx_size == TX_32X32)
calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
sse32x32, sum32x32);
// Skipping test
x->skip_txfm[0] = 0;
for (k = 0; k < num; k++)
// Check if all ac coefficients can be quantized to zero.
if (!(var_tx[k] < ac_thr || var == 0)) {
ac_test = 0;
break;
}
for (k = 0; k < num; k++)
// Check if dc coefficient can be quantized to zero.
if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
dc_test = 0;
break;
}
if (ac_test) {
x->skip_txfm[0] = 2;
if (dc_test)
x->skip_txfm[0] = 1;
} else if (dc_test) {
skip_dc = 1;
}
}
if (x->skip_txfm[0] == 1) {
int skip_uv[2] = {0};
unsigned int var_uv[2];
unsigned int sse_uv[2];
*out_rate_sum = 0;
*out_dist_sum = sse << 4;
// Transform skipping test in UV planes.
for (i = 1; i <= 2; i++) {
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0].src_mi->mbmi, pd);
const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
const int sf = (bw - b_width_log2_lookup[unit_size]) +
(bh - b_height_log2_lookup[unit_size]);
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf);
const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf);
int j = i - 1;
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);
var_uv[j] = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse_uv[j]);
if (var_uv[j] < uv_ac_thr || var_uv[j] == 0) {
if (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j])
skip_uv[j] = 1;
}
}
// If the transform in YUV planes are skippable, the mode search checks
// fewer inter modes and doesn't check intra modes.
if (skip_uv[0] & skip_uv[1]) {
*early_term = 1;
}
return;
}
if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
dc_quant >> (xd->bd - 5), &rate, &dist);
} else {
vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
dc_quant >> 3, &rate, &dist);
}
#else
vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
dc_quant >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
if (!skip_dc) {
*out_rate_sum = rate >> 1;
*out_dist_sum = dist << 3;
} else {
*out_rate_sum = 0;
*out_dist_sum = (sse - var) << 4;
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
ac_quant >> (xd->bd - 5), &rate, &dist);
} else {
vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
ac_quant >> 3, &rate, &dist);
}
#else
vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
ac_quant >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
*out_rate_sum += rate;
*out_dist_sum += dist << 4;
}
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd, MACROBLOCK *x, MACROBLOCKD *xd,
@@ -312,6 +557,132 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
*out_dist_sum += dist << 4; *out_dist_sum += dist << 4;
} }
#if CONFIG_VP9_HIGHBITDEPTH
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
int *skippable, int64_t *sse, int plane,
BLOCK_SIZE bsize, TX_SIZE tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
unsigned int var_y, sse_y;
(void)plane;
(void)tx_size;
model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
*sse = INT_MAX;
*skippable = 0;
return;
}
#else
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
int *skippable, int64_t *sse, int plane,
BLOCK_SIZE bsize, TX_SIZE tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
const struct macroblockd_plane *pd = &xd->plane[plane];
const struct macroblock_plane *const p = &x->plane[plane];
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
const int step = 1 << (tx_size << 1);
const int block_step = (1 << tx_size);
int block = 0, r, c;
int shift = tx_size == TX_32X32 ? 0 : 2;
const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
xd->mb_to_right_edge >> (5 + pd->subsampling_x));
const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
int eob_cost = 0;
(void)cpi;
vp9_subtract_plane(x, bsize, plane);
*skippable = 1;
// Keep track of the row and column of the blocks we use so that we know
// if we are in the unrestricted motion border.
for (r = 0; r < max_blocks_high; r += block_step) {
for (c = 0; c < num_4x4_w; c += block_step) {
if (c < max_blocks_wide) {
const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
const int16_t *src_diff;
src_diff = &p->src_diff[(r * diff_stride + c) << 2];
switch (tx_size) {
case TX_32X32:
vp9_fdct32x32_rd(src_diff, coeff, diff_stride);
vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_16X16:
vp9_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
break;
}
*skippable &= (*eob == 0);
eob_cost += 1;
}
block += step;
}
}
if (*skippable && *sse < INT64_MAX) {
*rate = 0;
*dist = (*sse << 6) >> shift;
*sse = *dist;
return;
}
block = 0;
*rate = 0;
*dist = 0;
*sse = (*sse << 6) >> shift;
for (r = 0; r < max_blocks_high; r += block_step) {
for (c = 0; c < num_4x4_w; c += block_step) {
if (c < max_blocks_wide) {
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
if (*eob == 1)
*rate += (int)abs(qcoeff[0]);
else if (*eob > 1)
*rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);
*dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
}
block += step;
}
}
if (*skippable == 0) {
*rate <<= 10;
*rate += (eob_cost << 8);
}
}
#endif
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize, static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd, MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum, int *out_rate_sum, int64_t *out_dist_sum,
@@ -518,7 +889,9 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
int i, j; int i, j;
int rate; int rate;
int64_t dist; int64_t dist;
unsigned int var_y, sse_y; int64_t this_sse = INT64_MAX;
int is_skippable;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
assert(plane == 0); assert(plane == 0);
(void) plane; (void) plane;
@@ -533,8 +906,13 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
x->skip_encode ? src_stride : dst_stride, x->skip_encode ? src_stride : dst_stride,
pd->dst.buf, dst_stride, pd->dst.buf, dst_stride,
i, j, 0); i, j, 0);
// This procedure assumes zero offset from p->src.buf and pd->dst.buf.
model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y); // TODO(jingning): This needs further refactoring.
block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0,
bsize_tx, MIN(tx_size, TX_16X16));
x->skip_txfm[0] = is_skippable;
rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);
p->src.buf = src_buf_base; p->src.buf = src_buf_base;
pd->dst.buf = dst_buf_base; pd->dst.buf = dst_buf_base;
args->rate += rate; args->rate += rate;
@@ -562,12 +940,11 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
MIN(max_txsize_lookup[bsize], MIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
MODE_INFO *const mic = xd->mi[0].src_mi; MODE_INFO *const mic = xd->mi[0].src_mi;
int *bmode_costs; int bmode_costs;
const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi; const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL; const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
bmode_costs = cpi->y_mode_costs[A][L];
(void) ctx; (void) ctx;
vp9_rd_cost_reset(&best_rdc); vp9_rd_cost_reset(&best_rdc);
@@ -585,11 +962,17 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
args.rate = 0; args.rate = 0;
args.dist = 0; args.dist = 0;
mbmi->tx_size = intra_tx_size; mbmi->tx_size = intra_tx_size;
if (A == L)
bmode_costs = (this_mode == A) ? 406 : 961;
else // (A != L)
bmode_costs = (this_mode == A) || (this_mode == L) ? 512 : 1024;
vp9_foreach_transformed_block_in_plane(xd, bsize, 0, vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
estimate_block_intra, &args); estimate_block_intra, &args);
this_rdc.rate = args.rate; this_rdc.rate = args.rate;
this_rdc.dist = args.dist; this_rdc.dist = args.dist;
this_rdc.rate += bmode_costs[this_mode]; this_rdc.rate += bmode_costs;
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist); this_rdc.rate, this_rdc.dist);
@@ -602,10 +985,6 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
*rd_cost = best_rdc; *rd_cost = best_rdc;
} }
static const int ref_frame_cost[MAX_REF_FRAMES] = {
1235, 229, 530, 615,
};
typedef struct { typedef struct {
MV_REFERENCE_FRAME ref_frame; MV_REFERENCE_FRAME ref_frame;
PREDICTION_MODE pred_mode; PREDICTION_MODE pred_mode;
@@ -682,6 +1061,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int ref_frame_skip_mask = 0; int ref_frame_skip_mask = 0;
int idx; int idx;
int best_pred_sad = INT_MAX; int best_pred_sad = INT_MAX;
int best_early_term = 0;
int ref_frame_cost[MAX_REF_FRAMES];
vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] =
ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1);
ref_frame_cost[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
if (reuse_inter_pred) { if (reuse_inter_pred) {
int i; int i;
@@ -773,6 +1167,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int mode_index; int mode_index;
int i; int i;
PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode; PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
int64_t this_sse;
int is_skippable;
int this_early_term = 0;
if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
continue; continue;
@@ -850,6 +1248,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
best_pred_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, best_pred_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
x->plane[0].src.stride, x->plane[0].src.stride,
pre_buf, pre_stride); pre_buf, pre_stride);
x->pred_mv_sad[LAST_FRAME] = best_pred_sad;
} }
if (this_mode != NEARESTMV && if (this_mode != NEARESTMV &&
@@ -924,17 +1323,54 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
var_y = pf_var[best_filter]; var_y = pf_var[best_filter];
sse_y = pf_sse[best_filter]; sse_y = pf_sse[best_filter];
x->skip_txfm[0] = skip_txfm; x->skip_txfm[0] = skip_txfm;
if (reuse_inter_pred) {
pd->dst.buf = this_mode_pred->data;
pd->dst.stride = this_mode_pred->stride;
}
} else { } else {
mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref; mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
// For large partition blocks, extra testing is done.
if (bsize > BLOCK_32X32 && xd->mi[0].src_mi->mbmi.segment_id != 1 &&
cm->base_qindex) {
model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate,
&this_rdc.dist, &var_y, &sse_y, mi_row, mi_col,
&this_early_term);
} else {
model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
&var_y, &sse_y); &var_y, &sse_y);
this_rdc.rate += }
cm->interp_filter == SWITCHABLE ? }
vp9_get_switchable_rate(cpi, xd) : 0;
if (!this_early_term) {
this_sse = (int64_t)sse_y;
block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable,
&this_sse, 0, bsize, MIN(mbmi->tx_size, TX_16X16));
x->skip_txfm[0] = is_skippable;
if (is_skippable) {
this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
} else {
if (RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist) <
RDCOST(x->rdmult, x->rddiv, 0, this_sse)) {
this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
} else {
this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
this_rdc.dist = this_sse;
x->skip_txfm[0] = 1;
}
}
if (cm->interp_filter == SWITCHABLE) {
if ((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07)
this_rdc.rate += vp9_get_switchable_rate(cpi, xd);
}
} else {
this_rdc.rate += cm->interp_filter == SWITCHABLE ?
vp9_get_switchable_rate(cpi, xd) : 0;
this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
} }
// chroma component rate-distortion cost modeling
if (x->color_sensitivity[0] || x->color_sensitivity[1]) { if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
int uv_rate = 0; int uv_rate = 0;
int64_t uv_dist = 0; int64_t uv_dist = 0;
@@ -942,7 +1378,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1); vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
if (x->color_sensitivity[1]) if (x->color_sensitivity[1])
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2); vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist, &var_y, &sse_y); model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist,
&var_y, &sse_y);
this_rdc.rate += uv_rate; this_rdc.rate += uv_rate;
this_rdc.dist += uv_dist; this_rdc.dist += uv_dist;
} }
@@ -981,6 +1418,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
best_tx_size = mbmi->tx_size; best_tx_size = mbmi->tx_size;
best_ref_frame = ref_frame; best_ref_frame = ref_frame;
best_mode_skip_txfm = x->skip_txfm[0]; best_mode_skip_txfm = x->skip_txfm[0];
best_early_term = this_early_term;
if (reuse_inter_pred) { if (reuse_inter_pred) {
free_pred_buffer(best_pred); free_pred_buffer(best_pred);
@@ -993,6 +1431,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (x->skip) if (x->skip)
break; break;
// If early termination flag is 1 and at least 2 modes are checked,
// the mode search is terminated.
if (best_early_term && idx > 0) {
x->skip = 1;
break;
}
} }
mbmi->mode = best_mode; mbmi->mode = best_mode;
@@ -1041,6 +1486,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const PREDICTION_MODE this_mode = intra_mode_list[i]; const PREDICTION_MODE this_mode = intra_mode_list[i];
if (!((1 << this_mode) & cpi->sf.intra_y_mode_mask[intra_tx_size])) if (!((1 << this_mode) & cpi->sf.intra_y_mode_mask[intra_tx_size]))
continue; continue;
mbmi->mode = this_mode;
mbmi->ref_frame[0] = INTRA_FRAME;
args.mode = this_mode; args.mode = this_mode;
args.rate = 0; args.rate = 0;
args.dist = 0; args.dist = 0;
@@ -1057,17 +1504,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (this_rdc.rdcost < best_rdc.rdcost) { if (this_rdc.rdcost < best_rdc.rdcost) {
best_rdc = this_rdc; best_rdc = this_rdc;
mbmi->mode = this_mode; best_mode = this_mode;
best_intra_tx_size = mbmi->tx_size; best_intra_tx_size = mbmi->tx_size;
mbmi->ref_frame[0] = INTRA_FRAME; best_ref_frame = INTRA_FRAME;
mbmi->uv_mode = this_mode; mbmi->uv_mode = this_mode;
mbmi->mv[0].as_int = INVALID_MV; mbmi->mv[0].as_int = INVALID_MV;
best_mode_skip_txfm = x->skip_txfm[0];
} }
} }
// Reset mb_mode_info to the best inter mode. // Reset mb_mode_info to the best inter mode.
if (mbmi->ref_frame[0] != INTRA_FRAME) { if (best_ref_frame != INTRA_FRAME) {
x->skip_txfm[0] = best_mode_skip_txfm;
mbmi->tx_size = best_tx_size; mbmi->tx_size = best_tx_size;
} else { } else {
mbmi->tx_size = best_intra_tx_size; mbmi->tx_size = best_intra_tx_size;
@@ -1075,6 +1522,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
} }
pd->dst = orig_dst; pd->dst = orig_dst;
mbmi->mode = best_mode;
mbmi->ref_frame[0] = best_ref_frame;
x->skip_txfm[0] = best_mode_skip_txfm;
{
int i;
for (i = 0; i < 64; ++i)
mbmi->inter_tx_size[i] = mbmi->tx_size;
}
if (reuse_inter_pred && best_pred != NULL) { if (reuse_inter_pred && best_pred != NULL) {
if (best_pred->data != orig_dst.buf && is_inter_mode(mbmi->mode)) { if (best_pred->data != orig_dst.buf && is_inter_mode(mbmi->mode)) {

View File

@@ -66,12 +66,7 @@ static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
static void fill_mode_costs(VP9_COMP *cpi) { static void fill_mode_costs(VP9_COMP *cpi) {
const FRAME_CONTEXT *const fc = cpi->common.fc; const FRAME_CONTEXT *const fc = cpi->common.fc;
int i, j; int i;
for (i = 0; i < INTRA_MODES; ++i)
for (j = 0; j < INTRA_MODES; ++j)
vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
vp9_intra_mode_tree);
vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME], vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
@@ -457,6 +452,7 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
int best_sad = INT_MAX; int best_sad = INT_MAX;
int this_sad = INT_MAX; int this_sad = INT_MAX;
int max_mv = 0; int max_mv = 0;
int near_same_nearest;
uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *src_y_ptr = x->plane[0].src.buf;
uint8_t *ref_y_ptr; uint8_t *ref_y_ptr;
const int num_mv_refs = MAX_MV_REF_CANDIDATES + const int num_mv_refs = MAX_MV_REF_CANDIDATES +
@@ -469,23 +465,27 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
pred_mv[2] = x->pred_mv[ref_frame]; pred_mv[2] = x->pred_mv[ref_frame];
assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0]))); assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
near_same_nearest =
mbmi->ref_mvs[ref_frame][0].as_int == mbmi->ref_mvs[ref_frame][1].as_int;
// Get the sad for each candidate reference mv. // Get the sad for each candidate reference mv.
for (i = 0; i < num_mv_refs; ++i) { for (i = 0; i < num_mv_refs; ++i) {
const MV *this_mv = &pred_mv[i]; const MV *this_mv = &pred_mv[i];
int fp_row, fp_col;
max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3); if (i == 1 && near_same_nearest)
if (is_zero_mv(this_mv) && zero_seen)
continue; continue;
fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
zero_seen |= is_zero_mv(this_mv); if (fp_row ==0 && fp_col == 0 && zero_seen)
continue;
ref_y_ptr = zero_seen |= (fp_row ==0 && fp_col == 0);
&ref_y_buffer[ref_y_stride * (this_mv->row >> 3) + (this_mv->col >> 3)];
ref_y_ptr =&ref_y_buffer[ref_y_stride * fp_row + fp_col];
// Find sad for current vector. // Find sad for current vector.
this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
ref_y_ptr, ref_y_stride); ref_y_ptr, ref_y_stride);
// Note if it is the best so far. // Note if it is the best so far.
if (this_sad < best_sad) { if (this_sad < best_sad) {
best_sad = this_sad; best_sad = this_sad;

File diff suppressed because it is too large Load Diff

View File

@@ -29,6 +29,15 @@ void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x,
struct RD_COST *rd_cost, BLOCK_SIZE bsize, struct RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd); PICK_MODE_CONTEXT *ctx, int64_t best_rd);
unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs);
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs, int bd);
#endif
void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi,
struct TileDataEnc *tile_data, struct TileDataEnc *tile_data,
struct macroblock *x, struct macroblock *x,

View File

@@ -301,7 +301,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1); (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15; sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION; sf->partition_search_type = REFERENCE_PARTITION;
sf->use_nonrd_pick_mode = !is_keyframe; sf->use_nonrd_pick_mode = 1;
sf->allow_skip_recode = 0; sf->allow_skip_recode = 0;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;

View File

@@ -580,7 +580,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
mb_y_offset += 16 * (f->y_stride - mb_cols); mb_y_offset += 16 * (f->y_stride - mb_cols);
mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols; mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols;
} }
// Restore input state // Restore input state
for (i = 0; i < MAX_MB_PLANE; i++) for (i = 0; i < MAX_MB_PLANE; i++)
mbd->plane[i].pre[0].buf = input_buffer[i]; mbd->plane[i].pre[0].buf = input_buffer[i];

View File

@@ -65,18 +65,6 @@ const vp9_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE
}; };
// Unconstrained Node Tree
const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
2, 6, // 0 = LOW_VAL
-TWO_TOKEN, 4, // 1 = TWO
-THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE
8, 10, // 3 = HIGH_LOW
-CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE
12, 14, // 5 = CAT_THREEFOUR
-CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE
};
static const vp9_tree_index cat1[2] = {0, 0}; static const vp9_tree_index cat1[2] = {0, 0};
static const vp9_tree_index cat2[4] = {2, 2, 0, 0}; static const vp9_tree_index cat2[4] = {2, 2, 0, 0};
static const vp9_tree_index cat3[6] = {2, 2, 4, 4, 0, 0}; static const vp9_tree_index cat3[6] = {2, 2, 4, 4, 0, 0};
@@ -454,6 +442,20 @@ struct tokenize_b_args {
TOKENEXTRA **tp; TOKENEXTRA **tp;
}; };
static void set_entropy_context_b_inter(int plane, int block,
BLOCK_SIZE plane_bsize,
int blk_row, int blk_col,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
ThreadData *const td = args->td;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
vp9_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0,
blk_col, blk_row);
}
static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) { TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg; struct tokenize_b_args* const args = arg;
@@ -498,6 +500,85 @@ static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max; return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
} }
static void tokenize_b_inter(int plane, int block, BLOCK_SIZE plane_bsize,
int blk_row, int blk_col,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
VP9_COMP *cpi = args->cpi;
ThreadData *const td = args->td;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
TOKENEXTRA **tp = args->tp;
uint8_t token_cache[32 * 32];
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
int pt; /* near block/prev token context index */
int c;
TOKENEXTRA *t = *tp; /* store tokens starting here */
int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
const scan_order *so;
const int ref = is_inter_block(mbmi);
unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[tx_size][type][ref];
vp9_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
cpi->common.fc->coef_probs[tx_size][type][ref];
unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
td->counts->eob_branch[tx_size][type][ref];
const uint8_t *const band = get_band_translate(tx_size);
const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
int16_t token;
EXTRABIT extra;
pt = get_entropy_context(tx_size, pd->above_context + blk_col,
pd->left_context + blk_row);
so = get_scan(xd, tx_size, type, block);
scan = so->scan;
nb = so->neighbors;
c = 0;
while (c < eob) {
int v = 0;
int skip_eob = 0;
v = qcoeff[scan[c]];
while (!v) {
add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, skip_eob,
counts[band[c]][pt]);
eob_branch[band[c]][pt] += !skip_eob;
skip_eob = 1;
token_cache[scan[c]] = 0;
++c;
pt = get_coef_context(nb, token_cache, c);
v = qcoeff[scan[c]];
}
vp9_get_token_extra(v, &token, &extra);
add_token(&t, coef_probs[band[c]][pt], extra, (uint8_t)token,
(uint8_t)skip_eob, counts[band[c]][pt]);
eob_branch[band[c]][pt] += !skip_eob;
token_cache[scan[c]] = vp9_pt_energy_class[token];
++c;
pt = get_coef_context(nb, token_cache, c);
}
if (c < seg_eob) {
add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0,
counts[band[c]][pt]);
++eob_branch[band[c]][pt];
}
*tp = t;
vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, blk_col, blk_row);
}
static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) { TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg; struct tokenize_b_args* const args = arg;
@@ -619,6 +700,105 @@ int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
return result; return result;
} }
void tokenize_tx(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int dry_run, TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
int blk_row, int blk_col, int block, int plane,
void *arg) {
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
const struct macroblockd_plane *const pd = &xd->plane[plane];
int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
(blk_col >> (1 - pd->subsampling_x));
TX_SIZE plane_tx_size = plane ?
get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], plane_bsize, 0, 0) :
mbmi->inter_tx_size[tx_idx];
int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
if (xd->mb_to_right_edge < 0)
max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
if (tx_size == plane_tx_size) {
if (!dry_run)
tokenize_b_inter(plane, block, plane_bsize,
blk_row, blk_col, tx_size, arg);
else
set_entropy_context_b_inter(plane, block, plane_bsize,
blk_row, blk_col, tx_size, arg);
} else {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bh = num_4x4_blocks_wide_lookup[bsize];
int i;
assert(num_4x4_blocks_high_lookup[bsize] ==
num_4x4_blocks_wide_lookup[bsize]);
for (i = 0; i < 4; ++i) {
int offsetr = (i >> 1) * bh / 2;
int offsetc = (i & 0x01) * bh / 2;
int step = 1 << (2 * (tx_size - 1));
tokenize_tx(cpi, td, t, dry_run, tx_size - 1, plane_bsize,
blk_row + offsetr, blk_col + offsetc,
block + i * step, plane, arg);
}
}
}
void vp9_tokenize_sb_inter(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int dry_run, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
TOKENEXTRA *t_backup = *t;
const int ctx = vp9_get_skip_context(xd);
const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, td, t};
int plane;
if (mbmi->skip) {
if (!dry_run)
td->counts->skip[ctx][1] += skip_inc;
reset_skip_context(xd, bsize);
if (dry_run)
*t = t_backup;
return;
}
if (!dry_run)
td->counts->skip[ctx][0] += skip_inc;
else
*t = t_backup;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int idx, idy;
int block = 0;
int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bh) {
tokenize_tx(cpi, td, t, dry_run, max_txsize_lookup[plane_bsize],
plane_bsize, idy, idx, block, plane, &arg);
block += step;
}
}
}
}
void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int dry_run, BLOCK_SIZE bsize) { int dry_run, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;

View File

@@ -51,6 +51,9 @@ int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
struct VP9_COMP; struct VP9_COMP;
struct ThreadData; struct ThreadData;
void vp9_tokenize_sb_inter(struct VP9_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
void vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td, void vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);

View File

@@ -57,6 +57,179 @@ unsigned int vp9_avg_4x4_sse2(const uint8_t *s, int p) {
return (avg + 8) >> 4; return (avg + 8) >> 4;
} }
static void hadamard_col8_sse2(__m128i *in, int iter) {
__m128i a0 = in[0];
__m128i a1 = in[1];
__m128i a2 = in[2];
__m128i a3 = in[3];
__m128i a4 = in[4];
__m128i a5 = in[5];
__m128i a6 = in[6];
__m128i a7 = in[7];
__m128i b0 = _mm_add_epi16(a0, a1);
__m128i b1 = _mm_sub_epi16(a0, a1);
__m128i b2 = _mm_add_epi16(a2, a3);
__m128i b3 = _mm_sub_epi16(a2, a3);
__m128i b4 = _mm_add_epi16(a4, a5);
__m128i b5 = _mm_sub_epi16(a4, a5);
__m128i b6 = _mm_add_epi16(a6, a7);
__m128i b7 = _mm_sub_epi16(a6, a7);
a0 = _mm_add_epi16(b0, b2);
a1 = _mm_add_epi16(b1, b3);
a2 = _mm_sub_epi16(b0, b2);
a3 = _mm_sub_epi16(b1, b3);
a4 = _mm_add_epi16(b4, b6);
a5 = _mm_add_epi16(b5, b7);
a6 = _mm_sub_epi16(b4, b6);
a7 = _mm_sub_epi16(b5, b7);
if (iter == 0) {
b0 = _mm_add_epi16(a0, a4);
b7 = _mm_add_epi16(a1, a5);
b3 = _mm_add_epi16(a2, a6);
b4 = _mm_add_epi16(a3, a7);
b2 = _mm_sub_epi16(a0, a4);
b6 = _mm_sub_epi16(a1, a5);
b1 = _mm_sub_epi16(a2, a6);
b5 = _mm_sub_epi16(a3, a7);
a0 = _mm_unpacklo_epi16(b0, b1);
a1 = _mm_unpacklo_epi16(b2, b3);
a2 = _mm_unpackhi_epi16(b0, b1);
a3 = _mm_unpackhi_epi16(b2, b3);
a4 = _mm_unpacklo_epi16(b4, b5);
a5 = _mm_unpacklo_epi16(b6, b7);
a6 = _mm_unpackhi_epi16(b4, b5);
a7 = _mm_unpackhi_epi16(b6, b7);
b0 = _mm_unpacklo_epi32(a0, a1);
b1 = _mm_unpacklo_epi32(a4, a5);
b2 = _mm_unpackhi_epi32(a0, a1);
b3 = _mm_unpackhi_epi32(a4, a5);
b4 = _mm_unpacklo_epi32(a2, a3);
b5 = _mm_unpacklo_epi32(a6, a7);
b6 = _mm_unpackhi_epi32(a2, a3);
b7 = _mm_unpackhi_epi32(a6, a7);
in[0] = _mm_unpacklo_epi64(b0, b1);
in[1] = _mm_unpackhi_epi64(b0, b1);
in[2] = _mm_unpacklo_epi64(b2, b3);
in[3] = _mm_unpackhi_epi64(b2, b3);
in[4] = _mm_unpacklo_epi64(b4, b5);
in[5] = _mm_unpackhi_epi64(b4, b5);
in[6] = _mm_unpacklo_epi64(b6, b7);
in[7] = _mm_unpackhi_epi64(b6, b7);
} else {
in[0] = _mm_add_epi16(a0, a4);
in[7] = _mm_add_epi16(a1, a5);
in[3] = _mm_add_epi16(a2, a6);
in[4] = _mm_add_epi16(a3, a7);
in[2] = _mm_sub_epi16(a0, a4);
in[6] = _mm_sub_epi16(a1, a5);
in[1] = _mm_sub_epi16(a2, a6);
in[5] = _mm_sub_epi16(a3, a7);
}
}
void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride,
int16_t *coeff) {
__m128i src[8];
src[0] = _mm_load_si128((const __m128i *)src_diff);
src[1] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[2] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[3] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[4] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[5] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[6] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[7] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
hadamard_col8_sse2(src, 0);
hadamard_col8_sse2(src, 1);
_mm_store_si128((__m128i *)coeff, src[0]);
coeff += 8;
_mm_store_si128((__m128i *)coeff, src[1]);
coeff += 8;
_mm_store_si128((__m128i *)coeff, src[2]);
coeff += 8;
_mm_store_si128((__m128i *)coeff, src[3]);
coeff += 8;
_mm_store_si128((__m128i *)coeff, src[4]);
coeff += 8;
_mm_store_si128((__m128i *)coeff, src[5]);
coeff += 8;
_mm_store_si128((__m128i *)coeff, src[6]);
coeff += 8;
_mm_store_si128((__m128i *)coeff, src[7]);
}
void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
int16_t *coeff) {
int idx;
for (idx = 0; idx < 4; ++idx) {
int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
+ (idx & 0x01) * 8;
vp9_hadamard_8x8_sse2(src_ptr, src_stride, coeff + idx * 64);
}
for (idx = 0; idx < 64; idx += 8) {
__m128i coeff0 = _mm_load_si128((const __m128i *)coeff);
__m128i coeff1 = _mm_load_si128((const __m128i *)(coeff + 64));
__m128i coeff2 = _mm_load_si128((const __m128i *)(coeff + 128));
__m128i coeff3 = _mm_load_si128((const __m128i *)(coeff + 192));
__m128i b0 = _mm_add_epi16(coeff0, coeff1);
__m128i b1 = _mm_sub_epi16(coeff0, coeff1);
__m128i b2 = _mm_add_epi16(coeff2, coeff3);
__m128i b3 = _mm_sub_epi16(coeff2, coeff3);
coeff0 = _mm_add_epi16(b0, b2);
coeff1 = _mm_add_epi16(b1, b3);
coeff0 = _mm_srai_epi16(coeff0, 1);
coeff1 = _mm_srai_epi16(coeff1, 1);
_mm_store_si128((__m128i *)coeff, coeff0);
_mm_store_si128((__m128i *)(coeff + 64), coeff1);
coeff2 = _mm_sub_epi16(b0, b2);
coeff3 = _mm_sub_epi16(b1, b3);
coeff2 = _mm_srai_epi16(coeff2, 1);
coeff3 = _mm_srai_epi16(coeff3, 1);
_mm_store_si128((__m128i *)(coeff + 128), coeff2);
_mm_store_si128((__m128i *)(coeff + 192), coeff3);
coeff += 8;
}
}
int16_t vp9_satd_sse2(const int16_t *coeff, int length) {
int i;
__m128i sum = _mm_load_si128((const __m128i *)coeff);
__m128i sign = _mm_srai_epi16(sum, 15);
__m128i val = _mm_xor_si128(sum, sign);
sum = _mm_sub_epi16(val, sign);
coeff += 8;
for (i = 8; i < length; i += 8) {
__m128i src_line = _mm_load_si128((const __m128i *)coeff);
sign = _mm_srai_epi16(src_line, 15);
val = _mm_xor_si128(src_line, sign);
val = _mm_sub_epi16(val, sign);
sum = _mm_add_epi16(sum, val);
coeff += 8;
}
val = _mm_srli_si128(sum, 8);
sum = _mm_add_epi16(sum, val);
val = _mm_srli_epi64(sum, 32);
sum = _mm_add_epi16(sum, val);
val = _mm_srli_epi32(sum, 16);
sum = _mm_add_epi16(sum, val);
return _mm_extract_epi16(sum, 0);
}
void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref, void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
const int ref_stride, const int height) { const int ref_stride, const int height) {
int idx; int idx;

View File

@@ -293,7 +293,8 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
if (!skip_block) { if (!skip_block) {
__m128i eob; __m128i eob;
__m128i round, quant, dequant; __m128i round, quant, dequant, thr;
int16_t nzflag;
{ {
__m128i coeff0, coeff1; __m128i coeff0, coeff1;
@@ -368,6 +369,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
// AC only loop // AC only loop
index = 2; index = 2;
thr = _mm_srai_epi16(dequant, 1);
while (n_coeffs < 0) { while (n_coeffs < 0) {
__m128i coeff0, coeff1; __m128i coeff0, coeff1;
{ {
@@ -387,6 +389,10 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
if (nzflag) {
qcoeff0 = _mm_adds_epi16(qcoeff0, round); qcoeff0 = _mm_adds_epi16(qcoeff0, round);
qcoeff1 = _mm_adds_epi16(qcoeff1, round); qcoeff1 = _mm_adds_epi16(qcoeff1, round);
qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
@@ -406,9 +412,16 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
} else {
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
}
} }
{ if (nzflag) {
// Scan for eob // Scan for eob
__m128i zero_coeff0, zero_coeff1; __m128i zero_coeff0, zero_coeff1;
__m128i nzero_coeff0, nzero_coeff1; __m128i nzero_coeff0, nzero_coeff1;

View File

@@ -72,3 +72,49 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
movd edx, m5 movd edx, m5
%endif %endif
RET RET
; Compute the sum of squared difference between two int16_t vectors.
; int64_t vp9_block_error_fp(int16_t *coeff, int16_t *dqcoeff,
; intptr_t block_size)
INIT_XMM sse2
cglobal block_error_fp, 3, 3, 8, uqc, dqc, size
pxor m4, m4 ; sse accumulator
pxor m5, m5 ; dedicated zero register
lea uqcq, [uqcq+sizeq*2]
lea dqcq, [dqcq+sizeq*2]
neg sizeq
.loop:
mova m2, [uqcq+sizeq*2]
mova m0, [dqcq+sizeq*2]
mova m3, [uqcq+sizeq*2+mmsize]
mova m1, [dqcq+sizeq*2+mmsize]
psubw m0, m2
psubw m1, m3
; individual errors are max. 15bit+sign, so squares are 30bit, and
; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
pmaddwd m0, m0
pmaddwd m1, m1
; accumulate in 64bit
punpckldq m7, m0, m5
punpckhdq m0, m5
paddq m4, m7
punpckldq m7, m1, m5
paddq m4, m0
punpckhdq m1, m5
paddq m4, m7
paddq m4, m1
add sizeq, mmsize
jl .loop
; accumulate horizontally and store in return value
movhlps m5, m4
paddq m4, m5
%if ARCH_X86_64
movq rax, m4
%else
pshufd m5, m4, 0x1
movd eax, m4
movd edx, m5
%endif
RET

View File

@@ -230,6 +230,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
const int16_t* scan_ptr, const int16_t* scan_ptr,
const int16_t* iscan_ptr) { const int16_t* iscan_ptr) {
__m128i zero; __m128i zero;
__m128i thr;
int16_t nzflag;
(void)scan_ptr; (void)scan_ptr;
(void)zbin_ptr; (void)zbin_ptr;
(void)quant_shift_ptr; (void)quant_shift_ptr;
@@ -316,6 +318,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
n_coeffs += 8 * 2; n_coeffs += 8 * 2;
} }
thr = _mm_srai_epi16(dequant, 1);
// AC only loop // AC only loop
while (n_coeffs < 0) { while (n_coeffs < 0) {
__m128i coeff0, coeff1; __m128i coeff0, coeff1;
@@ -335,6 +339,10 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
if (nzflag) {
qcoeff0 = _mm_adds_epi16(qcoeff0, round); qcoeff0 = _mm_adds_epi16(qcoeff0, round);
qcoeff1 = _mm_adds_epi16(qcoeff1, round); qcoeff1 = _mm_adds_epi16(qcoeff1, round);
qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
@@ -354,9 +362,16 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
} else {
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
}
} }
{ if (nzflag) {
// Scan for eob // Scan for eob
__m128i zero_coeff0, zero_coeff1; __m128i zero_coeff0, zero_coeff1;
__m128i nzero_coeff0, nzero_coeff1; __m128i nzero_coeff0, nzero_coeff1;

View File

@@ -282,6 +282,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
psignw m8, m9 psignw m8, m9
psignw m13, m10 psignw m13, m10
psrlw m0, m3, 2 psrlw m0, m3, 2
%else
psrlw m0, m3, 1
%endif %endif
mova [r4q+ncoeffq*2+ 0], m8 mova [r4q+ncoeffq*2+ 0], m8
mova [r4q+ncoeffq*2+16], m13 mova [r4q+ncoeffq*2+16], m13
@@ -302,7 +304,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
pabsw m6, m9 ; m6 = abs(m9) pabsw m6, m9 ; m6 = abs(m9)
pabsw m11, m10 ; m11 = abs(m10) pabsw m11, m10 ; m11 = abs(m10)
%ifidn %1, fp_32x32
pcmpgtw m7, m6, m0 pcmpgtw m7, m6, m0
pcmpgtw m12, m11, m0 pcmpgtw m12, m11, m0
pmovmskb r6d, m7 pmovmskb r6d, m7
@@ -310,7 +312,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
or r6, r2 or r6, r2
jz .skip_iter jz .skip_iter
%endif
pcmpeqw m7, m7 pcmpeqw m7, m7
paddsw m6, m1 ; m6 += round paddsw m6, m1 ; m6 += round
@@ -348,7 +350,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
add ncoeffq, mmsize add ncoeffq, mmsize
jl .ac_only_loop jl .ac_only_loop
%ifidn %1, fp_32x32
jmp .accumulate_eob jmp .accumulate_eob
.skip_iter: .skip_iter:
mova [r3q+ncoeffq*2+ 0], m5 mova [r3q+ncoeffq*2+ 0], m5
@@ -357,7 +358,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
mova [r4q+ncoeffq*2+16], m5 mova [r4q+ncoeffq*2+16], m5
add ncoeffq, mmsize add ncoeffq, mmsize
jl .ac_only_loop jl .ac_only_loop
%endif
.accumulate_eob: .accumulate_eob:
; horizontally accumulate/max eobs and write into [eob] memory pointer ; horizontally accumulate/max eobs and write into [eob] memory pointer

View File

@@ -1260,6 +1260,21 @@ static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
} }
} }
static vpx_codec_err_t ctrl_get_active_map(vpx_codec_alg_priv_t *ctx,
va_list args) {
vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *);
if (map) {
if (!vp9_get_active_map(ctx->cpi, map->active_map,
(int)map->rows, (int)map->cols))
return VPX_CODEC_OK;
else
return VPX_CODEC_INVALID_PARAM;
} else {
return VPX_CODEC_INVALID_PARAM;
}
}
static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx,
va_list args) { va_list args) {
vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *); vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *);
@@ -1417,6 +1432,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) #if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
{VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id}, {VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id},
#endif #endif
{VP9E_GET_ACTIVEMAP, ctrl_get_active_map},
{ -1, NULL}, { -1, NULL},
}; };

View File

@@ -116,6 +116,9 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) {
(FrameWorkerData *)worker->data1; (FrameWorkerData *)worker->data1;
vp9_get_worker_interface()->end(worker); vp9_get_worker_interface()->end(worker);
vp9_remove_common(&frame_worker_data->pbi->common); vp9_remove_common(&frame_worker_data->pbi->common);
#if CONFIG_VP9_POSTPROC
vp9_free_postproc_buffers(&frame_worker_data->pbi->common);
#endif
vp9_decoder_remove(frame_worker_data->pbi); vp9_decoder_remove(frame_worker_data->pbi);
vpx_free(frame_worker_data->scratch_buffer); vpx_free(frame_worker_data->scratch_buffer);
#if CONFIG_MULTITHREAD #if CONFIG_MULTITHREAD
@@ -129,8 +132,10 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) {
#endif #endif
} }
if (ctx->buffer_pool) if (ctx->buffer_pool) {
vp9_free_ref_frame_buffers(ctx->buffer_pool);
vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers); vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers);
}
vpx_free(ctx->frame_workers); vpx_free(ctx->frame_workers);
vpx_free(ctx->buffer_pool); vpx_free(ctx->buffer_pool);
@@ -750,6 +755,8 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
(FrameWorkerData *)worker->data1; (FrameWorkerData *)worker->data1;
ctx->next_output_worker_id = ctx->next_output_worker_id =
(ctx->next_output_worker_id + 1) % ctx->num_frame_workers; (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
set_ppflags(ctx, &flags);
// Wait for the frame from worker thread. // Wait for the frame from worker thread.
if (winterface->sync(worker)) { if (winterface->sync(worker)) {
// Check if worker has received any frames. // Check if worker has received any frames.

View File

@@ -425,10 +425,18 @@ struct vpx_internal_error_info {
jmp_buf jmp; jmp_buf jmp;
}; };
#define CLANG_ANALYZER_NORETURN
#if defined(__has_feature)
#if __has_feature(attribute_analyzer_noreturn)
#undef CLANG_ANALYZER_NORETURN
#define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn))
#endif
#endif
void vpx_internal_error(struct vpx_internal_error_info *info, void vpx_internal_error(struct vpx_internal_error_info *info,
vpx_codec_err_t error, vpx_codec_err_t error,
const char *fmt, const char *fmt,
...); ...) CLANG_ANALYZER_NORETURN;
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

View File

@@ -16,7 +16,7 @@
* @{ * @{
*/ */
#include "./vp8.h" #include "./vp8.h"
#include "vpx/vpx_encoder.h" #include "./vpx_encoder.h"
/*!\file /*!\file
* \brief Provides definitions for using VP8 or VP9 encoder algorithm within the * \brief Provides definitions for using VP8 or VP9 encoder algorithm within the
@@ -508,6 +508,12 @@ enum vp8e_enc_control_id {
* Supported in codecs: VP9 * Supported in codecs: VP9
*/ */
VP9E_SET_COLOR_SPACE, VP9E_SET_COLOR_SPACE,
/*!\brief Codec control function to get an Active map back from the encoder.
*
* Supported in codecs: VP9
*/
VP9E_GET_ACTIVEMAP,
}; };
/*!\brief vpx 1-D scaling mode /*!\brief vpx 1-D scaling mode
@@ -691,6 +697,8 @@ VPX_CTRL_USE_TYPE(VP9E_SET_NOISE_SENSITIVITY, unsigned int)
VPX_CTRL_USE_TYPE(VP9E_SET_TUNE_CONTENT, int) /* vp9e_tune_content */ VPX_CTRL_USE_TYPE(VP9E_SET_TUNE_CONTENT, int) /* vp9e_tune_content */
VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_SPACE, int) VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_SPACE, int)
VPX_CTRL_USE_TYPE(VP9E_GET_ACTIVEMAP, vpx_active_map_t *)
/*! @} - end defgroup vp8_encoder */ /*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

View File

@@ -59,7 +59,7 @@ extern "C" {
* types, removing or reassigning enums, adding/removing/rearranging * types, removing or reassigning enums, adding/removing/rearranging
* fields to structures * fields to structures
*/ */
#define VPX_ENCODER_ABI_VERSION (4 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ #define VPX_ENCODER_ABI_VERSION (5 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
/*! \brief Encoder capabilities bitfield /*! \brief Encoder capabilities bitfield

View File

@@ -1080,9 +1080,6 @@ int main_loop(int argc, const char **argv_) {
} }
} }
} }
if (stop_after && frame_in >= stop_after)
break;
} }
if (summary || progress) { if (summary || progress) {

View File

@@ -63,6 +63,7 @@ int file_is_webm(struct WebmInputContext *webm_ctx,
struct VpxInputContext *vpx_ctx) { struct VpxInputContext *vpx_ctx) {
mkvparser::MkvReader *const reader = new mkvparser::MkvReader(vpx_ctx->file); mkvparser::MkvReader *const reader = new mkvparser::MkvReader(vpx_ctx->file);
webm_ctx->reader = reader; webm_ctx->reader = reader;
webm_ctx->reached_eos = 0;
mkvparser::EBMLHeader header; mkvparser::EBMLHeader header;
long long pos = 0; long long pos = 0;
@@ -121,6 +122,11 @@ int webm_read_frame(struct WebmInputContext *webm_ctx,
uint8_t **buffer, uint8_t **buffer,
size_t *bytes_in_buffer, size_t *bytes_in_buffer,
size_t *buffer_size) { size_t *buffer_size) {
// This check is needed for frame parallel decoding, in which case this
// function could be called even after it has reached end of input stream.
if (webm_ctx->reached_eos) {
return 1;
}
mkvparser::Segment *const segment = mkvparser::Segment *const segment =
reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment); reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment);
const mkvparser::Cluster* cluster = const mkvparser::Cluster* cluster =
@@ -140,6 +146,7 @@ int webm_read_frame(struct WebmInputContext *webm_ctx,
cluster = segment->GetNext(cluster); cluster = segment->GetNext(cluster);
if (cluster == NULL || cluster->EOS()) { if (cluster == NULL || cluster->EOS()) {
*bytes_in_buffer = 0; *bytes_in_buffer = 0;
webm_ctx->reached_eos = 1;
return 1; return 1;
} }
status = cluster->GetFirst(block_entry); status = cluster->GetFirst(block_entry);

View File

@@ -29,6 +29,7 @@ struct WebmInputContext {
int video_track_index; int video_track_index;
uint64_t timestamp_ns; uint64_t timestamp_ns;
int is_key_frame; int is_key_frame;
int reached_eos;
}; };
// Checks if the input is a WebM file. If so, initializes WebMInputContext so // Checks if the input is a WebM file. If so, initializes WebMInputContext so