Compare commits


393 Commits

Author SHA1 Message Date
Scott LaVarnway
437d033dbb Merge "Remove tile param" 2015-06-29 18:04:56 +00:00
Parag Salasakar
f3a1295cff Merge "mips msa vp9 temporal filter optimization" 2015-06-27 01:29:04 +00:00
Tom Finegan
8fdfeb3f40 Merge "vpxenc.sh: Add basic multithreaded frame parallel encode test." 2015-06-26 16:12:34 +00:00
Parag Salasakar
b92cc27b76 mips msa vp9 temporal filter optimization
average improvement ~4x-5x

Change-Id: Iad9c0a296dbc2ea96d000bd009077999ed58a3c5
2015-06-26 12:00:24 +05:30
Parag Salasakar
c040f96e4b mips msa vp9 subtract block optimization
average improvement ~3x-4x

Change-Id: Idbe4d13a00d05ff8be6559b116f416e42c3b4097
2015-06-26 09:23:56 +05:30
Parag Salasakar
d017f5ba38 Merge "mips msa vp9 block error optimization" 2015-06-26 03:42:31 +00:00
Parag Salasakar
1543f2b60e mips msa vp9 block error optimization
average improvement ~3x-4x

Change-Id: If0fdcc34b17437a7e3e7fb4caaf1067bc175f291
2015-06-26 09:04:00 +05:30
Tom Finegan
92f7d79030 vpxenc.sh: Add basic multithreaded frame parallel encode test.
Change-Id: Id526783fa2e3e9bb31229931b6548ac7a9b2b7e6
2015-06-25 13:52:04 -07:00
Marco
1c7b1f9aec Update to dynamic resize logic for 1pass CBR.
Only do the check for resizing if the feature is selected
(i.e., resize_mode = RESIZE_DYNAMIC).

Also modify the condition for checking to resize_count >= window
(since the framerate can change).

Change-Id: Idceb4e50956bb965a1492b4993b0dcb393c9be4d
2015-06-25 12:28:43 -07:00
James Zern
3393243d5e Merge "vp8_subpixelvariance_neon: right size coeff table" 2015-06-25 02:14:18 +00:00
Marco
3dd9cde2a5 Fix to unstable build from commit 517a66.
Change-Id: I123db2d20ae65a10e2dec95eec61150e2f69546d
2015-06-24 17:28:57 -07:00
James Zern
d219f2b9d2 Merge "vp9_reconintra_neon: add d45 16x16" 2015-06-24 21:23:15 +00:00
Tom Finegan
0bd61519c0 Merge "vpxenc.sh: Add basic vp9 multithread encode test." 2015-06-24 16:36:09 +00:00
James Zern
4bd87a9b9e vp8_subpixelvariance_neon: right size coeff table
only uint8 is required; each use only loads one value as a uint8
quiets a few type conversion warnings

Change-Id: I03dc0dc0eb01ac23a6e8673daa2b77c6c57bf1b0
2015-06-23 23:48:12 -07:00
James Zern
9f0383c66f Merge "build: add *test-no-data-check targets" 2015-06-24 06:20:38 +00:00
Tom Finegan
8281a19465 vpxenc.sh: Add basic vp9 multithread encode test.
- Change default real time speed to -6.
- Add vpxenc_vp9_webm_rt_multithread, which encodes
  niklas_1280_720_30.y4m with 2 to 4 threads using 2 to 4
  tile columns.

Change-Id: I4d86c3360aec67ae5d1ba82eb6e0f0be8068b5af
2015-06-23 18:30:52 -07:00
Marco
4774d38970 Merge "aq-mode=3: Reduce boost for segment#2 at low bitrates/low res." 2015-06-23 23:55:16 +00:00
Marco
517a662005 aq-mode=3: Reduce boost for segment#2 at low bitrates/low res.
Reduce boost for segment#2 for low bitrates and low-res.

This change is to reduce the rate overshoot at low bitrates.
No change in behavior, except at the very low bitrates.

Change-Id: I0dbd9d3b6356da5804de94adf10fca6a7a8f8948
2015-06-23 16:50:43 -07:00
Tom Finegan
46df71396d Merge "Fix building with iOS 9 beta SDK" 2015-06-23 22:57:05 +00:00
James Zern
9db1f24c47 vp9_reconintra_neon: add d45 16x16
~90% faster over 20M pixels

Change-Id: I92d80f66e91e0a870a672cfb5dd29bf1a17cb11a
2015-06-22 21:00:07 -07:00
Parag Salasakar
7555e2b822 mips msa vp9 avg optimization
average improvement ~2x-3x

Change-Id: I76f7fc00c0ffdf2b4ba41bf3819f3b6044bcdeff
2015-06-23 07:32:25 +05:30
Parag Salasakar
7b71cdb0b4 Merge "mips msa vp9 fdct 4x4 optimization" 2015-06-23 01:46:54 +00:00
Marco
fb2a89b1fb Fixes for key frame coding at speed 5.
Keep the same transform cutoff and partition selection
for speed 5 as in speeds >=6 (non-rd speed settings).

The existing setting for key frames at speed 5 allowed transform sizes
up to 32x32, and did not allow the 4x4 block partition size.
This created more visual artifacts on the first few frames.

avgPSNR/overallPSNR/SSIM gains of 0.2/0.7/0.8 for rtc_derf(low-res) set,
and 0/0.7/1.1 gains for rtc set.

Change-Id: I8c139ec6c9bb74e14b4ffbad5f12e94f18a59c0b
2015-06-22 16:57:35 -07:00
James Zern
c8b9658ecc Merge "vp9_reconintra_neon: add d45 8x8" 2015-06-22 22:27:57 +00:00
Brion Vibber
78637b6136 Fix building with iOS 9 beta SDK
configure.sh was setting some Mac OS X options for iOS targets, which
confuses the iOS 9 beta SDK in Xcode 7 when linking libraries.

Additionally, old armv6 media extensions were being enabled on iOS
when they're not needed (we always have Neon since iOS 6). These
broke on iOS 9 SDK which no longer assembles those instructions.

Change-Id: I4e4d2722392ead3382ce96289c03ef1e489799d6
2015-06-22 12:09:09 -07:00
Marco
8e029fcfa9 Merge "Reduce max_partition_size for low resolutions at speed 5." 2015-06-22 16:59:48 +00:00
Scott LaVarnway
86f4a3d8af Remove tile param
and add it to MACROBLOCKD.

Change-Id: I0e60aaa9f84bcc9f2376d71bd934f251baee38db
2015-06-22 06:09:38 -07:00
Parag Salasakar
bc94999148 mips msa vp9 fdct 4x4 optimization
average improvement ~2x-3x

Change-Id: Idf8be780b8b4228fc91f110a94e4ee1fd9af0163
2015-06-22 14:30:24 +05:30
Frank Galligan
1395b56a1d Add assembly tests for int projections.
BUG=https://code.google.com/p/webm/issues/detail?id=1022

Change-Id: I5ae4acac39fd75c56d3feff0716cb52133de3b22
2015-06-20 12:05:59 -07:00
Parag Salasakar
b6131a733d Merge "mips msa vp9 fdct 8x8 optimization" 2015-06-20 02:58:10 +00:00
James Zern
12c6688e31 vp9_reconintra_neon: add d45 8x8
based on ssse3 implementation

~91% faster over 20M pixels

Change-Id: I6d743a53352c2d6de0efe7899d7996e8b0f7fa29
2015-06-19 19:19:22 -07:00
Parag Salasakar
7ca84888c2 mips msa vp9 fdct 8x8 optimization
average improvement ~4x-5x

Change-Id: I37582efc2622bc20b2bf99617a76110ab24e9f6a
2015-06-20 07:48:35 +05:30
James Zern
7b480ee90c Merge "Add dynamic range comment to vp9_int_pro_row" 2015-06-20 01:43:54 +00:00
Jingning Han
922af194bc Add dynamic range comment to vp9_int_pro_row
Change-Id: Icaa643568159c4e2db24eef42090b002ae02a45e
2015-06-19 17:39:33 -07:00
Jingning Han
c539ec022f Merge "Add dynamic range comment to vp9_int_pro_col" 2015-06-20 00:35:07 +00:00
James Zern
557ae511cb build: add *test-no-data-check targets
skips testdata verification; useful with slow media or if the data was
retrieved via a separate call to testdata

Change-Id: Ifd97892cee6c04b0111874cc8071675e90ec852b
2015-06-19 16:50:26 -07:00
Marco
debe4e920f Reduce max_partition_size for low resolutions at speed 5.
For speed 5 real-time mode, the selection of the partition size for
superblocks on the segment (aq-mode=3) uses the non-rd recursive
pick partition search, and can sometimes select 64x64.

For low resolutions, visually better to limit this to 32x32.

Change-Id: I69657a7ed8899f8b3cf8c9c318a2509c5c72c565
2015-06-19 16:48:16 -07:00
Alex Converse
90c9ede8e6 Limit cyclic refresh revisiting blocks at the same quantizer.
For screen content don't refresh a block at a quantizer higher than
it was last coded at. Previously at realtime speeds the encoder had a
tendency to recode a block from GOLDEN with a higher Q than it was last
coded at.

Change-Id: Iacd561806c769dcce1a81b9827ffc70090f5ba18
2015-06-19 15:23:02 -07:00
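The rule described above can be sketched as follows (hypothetical helper and parameter names, for illustration only; the encoder applies this inside its cyclic-refresh segmentation logic):

    /* Hypothetical sketch: under screen-content tuning, do not refresh a
     * block whose current quantizer index is higher than the one it was
     * last coded with. */
    static int should_refresh_block(int is_screen_content, int current_qindex,
                                    int last_coded_qindex) {
      if (is_screen_content && current_qindex > last_coded_qindex) return 0;
      return 1;
    }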
Yaowu Xu
e5d4062962 Merge "Fix a msvc compiler warning" 2015-06-19 19:02:15 +00:00
Jingning Han
8e8bc5f28b Add dynamic range comment to vp9_int_pro_col
Change-Id: If14d9f874bd0bf2c5a455982088fd70591f5ea5a
2015-06-19 09:43:57 -07:00
Johann Koenig
001baa5dd8 Merge "Move vp8 variance files" 2015-06-19 16:27:25 +00:00
Yaowu Xu
d8428ae35d Fix a msvc compiler warning
Change-Id: Ida8a04370895ed14bd118324ec2577da926e4648
2015-06-19 09:04:29 -07:00
James Zern
714a46a63c Merge "vp9_filter: make all filter tables static" 2015-06-19 03:32:24 +00:00
James Zern
c5d779d266 Merge changes I2552d810,I51952c0a,Ib82e4247,I9c8d16cb
* changes:
  vp9_mcomp: make search_step_table static
  vp9_encodeframe: delete auto_partition_range()
  vp9_mcomp: don't mark setup_center_error() inline
  vp9_encoder: hide adjust_image_stat()
2015-06-19 03:31:38 +00:00
James Zern
a2c69af50e Merge "vp9_reconintra_neon: add d45 4x4" 2015-06-19 03:27:23 +00:00
James Zern
5d1d72df16 Merge changes from topic 'vp9-intra-pred'
* changes:
  vp9_reconintra_neon: add d135 4x4
  vp9_reconintra: correct d135 4x4 signature
2015-06-19 03:24:58 +00:00
Marco
4b45088fc9 Merge "Add dynamic resize logic for 1 pass CBR." 2015-06-19 00:56:06 +00:00
Marco
d77f51ba9e Add dynamic resize logic for 1 pass CBR.
The decision to scale down/up is based on the buffer state and average QP
over the previous time window. Limit the total amount of down-scaling
to at most one scale down for now.

Reset certain quantities after resize (buffer level, cyclic refresh,
rate correction factor).

The feature is enabled via the setting rc_resize_allowed = 1.

Change-Id: I9b1a53024e1e1e953fb8a1e1f75d21d160280dc7
2015-06-18 17:13:37 -07:00
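For context, a minimal sketch of enabling this from the application side, using the rc_resize_allowed = 1 setting named above; the surrounding calls are the standard libvpx encoder API (not part of this commit), and error handling is omitted:

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    /* Sketch: 1-pass CBR VP9 encoder with dynamic resizing allowed. */
    static void init_cbr_encoder_with_resize(vpx_codec_ctx_t *codec, int width,
                                             int height, int bitrate_kbps) {
      vpx_codec_enc_cfg_t cfg;
      vpx_codec_enc_config_default(vpx_codec_vp9_cx(), &cfg, 0);
      cfg.g_w = width;
      cfg.g_h = height;
      cfg.g_pass = VPX_RC_ONE_PASS;
      cfg.rc_end_usage = VPX_CBR;           /* 1-pass CBR mode */
      cfg.rc_target_bitrate = bitrate_kbps; /* in kbit/s */
      cfg.rc_resize_allowed = 1;            /* allow dynamic down/up scaling */
      vpx_codec_enc_init(codec, vpx_codec_vp9_cx(), &cfg, 0);
    }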
Johann
907b33cdc4 Move vp8 variance files
There is a naming conflict in the chromium build system.

The rest of the variance functions will move to vpx_dsp soon.

Change-Id: Iff78da2aafb0d7380eda73e38d7dac72110a1e47
2015-06-18 16:42:28 -07:00
James Zern
ce88d74d34 vp9_reconintra_neon: add d45 4x4
based on webp's LD4()

~59% faster over 20M pixels

Change-Id: I371eaed9ce8f470451046997e130b0ba1a2f7a9c
2015-06-18 15:25:07 -07:00
James Zern
337b221e00 vp9_reconintra_neon: add d135 4x4
based on webp's RD4()

~50% faster over 20M pixels

Change-Id: Ifcb7bf7f7fc8eabf79d9e3b219ce1be67abc524a
2015-06-18 15:25:06 -07:00
James Zern
e8e3583fc7 vp9_reconintra: correct d135 4x4 signature
add missing '_c' suffix

Change-Id: I928d6cf8f90db0b8ca0b1f3bbf10b3d792062cec
2015-06-18 15:25:06 -07:00
James Zern
41d8545ab6 Merge "vp9_reconintra_neon: add DC 4x4 predictors" 2015-06-18 22:24:55 +00:00
James Zern
6e44bf20f7 vp9_reconintra_neon: add DC 4x4 predictors
~85-89% faster over 20M pixels

Change-Id: I3812e8adfffe5255034da88dfe6546e12f4d10ee
2015-06-18 15:22:43 -07:00
James Zern
e77f859d72 Merge "vp9_reconintra_neon: add DC 32x32 predictors" 2015-06-18 22:17:51 +00:00
Jingning Han
d1398e9f13 Merge "Add dynamic range comment to vp9_satd" 2015-06-18 19:36:53 +00:00
Jingning Han
4f1f510f16 Add dynamic range comment to vp9_satd
Change-Id: I75873846e6fdafbe7597a1bd0192115d2d1e9987
2015-06-18 09:18:22 -07:00
Parag Salasakar
b6ea0c4c57 Merge "mips msa vp9 fdct 32x32 optimization" 2015-06-18 04:30:53 +00:00
Jingning Han
7f6cddb58f Take out assertion for block_yrd in rtc coding flow
The internal behavior of block_yrd differs in high bit depth
settings from the 8-bit one. This causes the assertion condition not
to hold for high bit depth.

Change-Id: I15dc02e7162d27cabe78c451941d769d488b1174
2015-06-17 08:51:16 -07:00
James Zern
0d51a97ae9 Merge "Fix integer overflow issue in rtc coding flow intra mode search" 2015-06-17 05:29:32 +00:00
Jingning Han
bc7074508a Fix integer overflow issue in rtc coding flow intra mode search
The overflow issue affects a variable that is only used in inter
mode. This commit fixes the ioc warning triggered in the intra
mode. It does not affect the compression performance.

Change-Id: I593d1b5650599de07f3e68176dd1442c6cb7bdbc
2015-06-16 19:31:24 -07:00
Parag Salasakar
d9fedf7832 mips msa vp9 fdct 32x32 optimization
average improvement ~4x-6x

Change-Id: Ibcac3ef8ed5e207cf8c121e696570e6b63d3c0f4
2015-06-17 07:58:34 +05:30
Parag Salasakar
fa53008fb7 Merge "mips msa vp9 fdct 16x16 optimization" 2015-06-17 01:21:59 +00:00
Scott LaVarnway
97b3913dcc Merge "Moved has_rows, has_cols in vp9_decodeframe.c" 2015-06-16 22:01:06 +00:00
Scott LaVarnway
ce6a6c5411 Moved has_rows, has_cols in vp9_decodeframe.c
from read_partition() to decode_partition().

Change-Id: I6bee2a0e9ff315290a690c9d773c9648dd2a200d
2015-06-16 11:50:54 -07:00
Marco
8914ab696d Remove duplicate calls for set_frame_size in 1 pass mode.
set_frame_size() is being called twice, once before entering
encode_frame_to_data_rate(), and once again in that function.
No need to call it twice for one-pass mode.

Change-Id: I5fabaf0a90482d4f42cd89ef7ae1402c31aec600
2015-06-16 09:57:13 -07:00
Scott LaVarnway
5fe0e55ca4 Merge "Eliminated frame_type check in get_partition_probs()" 2015-06-16 13:40:23 +00:00
Scott LaVarnway
b2658ec321 Eliminated frame_type check in get_partition_probs()
Moved the frame_type check to the tile level and stored
the prob ptr in MACROBLOCKD.

Change-Id: I10b5a4abd58213dc7610e3ade1a1583c01526842
2015-06-16 05:37:54 -07:00
Scott LaVarnway
a41fe749a8 Merge "Update use_prev_frame_mvs flag in decoder." 2015-06-16 12:28:46 +00:00
Parag Salasakar
89b4b315aa mips msa vp9 fdct 16x16 optimization
average improvement ~4x-6x

Change-Id: Id3b2243e5b3c7844c90c4231a5e75fa69911362c
2015-06-16 12:49:34 +05:30
James Zern
79fb3a013e vp9_reconintra_neon: add DC 32x32 predictors
~84-85% faster over 20M pixels

Change-Id: Ia67a7f4a342bf7b0a9280e05c25d81a774d90469
2015-06-15 20:57:28 -07:00
Yunqing Wang
e820ca6973 Merge "vp9_ethread: create enough threads while using SVC" 2015-06-15 23:03:32 +00:00
James Zern
a6d126709a Merge changes I19588f9e,I6dc338a6
* changes:
  vp9_encodeframe: make coord_lookup[] static
  vp9_resize: make vp9_filteredinterp_*[] static
2015-06-15 23:03:28 +00:00
James Zern
17c9678a3c Merge "vp9_entropy: delete vp9_coefmodel_tree[]" 2015-06-15 23:02:42 +00:00
James Zern
e8d3491ec2 Merge "vp9_entropymode: make vp9_init_mode_probs private" 2015-06-15 23:02:36 +00:00
James Zern
43d49e4710 Merge "enable vp9_d153_predictor_32x32_ssse3" 2015-06-15 23:01:29 +00:00
Yunqing Wang
c98273c9e7 vp9_ethread: create enough threads while using SVC
This patch modifies the thread creation code. When use_svc is true,
the number of threads created is decided by the highest resolution.
This resolves WebM issue 1018.

Change-Id: I367227b14d1f8b08bbdad3635b232a3a37bbba26
2015-06-15 14:30:54 -07:00
Marco
24b3ede251 Remove redundant second declaration in svc_layercontext.c
Change-Id: Ia3b1c1db54204fd92a56b7f698a9f26d27ee572a
2015-06-15 14:06:43 -07:00
James Zern
98f0178611 enable vp9_d153_predictor_32x32_ssse3
unused since its initial commit
~91% faster over 20M pixels

Change-Id: Ic8b5b3246bc97c8406be8bc4496601370403b70a
2015-06-12 19:48:22 -07:00
James Zern
5214bd52c8 vp9_encodeframe: make coord_lookup[] static
Change-Id: I19588f9e674c8635b6e58e4633120be736d256a6
2015-06-12 19:47:46 -07:00
James Zern
5168baea10 vp9_resize: make vp9_filteredinterp_*[] static
+ drop the vp9_ prefix

Change-Id: I6dc338a69265dcaa8c6fe071e5757312bf92efca
2015-06-12 19:47:45 -07:00
James Zern
ef75416ab7 vp9_entropy: delete vp9_coefmodel_tree[]
it's been unused since:
4ac6a25 Moving vp9_tree_probs_from_distribution() to encoder.

Change-Id: Ieae65864277fc3dbe993c5c08d75c6c5fcaa3a2d
2015-06-12 18:43:37 -07:00
James Zern
53b7f33f2d vp9_entropymode: make vp9_init_mode_probs private
rename to init_mode_probs

Change-Id: Id451d7763b784ed37e43f2c35073a778078d3d0f
2015-06-12 18:25:23 -07:00
James Zern
aaa49f0485 vp9_mcomp: make search_step_table static
Change-Id: I2552d8101cf49ed951782ab69adce407579700fc
2015-06-12 18:11:54 -07:00
James Zern
31509af247 vp9_encodeframe: delete auto_partition_range()
unused since:
1f00a9b Fix choose_partitioning threshold setup for speed -5

Change-Id: I51952c0a1be3e6e0aa36ff2ffcfbbea60a505960
2015-06-12 17:57:37 -07:00
James Zern
7ea431df98 vp9_mcomp: don't mark setup_center_error() inline
this function is a bit too involved for the hint; avoids a -Winline
warning

Change-Id: Ib82e424764aa78b37ddb94116e2b009a6de31d35
2015-06-12 17:56:33 -07:00
James Zern
471302a07b vp9_encoder: hide adjust_image_stat()
this function is only needed with CONFIG_INTERNAL_STATS

Change-Id: I9c8d16cb9069dd8370f8b30329933c0d97f6d0aa
2015-06-12 17:55:08 -07:00
James Zern
a4bb5f2a29 Merge "decode_tiles_mt: remove incorrect TODO" 2015-06-12 22:13:38 +00:00
James Zern
dc0f0f1cf2 Merge changes I342075eb,I222eaa4e
* changes:
  vp9_decodeframe: simplify init_read_bit_buffer signature
  vp9_decode_frame: remove explicit read_bit_buffer init
2015-06-12 22:13:28 +00:00
James Zern
e21c1eab9d Merge changes Iedb5b6a3,Iaea98508,I36580cea,Ia0574320
* changes:
  vp9_decodeframe.h: remove unused prototype
  vp9_decodeframe: move public funcs to end of file
  vp9_decodeframe: reorder some functions
  vp9_decodeframe: hide vp9_dec_build_inter_predictors_sb
2015-06-12 22:12:30 +00:00
James Zern
44317a511b Merge "variance_test: fix build w/--disable-vp8-encoder" 2015-06-12 22:08:33 +00:00
Jingning Han
d8985f5360 Merge "Fix potential overflow issue in hadamard_16x16()" 2015-06-12 19:03:21 +00:00
Jingning Han
bea691b5c9 Merge "Add dynamic range comment to hadamard_8x8()" 2015-06-12 18:43:41 +00:00
Parag Salasakar
ecbbef6b67 Merge "mips msa vp9 filter by weight optimization" 2015-06-12 18:30:11 +00:00
Jingning Han
176c291d9c Fix potential overflow issue in hadamard_16x16()
This commit fixes a potential integer overflow issue in function
hadamard_16x16. It adds corresponding dynamic range comment.

Change-Id: Iec22f3be345fb920ec79178e016378e2f65b20be
2015-06-12 10:56:18 -07:00
Jingning Han
4f52d49f1e Add dynamic range comment to hadamard_8x8()
Add comment to assist SIMD optimization.

Change-Id: I300d5a848e6e9947e451de2a871a88940703fc9f
2015-06-12 10:39:49 -07:00
Yunqing Wang
254a4c033c Merge "Allocate tile data adaptively to accommodate the frame size increase" 2015-06-12 15:49:40 +00:00
Parag Salasakar
fbac961b47 mips msa vp9 filter by weight optimization
filter by weight - average improvement ~2x-3x

Change-Id: I4832033335d339cdafdce697f07ce3e643920057
2015-06-12 12:06:42 +05:30
James Zern
07799ef28a Merge "test_intra_pred_speed: add ClearSystemState() call" 2015-06-12 06:27:45 +00:00
James Zern
e0e4045db8 variance_test: fix build w/--disable-vp8-encoder
s/CONFIG_VP8\b/CONFIG_VP8_ENCODER/

Change-Id: I616aace9cf8f18d7e83f00f7aef3b8a26fc4c17b
2015-06-11 23:15:30 -07:00
James Zern
e2b52f6f01 vp9_filter: make all filter tables static
these are returned via vp9_get_interp_kernel()

Change-Id: I45ed75e5b1515c4f5be9212759dcb50a456b5548
2015-06-11 15:15:52 -07:00
James Zern
33b3953c54 vp9_filter: restore vp9_bilinear_filters alignment
the declaration containing the alignment in vp9_filter.h was removed in:
eb88b17 Make vp9 subpixel match vp8

fixes a crash in 32-bit builds

Change-Id: I9a97e6b4e8e94698e43ff79d0d8bb85043b73c61
2015-06-11 15:15:25 -07:00
Scott LaVarnway
0fbc277746 Merge "inline vp9_get_segdata()" 2015-06-11 19:48:19 +00:00
Yunqing Wang
2c838ede68 Allocate tile data adaptively to accommodate the frame size increase
If the frame size increases, the tile data buffer needs to be
re-allocated according to the number of tiles existing in current
frame. This patch makes the multi-tile encoding work in spatial
SVC usage case, and partially solved WebM issue 1018.

Change-Id: I1ad6f33058cf5ce6f60ed5024455a709ca80c5ad
2015-06-11 11:30:18 -07:00
Scott LaVarnway
cca866f578 inline vp9_get_segdata()
and change name.

Change-Id: I706645cf9d9dc04f1b3b6ac80df80edb7f101854
2015-06-11 09:52:00 -07:00
Marco
2aa67ce20f Move adjustment of some CR parameters to existing function.
Refactor/no change in behavior.

Change-Id: Idb3c55b1304feaf689b90403f79bc96dba26f060
2015-06-11 08:31:03 -07:00
Scott LaVarnway
a49c701529 Merge "inline vp9_segfeature_active()" 2015-06-11 12:29:45 +00:00
Scott LaVarnway
42c0b1b1f1 inline vp9_segfeature_active()
and changed name.

Change-Id: Ie023ca66cc2c823032f58d4faeb53fd1863c94f3
2015-06-11 04:20:55 -07:00
Parag Salasakar
c7489f4815 Merge "mips msa vp9 intra-pred optimization" 2015-06-11 03:31:49 +00:00
James Zern
15c839f563 decode_tiles_mt: remove incorrect TODO
all allocated workers are used; the final one runs in the main thread.

Change-Id: I04647d4fb2e01d6d404790e7899515289047f553
2015-06-10 15:56:51 -07:00
James Zern
b105414118 vp9_decodeframe: simplify init_read_bit_buffer signature
Change-Id: I342075eb5a4ba2a85d36d47ae52d1f3476039e31
2015-06-10 15:53:30 -07:00
James Zern
92146eb8ac vp9_decode_frame: remove explicit read_bit_buffer init
this is done by init_read_bit_buffer()

Change-Id: I222eaa4e9758ff9f7e1e4122106c5c4652ffa99c
2015-06-10 15:50:54 -07:00
James Zern
dca319040c vp9_decodeframe.h: remove unused prototype
vp9_init_dequantizer() was deleted in:
bdd249b Optimize the dequantization process on decoder side.

Change-Id: Iedb5b6a3a03964dd6901c1e3b2325194d94bc708
2015-06-10 15:48:59 -07:00
James Zern
b0bafd0439 vp9_decodeframe: move public funcs to end of file
Change-Id: Iaea9850890b726c7b5552c5f02b3a309086edc85
2015-06-10 15:47:57 -07:00
James Zern
38dd0448cd vp9_decodeframe: reorder some functions
removes the need for a prototype for the static function
dec_build_inter_predictors_sb

Change-Id: I36580ceae061d27f341ab0a16ece479f92e98004
2015-06-10 15:44:21 -07:00
James Zern
587bd3669e vp9_decodeframe: hide vp9_dec_build_inter_predictors_sb
+ strip 'vp9_' prefix

Change-Id: Ia057432095e5741473275d4da03ab665c37e924e
2015-06-10 15:31:55 -07:00
James Zern
1898d1336d test_intra_pred_speed: add ClearSystemState() call
fixes instability; noticed on mingw

Change-Id: Idef4349339444ec84916e5fcd908ee9633d28aaa
2015-06-10 12:44:07 -07:00
James Zern
44afbbb72d Merge "vp9_reconintra/d45_predictor: remove temp storage" 2015-06-10 19:23:57 +00:00
James Zern
c620c632c1 Merge "test/android/Android.mk: fix build w/ENABLE_SHARED=0" 2015-06-10 19:22:42 +00:00
James Zern
4ded624ff3 Merge changes I89a8440f,Ifa3926e1
* changes:
  libs.mk: relocate TEST_INTRA_PRED_SPEED_OBJS init
  test_intra_pred_speed: remove #if w/in another macro
2015-06-10 19:18:04 +00:00
Paul Wilkins
59114915bc Merge "Changes to active maxq calculation in two pass." 2015-06-10 13:33:53 +00:00
Scott LaVarnway
97880c3324 Merge "Reducing size of MODE_INFO struct" 2015-06-10 13:15:19 +00:00
James Zern
39d93f3891 libs.mk: relocate TEST_INTRA_PRED_SPEED_OBJS init
allows the visual studio project to be generated

Change-Id: I89a8440fb0bcaef11ff89dd967aa37bfe200758d
2015-06-09 19:30:46 -07:00
James Zern
6a422e4452 test_intra_pred_speed: remove #if w/in another macro
fixes the compile under visual studio

Change-Id: Ifa3926e198af97d73250540c6d0ef692f5e354ff
2015-06-09 19:30:04 -07:00
James Zern
0f8ee6eb4b test/android/Android.mk: fix build w/ENABLE_SHARED=0
add a dependency on *_rtcd.h to ensure they're generated before
attempting to build the test files

Change-Id: Ibbbd1f6ea77912bfd297129e7c83b9a80923ea12
2015-06-09 19:27:20 -07:00
Marco
61c5c96ae1 Merge "Adjust some parameters for cyclic refresh for low bitrates." 2015-06-09 23:22:29 +00:00
Marco
997ac14c6a Adjust some parameters for cyclic refresh for low bitrates.
Reduce motion threshold and boost factor for second segment,
for low bitrates, at low resolutions for now.
This is to reduce the rate fluctuation/frame dropping that occurs
at these low bitrates.

Change-Id: Ia66c3be41831882fca8c1e4fe104f5ea8fbe7142
2015-06-09 15:10:03 -07:00
Debargha Mukherjee
2fd31bbba7 Merge "Prevent dividing by 0 when target-bitrate is 0." 2015-06-09 17:03:22 +00:00
Paul Wilkins
faf8c63b0f Changes to active maxq calculation in two pass.
Some initial experiments into discounting dead zone
formatting bars and intra skip blocks (common in some
types of animation and graphics) in the calculation of
the active max Q for each ARF/GF group.

TODO: check for vertical formatting bars and validate the
horizontal bar at the bottom edge of the image.

As expected, this change, as it stands, does not make much
difference for the natural videos in the std-hd and derf sets.
However, for the yt and yt hd set there is a significant rise
in the average PSNR with overall PSNR and SSIM remaining
neutral.

The mean rise for the YT-HD test set was > 6%. This is mainly
because the change allows Q to drop further on titles and
other graphics sections where spending a small number of
extra bits gives a sharp rise in PSNR.

Change-Id: I3f878ae91fc1854312d7ecf9fa792c17bc1aa6b7
2015-06-09 15:31:24 +01:00
Paul Wilkins
4a28da5843 Enable more split modes for animated content.
For content that is identified as likely to contain some
animation or graphics content, increase the availability
of split modes for good quality speeds 1-3.

On a problem test animation clip this improves metrics
results by about 0.25 db and makes a noticeable difference
visually. It also causes a small drop in file size (~0.5%) but
a rise in encode time of about 5-6% at speed 2.

For more normal content it should have no effect.

Change-Id: Ic4cd9a8de065af9f9402f4477a17442aebf0e439
2015-06-09 14:50:44 +01:00
Debargha Mukherjee
c23a9e218c Prevent dividing by 0 when target-bitrate is 0.
Change-Id: I05b7fb378b6b0c5f263e7839f96ba6830ee34fd2
2015-06-08 16:19:13 -07:00
Paul Wilkins
b19b16cfa1 Merge "Animation and dead zone detection." 2015-06-08 14:26:07 +00:00
Scott LaVarnway
c9976b32b4 Update use_prev_frame_mvs flag in decoder.
Added check to see if last frame was all intra.  This will
eliminate two checks in find_mv_refs_idx().  Also, do not
update the frame mvs if the current frame is all intra.

This improved performance on material with frequent
intra-only frames.

Change-Id: I44a4042c3670ab0d38439d565062a0e2a1ba9d1e
2015-06-08 03:38:13 -07:00
Parag Salasakar
a2288d274c mips msa vp9 intra-pred optimization
intra pred - average improvement ~2x-3x

Change-Id: Ie3f7d6eded5ecb7ed7ee506ba8e4d98f93803b09
2015-06-06 22:29:32 +05:30
James Zern
e67d45d4ce Merge "vpxdec: cosmetics: break some long lines" 2015-06-06 16:11:00 +00:00
James Zern
913ddbf747 Merge "vpxenc: relocate vp8/vp9 only options" 2015-06-06 16:10:18 +00:00
James Zern
ad8e6f2ed6 Merge "disable vp8_sub_pixel_variance8x8_neon" 2015-06-06 16:07:58 +00:00
James Zern
a3938266f5 Merge "vs/armv7: use -oldit armasm flag" 2015-06-06 03:24:35 +00:00
James Zern
47fe535422 disable vp8_sub_pixel_variance8x8_neon
fails unit tests:
[  FAILED  ] NEON/VP8SubpelVarianceTest.ExtremeRef/0, where GetParam() = (3, 3, 0x14e36d, 0)
[  FAILED  ] NEON/VP8SubpelVarianceTest.Ref/0, where GetParam() = (3, 3, 0x14e36d, 0)

the tests were recently enabled in:
eb88b17 Make vp9 subpixel match vp8

the functions likely haven't changed since being converted from assembly

Change-Id: I6141717b111b8f735f436c160d74270af53ef722
2015-06-05 20:18:51 -07:00
James Zern
3a070ba502 vpxdec: cosmetics: break some long lines
Change-Id: I95827a6f5e585d2accbb8fc09501f2d38db585e4
2015-06-05 16:18:49 -07:00
James Zern
f82cfc2ee8 vpxenc: relocate vp8/vp9 only options
move them under their respective config check to avoid some unused
variable warnings when disabled

Change-Id: Ic5e5280cf1bc1f56e8349676f0bedae4acef34ea
2015-06-05 16:14:14 -07:00
hkuang
87c21a95ae Optimize the decode_partition. About 0.7% gain on N10.
Change-Id: Ia689c254bd2d4f274abcc451a9b758f62e3a2b1f
2015-06-05 15:47:09 -07:00
James Zern
5908e0b664 vs/armv7: use -oldit armasm flag
this quiets warnings from armv6 code [1].
from msdn [2]:
-oldit
Generate ARMv7-style IT blocks. By default, ARMv8-compatible IT blocks
are generated.

a new configuration would be needed for armv8 in any case as the neon
assembly is being built, so removing this should be harmless

[1] A4509: This form of conditional instruction is deprecated
[2] https://msdn.microsoft.com/en-us/library/hh873189.aspx

Change-Id: I4c3b838b52a87401c6daecd83d22ab148ed7c5d9
2015-06-05 15:21:10 -07:00
James Zern
9c6eea35b6 Merge "vp9_reconintra: simplify d63_predictor" 2015-06-05 21:49:13 +00:00
Scott LaVarnway
5831a7127e Merge "BUG FIX: Remove counts param" 2015-06-05 20:04:26 +00:00
Scott LaVarnway
63819c033c BUG FIX: Remove counts param
member access within null pointer of type 'FRAME_COUNTS'

Change-Id: Id3bf75e0a6f2a1abf8522cf9fbb98b3a4443de38
2015-06-05 11:54:04 -07:00
Frank Galligan
8c854769fc Merge "Add control to skip loop filter in VP9 decoder." 2015-06-05 18:03:51 +00:00
Frank Galligan
44138d7d9e Merge "vpxenc: Add support for pixel aspect ratio." 2015-06-05 18:01:52 +00:00
Frank Galligan
bfb6d48812 Add control to skip loop filter in VP9 decoder.
This control allows the application to skip the loop filter in the
decoder. This is an advanced control that should only be used in
extreme circumstances as it may introduce and accumulate decode
artifacts.

Change-Id: I278c65c60826f84c9141ebe06c6eeed3c2335fa8
2015-06-05 10:07:09 -07:00
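A minimal sketch of how an application might request this, assuming the control is exposed as VP9D_SET_SKIP_LOOP_FILTER in vpx/vp8dx.h (error handling omitted):

    #include "vpx/vpx_decoder.h"
    #include "vpx/vp8dx.h"

    static void init_decoder_skip_loop_filter(vpx_codec_ctx_t *decoder) {
      vpx_codec_dec_init(decoder, vpx_codec_vp9_dx(), NULL, 0);
      /* 1 = skip the loop filter; may introduce and accumulate artifacts. */
      vpx_codec_control(decoder, VP9D_SET_SKIP_LOOP_FILTER, 1);
    }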
Frank Galligan
09acd267bc vpxenc: Add support for pixel aspect ratio.
WebM files will adjust the display width and height according to the
input pixel aspect ratio. The default pixel aspect ratio is 1:1.

BUG=https://code.google.com/p/webm/issues/detail?id=1005

Change-Id: I23e0a601b7259fa9513cb86110c41b8437769808
2015-06-05 09:56:50 -07:00
Johann
a4dad3e961 Merge "Duplicate reference variance code" 2015-06-05 16:54:33 +00:00
Marco
8710cceb45 Fix to spatial svc: set reference_frame masking.
For real-time mode: keep reference_frame masking off
for spatial svc.

Change-Id: I15e123c06f67ea040172b8d4042a672f3525b9d8
2015-06-05 08:25:33 -07:00
Parag Salasakar
78b434e8b1 Merge "mips msa vp9 loopfilter 4, 8 optimization" 2015-06-05 05:32:18 +00:00
Parag Salasakar
d43fd99822 mips msa vp9 loopfilter 4, 8 optimization
average improvement ~3x-4x

Change-Id: I59279293ce4b2a1e99bd10579ac97740e943643f
2015-06-05 09:56:08 +05:30
Marco
0d0db3e8c3 Merge "Bugfix in setting layer framerate." 2015-06-05 04:26:06 +00:00
James Zern
60d0b3364c vp9_reconintra/d45_predictor: remove temp storage
dst row 0 can be reused in the same way

Change-Id: Id977da62545dcc4a89cebbcbad90ba84f8ff5d6b
2015-06-04 20:11:53 -07:00
James Zern
7012ba6395 vp9_reconintra: simplify d63_predictor
calculate the averages needed for even and odd rows once; this removes a
conditional from the inner loop
the final average calculated currently relies on above[] being extended,
it could be reduced to use
above[block_size - 2] + 3 * above[block_size - 1]

Change-Id: I70f5eac8d8a2a959c7114844a95826f445c3dd4d
2015-06-04 19:21:05 -07:00
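The even/odd-row averages these predictor changes refer to are the usual 2-tap and 3-tap rounding averages; the definitions below are assumed to match the WebP-style AVG2/AVG3 macros used by the intra predictors, and the two helpers are illustrative only:

    #define AVG2(a, b) (((a) + (b) + 1) >> 1)
    #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)

    /* d63: even rows average pairs of above[] pixels, odd rows use the
     * 3-tap average of three consecutive above[] pixels. */
    static unsigned char d63_even(const unsigned char *above, int i) {
      return (unsigned char)AVG2(above[i], above[i + 1]);
    }
    static unsigned char d63_odd(const unsigned char *above, int i) {
      return (unsigned char)AVG3(above[i], above[i + 1], above[i + 2]);
    }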
Parag Salasakar
dc07cc6fed Merge "mips msa vp9 loopfilter 16 optimization" 2015-06-05 02:15:26 +00:00
James Zern
c2cf347fe2 Merge "vp9_reconintra: use AVG[23] consistently" 2015-06-05 02:15:22 +00:00
James Zern
2b6d62140e Merge "vp9_reconintra_neon_asm/tm4x4: simplify left load" 2015-06-05 01:46:39 +00:00
James Zern
6c3b691c49 Merge "vp9_reconintra: fix d45/d63 discrepancies" 2015-06-04 22:56:43 +00:00
Scott LaVarnway
7ccd7fc325 Merge "Remove cm parameter from vp9_decode_block_tokens() part 2" 2015-06-04 22:20:55 +00:00
James Zern
faea038f4f vp9_reconintra: fix d45/d63 discrepancies
the final index in rows 2, 3 differ from vp8

Change-Id: I0fcea907b4ab44e266c0f1fd77b290d2236b280a
2015-06-04 14:49:56 -07:00
Marco
8f7e7663ad Bugfix in setting layer framerate.
Index for ts_rate_decimator should be temporal layer (tl) index.

Change-Id: I0320b7f7ae987ef64fdfe7c45099e7978a8fef17
2015-06-04 13:12:09 -07:00
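A small sketch of the derivation the fix refers to: the per-layer frame rate is the stream frame rate divided by ts_rate_decimator indexed by the temporal layer (tl). The helper name is illustrative; ts_rate_decimator is the public vpx_codec_enc_cfg_t field:

    #include "vpx/vpx_encoder.h"

    static double layer_framerate(const vpx_codec_enc_cfg_t *cfg,
                                  double framerate, int tl) {
      return framerate / cfg->ts_rate_decimator[tl];
    }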
Yaowu Xu
f990b35fa4 Make vp9 the default codec for vpxenc
Change-Id: Ic3b4f3c9a6d8f9b04efb8b5ee080880895063564
2015-06-04 08:28:12 -07:00
Scott LaVarnway
baaaa57533 Reducing size of MODE_INFO struct
Reduced size from 124 bytes to 104 bytes.  For decode only builds,
it is reduced to 68 bytes.

Change-Id: If9e6b92285459425fa086ab5a743d0a598a69de3
2015-06-04 07:32:16 -07:00
Scott LaVarnway
8bb37dd069 Remove cm parameter from vp9_decode_block_tokens() part 2
Change-Id: Iee24b6bb095f748333223e6036fc5c9d9e7e5f1c
2015-06-04 07:13:19 -07:00
Scott LaVarnway
877fac122b Merge "Remove counts param" 2015-06-04 13:46:42 +00:00
Parag Salasakar
914f8f9ee0 mips msa vp9 loopfilter 16 optimization
average improvement ~3x-4x

Change-Id: I8ef263da6ebcf8f20aabaefeccf25a84640ba048
2015-06-04 11:50:41 +05:30
Johann Koenig
c005792951 Merge "Make vp9 subpixel match vp8" 2015-06-04 06:16:13 +00:00
Parag Salasakar
fd891a9655 Merge "mips msa vp9 convolve8 avg hv optimization" 2015-06-04 05:44:24 +00:00
Johann
eb88b172fe Make vp9 subpixel match vp8
The only difference between the two was that the vp9 function allowed
for every step in the bilinear filter (16 steps) while vp8 only allowed
for half of those. Since all the call sites in vp9 (<< 1) the input, it
only ever used the same steps as vp8.

This will allow moving the subpel variance to vpx_dsp with the rest of
the variance functions.

Change-Id: I6fa2509350a2dc610c46b3e15bde98a15a084b75
2015-06-03 22:10:51 -07:00
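A worked check of that observation (a standalone sketch, not the library's tables): bilinear taps are linear in the sub-pel offset, so the 16-step kernel at offset (o << 1) equals the 8-step kernel at offset o:

    #include <assert.h>

    static void bilinear_16step(int s, int taps[2]) { /* s in [0, 16) */
      taps[0] = 128 - 8 * s;
      taps[1] = 8 * s;
    }

    static void bilinear_8step(int o, int taps[2]) { /* o in [0, 8) */
      taps[0] = 128 - 16 * o;
      taps[1] = 16 * o;
    }

    int main(void) {
      int o, a[2], b[2];
      for (o = 0; o < 8; ++o) {
        bilinear_8step(o, a);
        bilinear_16step(o << 1, b);
        assert(a[0] == b[0] && a[1] == b[1]);
      }
      return 0;
    }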
hkuang
ce5e17072d Merge "Optimize the idct assembly code." 2015-06-04 04:32:11 +00:00
James Zern
4fcabf5169 vp9_reconintra: use AVG[23] consistently
Change-Id: Iab7215f82be0c0c831cd81b6f8091afc3710dd54
2015-06-03 19:52:46 -07:00
Parag Salasakar
bdfbc3e876 mips msa vp9 convolve8 avg hv optimization
average improvement ~4x-6x

Change-Id: I7c8b4f2334491be8a859592606e568bc95d019aa
2015-06-04 08:11:01 +05:30
James Zern
2da8d24e8f Merge "vp9_reconintra: simplify d45_predictor" 2015-06-04 01:59:10 +00:00
James Zern
a9f55e8324 Merge changes from topic 'vp9-intra-pred'
* changes:
  vp9_reconintra: specialize d135 4x4
  vp9_reconintra: specialize d117 4x4
  vp9_reconintra: specialize d207 4x4
  vp9_reconintra: specialize d153 4x4
  vp9_reconintra: specialize d63 4x4
  vp9_reconintra: specialize d45 4x4
2015-06-04 01:58:28 +00:00
Marco
2561b4dd4c Merge "Remove ABI check for 1 pass CBR SVC." 2015-06-04 01:54:19 +00:00
James Zern
65d9599807 vp9_reconintra_neon_asm/tm4x4: simplify left load
use vld1.8 {d0[]}, [r0] rather than ldrb+vdup; mildly faster

Change-Id: Ia5ffc736bcb0f5497b7d9e55a93bf5a5f5f6928c
2015-06-03 18:51:13 -07:00
Parag Salasakar
f1b09c0433 Merge "mips msa vp9 convolve8 avg horiz optimization" 2015-06-04 01:16:21 +00:00
Marco
a8c5ab2ca6 Remove ABI check for 1 pass CBR SVC.
Remove the ABI check for the controls needed for SVC 1 pass CBR mode.
Bump up the ABI version.

Change-Id: I35b79ee010e14af83c6d1e801d574deaaa2fc7eb
2015-06-03 17:43:22 -07:00
hkuang
98e88e6ad8 Optimize the idct assembly code.
Change-Id: Ia0ff859ff1c813dbe100e2f27b1ef78167483f4e
2015-06-03 17:20:35 -07:00
Paul Wilkins
668e804504 Animation and dead zone detection.
Adds code to detect dead zone bars at the top and bottom
of reformatted letterbox video (note that the code only
looks at the top of the image and assumes any dead zone
is symmetrical).  Use of this to adapt rate control etc.
will follow in a subsequent patch.

Also counts other blocks (excluding the dead zone) that
have no intra signal. The presence of a significant
number of such blocks can be used as an indication that the frame
may be artificial (e.g. animation, screen capture, graphics).
This patch contains plumbing only and does not use
the signal.

Change-Id: I59bc93529cd4065416cef773e405fda3ae006a20
2015-06-04 01:01:20 +01:00
Marco
5df6c04585 Fix to sample encoder: vpx_temporal_svc_encoder.c
vp8 uses ts_target_bitrate for layer settings.

Change-Id: Ie72477b549051396feebff87a3744fed04366bf4
2015-06-03 14:50:27 -07:00
Johann
ce2ca9f777 Duplicate reference variance code
Some places are using the unoptimized variance function. This was never
intended and does not fit into the optimization framework.

Change-Id: Id96238407aad03b0ffd4a46cd183555a026daedc
2015-06-03 13:28:59 -07:00
Johann
516c087c51 Remove unused sub pixel mse
Change-Id: I7a5e4e2632c3fa69d2a85a68fa9b418631caf09c
2015-06-03 08:00:51 -07:00
Parag Salasakar
b8c1cdcd12 mips msa vp9 convolve8 avg horiz optimization
average improvement ~5x-8x

Change-Id: I179a69ec620fbd69979bd128f05d18113618aab4
2015-06-03 11:33:42 +05:30
Parag Salasakar
179bceffdb Merge "mips msa vp9 convolve8 avg vert optimization" 2015-06-03 05:58:43 +00:00
James Zern
0601f92a88 Merge "test-data.sha1: mark test data files as binary" 2015-06-03 05:49:43 +00:00
James Zern
88fadafa9e Merge "Support building shared libraries on OS/2" 2015-06-03 05:47:43 +00:00
KO Myung-Hun
19dbc0f066 test-data.sha1: mark test data files as binary
Change-Id: Ie3605bf4c4fb16eb21186adbb4577c20a8027344
2015-06-03 13:48:35 +09:00
KO Myung-Hun
6d52fe2e71 Support building shared libraries on OS/2
Change-Id: I5750db0504cb69ead52f9f44bf583e693bffbb7e
2015-06-03 13:48:34 +09:00
Parag Salasakar
c543d38ac7 mips msa vp9 convolve8 avg vert optimization
average improvement ~4x-6x

Change-Id: Ia2e6f770da46416ebec31fdcea5cc7878879a9d9
2015-06-03 09:55:25 +05:30
Parag Salasakar
622beaba67 Merge "mips msa vp9 idct4x4 and iwht4x4 optimization" 2015-06-03 04:10:14 +00:00
Johann
01853d7ce9 Merge "Unify reference variance functions" 2015-06-03 02:03:45 +00:00
Johann
d90536c1a2 Unify reference variance functions
Use uint32_t for all output and make all functions static

Change-Id: I2c9c6f6310732dc53444607d1c1a268ac1ab83ba
2015-06-02 15:14:55 -07:00
Johann
e2a5fd2cf4 Merge "Disable neon bilinear 4x4" 2015-06-02 21:44:14 +00:00
Scott LaVarnway
f779dba405 Remove counts param
Moved to MACROBLOCKD.

Change-Id: Icce765b334f2755f4fe2a4c39fb2ae2d7660d004
2015-06-02 09:06:00 -07:00
Marco
c139b81a13 Vidyo patch: Rate control for SVC, 1 pass CBR mode.
-Make Rate control work for SVC 1 pass CBR mode.
-Added temporal layering mode.
-Fixed bug in non-rd variance partition.
-Modified/updated the sample encoders (vp9_spatial_svc_encoder, vpx_temporal_svc_encoder).
-Added datarate unittest(s) for 1 pass CBR SVC.

Change-Id: Ie94b1b68a56ea1267b5087c625e5df04def2ee48
2015-06-02 07:54:13 -07:00
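For reference, a hedged sketch of the kind of 1-pass CBR temporal-layer configuration this work targets, using public vpx_codec_enc_cfg_t fields (two temporal layers at half/full frame rate; the bitrate split is an arbitrary example, not from the patch):

    #include "vpx/vpx_encoder.h"

    static void configure_two_temporal_layers(vpx_codec_enc_cfg_t *cfg,
                                              int bitrate_kbps) {
      cfg->g_pass = VPX_RC_ONE_PASS;
      cfg->rc_end_usage = VPX_CBR;
      cfg->ts_number_layers = 2;
      cfg->ts_rate_decimator[0] = 2;   /* base layer runs at half frame rate */
      cfg->ts_rate_decimator[1] = 1;   /* enhancement layer at full rate     */
      cfg->ts_target_bitrate[0] = bitrate_kbps * 6 / 10;
      cfg->ts_target_bitrate[1] = bitrate_kbps;
      cfg->ts_periodicity = 2;         /* layer pattern: 0, 1, 0, 1, ...     */
      cfg->ts_layer_id[0] = 0;
      cfg->ts_layer_id[1] = 1;
    }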
Parag Salasakar
54a6f73958 mips msa vp9 idct4x4 and iwht4x4 optimization
average improvement ~3x-4x
moved assert to respective files

Change-Id: I6c915059d456a00bdd76fab0dd2eede8b6c6ea58
2015-06-02 12:16:28 +05:30
Parag Salasakar
ebf7466cd8 mips msa vp9 updated convolve horiz, vert, hv, copy, avg module
Updated sources according to improved version of common MSA macros.
Enabled respective convolve MSA hooks and tests.
Overall, this is just upgrading the code with styling changes.

Change-Id: If5ad6ef8ea7ca47feed6d2fc9f34f0f0e8b6694d
2015-06-02 12:03:51 +05:30
Parag Salasakar
cf1c0ebc3a Merge "mips msa vp9 updated idct 8x8, 16x16 and 32x32 module" 2015-06-02 04:48:02 +00:00
Johann
86d0cb8325 Disable neon bilinear 4x4
Clang adds alignment hints when casting up the loads/stores. Although
this should be safe for most paths, it's causing some crashes. Either
the source of the misalignment needs to be determined and adjusted or
the intrinsics need to be rewritten to avoid using the cast to load the
data.

BUG=817,892

Change-Id: Ia3aa824d6a4cd97e14325ff49dc730b6f85ec7e8
2015-06-02 00:02:55 +00:00
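A general illustration of the hazard described (not the libvpx code): casting a byte pointer up to a wider type lets the compiler assume the wider type's alignment, which can become a faulting aligned load on ARM; memcpy expresses the same 4-byte load without that assumption:

    #include <stdint.h>
    #include <string.h>

    static uint32_t load_u32_cast(const uint8_t *p) {
      return *(const uint32_t *)p;  /* compiler may assume 4-byte alignment */
    }

    static uint32_t load_u32_memcpy(const uint8_t *p) {
      uint32_t v;
      memcpy(&v, p, sizeof(v));     /* no alignment assumption */
      return v;
    }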
Johann Koenig
cbebbff025 Merge "Adds subsecond frame rates to webm" 2015-06-01 22:36:09 +00:00
James Zern
71d923232c Merge changes from topic 'vp9-intra-pred'
* changes:
  vp9_reconintra_neon/tm: improve above_left load
  vp9_reconintra_neon: cosmetics: normalize fn params
2015-06-01 20:03:47 +00:00
James Zern
b601202905 Merge "vp9_reconintra_neon_asm/tm: simplify above_left load" 2015-06-01 20:01:38 +00:00
Johann
275c102787 Merge "Cast variance reference output" 2015-06-01 18:56:50 +00:00
paulwilkins
dbd3760712 Merge "Fast feedback of bits on undershoot." 2015-06-01 18:15:10 +00:00
Johann
fdc549994a Cast variance reference output
The larger internal variables are required for the intermediates
but RoundHighBitDepth brings them down to uint32_t/unsigned int.

Fixes type warnings in visual studio.

Change-Id: I48d35284d6cbde330ccdc1f46b6215a645d5eb00
2015-06-01 10:56:52 -07:00
Johnny Klonaris
d02aa04422 Adds subsecond frame rates to webm
Numerator was being range checked against the
denominator - preventing any frame rate slower
than 1 fps.

I've tested this on a Mac using ffmpeg and
results are comparable to mp4 and ogg files generated
at the same time.

Not yet tested on Windows.

Johnny Klonaris
google@jawknee.com

Change-Id: Idb358dbc2e7dc000037880ede4a1b0df248a42c8
2015-06-01 10:13:58 -07:00
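As a small illustration (an assumption about usage, not part of the patch itself), the frame rate is carried as a numerator/denominator pair, so a rate slower than 1 fps is simply num < den:

    #include "vpx/vpx_encoder.h"

    /* One frame every two seconds, i.e. 0.5 fps. */
    static const vpx_rational_t kHalfFps = { 1, 2 };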
Parag Salasakar
6af9d7f2e2 mips msa vp9 updated idct 8x8, 16x16 and 32x32 module
Updated sources according to improved version of common MSA macros.
Enabled idct MSA hooks and tests.
Overall, this is just upgrading the code with styling changes.

Change-Id: I1f488ab2c741f6c622b7a855388a202168082209
2015-06-01 09:24:23 +05:30
James Zern
acc481eaae vp9_reconintra: simplify d45_predictor
only the immediate above right pixel is needed; this removes a
conditional from the inner loop
the final average calculated currently relies on above[] being extended,
it could be reduced to use above[block_size - 2] + 3 * above_right

Change-Id: Ica4f2b8d25eec3ca1d6fa52ef0d4adc228eeea3f
2015-05-30 13:30:59 -07:00
James Zern
6e068e51b5 vp9_reconintra: specialize d135 4x4
based on webp's RD4()

Change-Id: I64c8f0a1325a8f201eaad39b396fae7a2d06efff
2015-05-30 13:29:40 -07:00
James Zern
b6782686f4 vp9_reconintra: specialize d117 4x4
based on webp's VR4()

Change-Id: Ic8c0b8ed65a63772ca0a4321592880a5e8947db5
2015-05-30 13:29:02 -07:00
James Zern
c022dbc4d3 vp9_reconintra: specialize d207 4x4
based on webp's HU4()

Change-Id: I2401ef307cd94e70cc7904f55954af04290c8af9
2015-05-30 13:28:22 -07:00
James Zern
2276eb16f3 vp9_reconintra: specialize d153 4x4
based on webp's HD4()

Change-Id: Icba1e21ec4b8f5026dc92e49741a68b059c8b9b1
2015-05-30 13:27:50 -07:00
James Zern
102123821d vp9_reconintra: specialize d63 4x4
based on webp's VL4()

Change-Id: Ibab962053843eae8752b4e74b6481a53bb034ae9
2015-05-30 13:27:03 -07:00
James Zern
6051bcc3dc vp9_reconintra: specialize d45 4x4
based on webp's LD4()

Change-Id: I74855d23ce73e1c6988fe08bf7c959b7a69b4abf
2015-05-30 13:26:21 -07:00
Parag Salasakar
71e88f903d Merge "mips msa vp9 updated macros and disable all MSA functions" 2015-05-30 02:52:27 +00:00
James Zern
7621b48a1c vp9_reconintra_neon/tm: improve above_left load
use vld1?_dup_u8 over vdup?_n_u8, reduces general register use; mildly
faster

Change-Id: Ie0e4e550849a207b34b378541196b553c9f12011
2015-05-29 19:18:43 -07:00
James Zern
f2d621e383 vp9_reconintra_neon: cosmetics: normalize fn params
s/y_stride/stride/

Change-Id: Ie98c3fe241dc240b653849eda356a8862bdd52f4
2015-05-29 19:01:39 -07:00
James Zern
b337c54cc4 vp9_reconintra_neon_asm/tm: simplify above_left load
use vld1.8 {d0[]}, [r0] rather than ldrb+vdup; mildly faster

Change-Id: I5c24d49a90c2855c94395184774b289da8e9d5a7
2015-05-29 18:56:16 -07:00
James Zern
7544e766e4 Merge changes from topic 'vp9-intra-pred'
* changes:
  vp9_reconintra_neon: add DC 16x16 predictors
  vp9_reconintra_neon: add DC 8x8 predictors
2015-05-30 01:51:35 +00:00
James Zern
a2a13cbe5f vp9_reconintra_neon: add DC 16x16 predictors
85-89% faster over 20M pixels

Change-Id: I9b320ed6b9e67f27df738b84c8b43b65a93c50c2
2015-05-29 15:41:44 -07:00
James Zern
e97b849219 vp9_reconintra_neon: add DC 8x8 predictors
~90% faster over 20M pixels

Change-Id: Iab791510cc57c8332c2f9a5da0ed50702e5f5763
2015-05-29 15:39:08 -07:00
Parag Salasakar
f9f078ebb6 mips msa vp9 updated macros and disable all MSA functions
Made small restructuring/styling changes to the sources, such as generic macro definitions and their use to reduce code lines, better code alignment, etc.
Disabled all MSA hooks and tests

Change-Id: Ic6f2dce0b501f46b80c06c46c0fe2043d557b190
2015-05-29 13:34:33 +05:30
James Zern
be380f2005 variance_neon: add missing include
vpx_ports/mem.h is necessary for MSVC __builtin_prefetch compatibility
macro

Change-Id: I210fad6c6b4545df1874d028b31f42018490b029
2015-05-28 23:38:53 -07:00
Marco
26ab314176 For non-rd pickmode: remove VAR_PARTITION condition.
Keep the logic for transform size based on cyclic refresh and bsize
(which was conditioned on VAR_PARTITION) the same
for all speeds in non-rd mode (speeds >= 5).

No change to speeds >=6.
Small improvement for speed 5, ~0.5/1.5% gain for avg psnr/ssim.

Change-Id: If9c5657f3d30efd3c7f147166bba7cb69ea55114
2015-05-28 17:29:47 -07:00
Minghai Shang
45db29784d Merge "[svc] Disable tiles for spatial svc case" 2015-05-28 22:13:54 +00:00
Johann
7c16dcc79b Merge "Check size restrictions before running test vector" 2015-05-28 22:01:53 +00:00
Scott LaVarnway
bbea7c95d8 Merge "Re-worked header files" 2015-05-28 19:56:39 +00:00
Johann
a927aec5f8 Merge "Use correct parameters for NEON variance tests" 2015-05-28 19:53:50 +00:00
Johann
89ab1dca50 Merge "Remove conversion warnings from hbd shifts" 2015-05-28 19:39:32 +00:00
Johann
3f2a06674a Merge "Don't #define snprintf in VS 2015 or higher." 2015-05-28 19:38:57 +00:00
Johann
efc2e9844e Use correct parameters for NEON variance tests
Change-Id: Ib2949d0a3e9273e7952bbf91956357c1138093f1
2015-05-28 11:28:06 -07:00
hkuang
5317185eb0 Merge "Add error handling when running out of free frame buffers." 2015-05-28 17:41:01 +00:00
Johann
cad0eca25c Don't #define snprintf in VS 2015 or higher.
In VS 2015 and higher snprintf is supplied and therefore vsnprintf
doesn't need to be defined. This also avoids problems caused by
_snprintf being different from snprintf.

This fixes a build break with VS 2015 and improves security.

Originally submitted via chromium by brucedawson@chromium.org
https://codereview.chromium.org/1055603003

Additionally break this MSVC-specific tweak to a new file, which will
become the home of all such MSVC-specific things.

This requires adding a dependency on msvc.h to every example which uses
args.c and tools_common.h

Change-Id: I35b5f8e7ea00f6627403aabc9ea79b0412557a99
2015-05-27 18:28:25 -07:00
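A sketch of the usual guard for this, assuming _MSC_VER 1900 corresponds to VS 2015 (not necessarily the exact contents of the new MSVC-specific header):

    #if defined(_MSC_VER) && _MSC_VER < 1900  /* VS 2015 supplies snprintf */
    #define snprintf _snprintf
    #endif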
Johann
c855ed72a6 Remove conversion warnings from hbd shifts
ROUND_POWER_OF_TWO has some poor side effects when used
with [u]int64_t such as doing the shifting in 32bits.

Change-Id: Ic85a19765cd316fb43657cb21c86f35ceb772773
2015-05-27 17:54:22 -07:00
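For context, the macro in question is assumed to match the common libvpx definition below; a 64-bit-safe variant of the kind such fixes typically use widens the rounding constant, which the 32-bit form computes as a plain int:

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

    /* 64-bit-safe variant: the rounding constant is built as an int64_t. */
    #define ROUND_POWER_OF_TWO_64(value, n) \
      (((value) + ((int64_t)1 << ((n)-1))) >> (n))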
Johann
1e4473b216 Check size restrictions before running test vector
Change-Id: I60ea7724e6ab06fc658f678c1b76d984a43f5a5e
2015-05-27 15:55:17 -07:00
hkuang
131cab7c27 Add error handling when running out of free frame buffers.
Change-Id: If28b59b9521204a6e3aecedcf75932d76a752567
2015-05-27 14:20:58 -07:00
Johann
ed93470a69 Merge "Only use one 'END' per file" 2015-05-27 20:36:31 +00:00
Minghai Shang
cbdfdb947c Merge "[decoder] Optimize context buffer re-allocation" 2015-05-27 20:24:30 +00:00
Marco
a49fff632c Non-rd variance partition: Adjust thresholds for 1080p.
Increase the 32x32 split threshold, to allow for more 32x32
at the expense of 16x16. Visually it looks somewhat better.

Change-Id: Ia1439c3a0dc2d7933468b88bd59266fcd9f03505
2015-05-27 12:30:35 -07:00
Johann
7e272e8c43 Merge "Correct case in Get4x4SSEFunc" 2015-05-27 19:26:47 +00:00
Marco
109a2edf90 Merge "Refactor set_vbp_thresholds." 2015-05-27 19:10:28 +00:00
Johann
bbefdce7eb Only use one 'END' per file
On visual studio builds the 'END' directive aggressively signals the end
of file.

Change-Id: I28714da32762ef5abcbaeb5a109fb02b80dd13ec
2015-05-27 12:01:32 -07:00
Johann
c5a7c89e89 Correct case in Get4x4SSEFunc
Change-Id: Ie8a7508798fa8e65c579a77cedb8305cee4ddc81
2015-05-27 11:38:43 -07:00
Minghai Shang
30181c46d8 Merge "[svc] Make size of empty frame to be 16x16 all the time" 2015-05-27 17:49:00 +00:00
Marco
f76d42a98a Refactor set_vbp_thresholds.
Break out the setting of the block variance split thresholds,
since they are locally modified, e.g., based on local/segment qp.

No change in performance.

Change-Id: I0a3238e6dab05140657539fc4bd27ac5ff7a554e
2015-05-27 09:25:18 -07:00
Minghai Shang
15353216c5 [svc] Make size of empty frame to be 16x16 all the time
Change-Id: Ibab09aa0e8c69cf5efea2f0ec035e5da9cc894b0
2015-05-26 16:04:36 -07:00
Johann
dee70d355f Merge "Move variance functions to vpx_dsp" 2015-05-26 23:02:11 +00:00
Johann
c3bdffb0a5 Move variance functions to vpx_dsp
subpel functions will be moved in another patch.

Change-Id: Idb2e049bad0b9b32ac42cc7731cd6903de2826ce
2015-05-26 12:01:52 -07:00
Minghai Shang
9ae5fb706e Merge "[svc] Turn on frame_parallel_decoding_mode" 2015-05-26 17:50:45 +00:00
Jingning Han
55ef1ae9e7 Merge "Fix integral projection motion search for frame resize" 2015-05-26 16:08:31 +00:00
James Zern
02fda6582c Merge changes Ie15e301e,Ib070c79b
* changes:
  vp9_reconintra_neon: cosmetics: reindent
  vp9_reconintra_neon: cosmetics: drop unneeded returns
2015-05-23 17:47:52 +00:00
James Zern
4e11f3ca6e vp9_reconintra_neon: cosmetics: reindent
Change-Id: Ie15e301e8f55cf928f42a03e53a8bb8b66d0e5d5
2015-05-22 21:04:30 -07:00
James Zern
ff683ab1da vp9_reconintra_neon: cosmetics: drop unneeded returns
Change-Id: Ib070c79bdbb9c1f4e25af693d7056ec9f964c789
2015-05-22 20:59:36 -07:00
James Zern
8c15ced172 vp9: move ssse3 convolve fns to intrinsics file
+ synchronize filter function signatures

this makes any intrinsics filters available for inlining and has the
side-effect of making those filters static, quieting missing-prototype
warnings.

Change-Id: I1908875caffa585bd4fc65aaf10d17a5e20cfb46
2015-05-22 20:14:16 -07:00
James Zern
2161e44025 vp9: move avx2 convolve fns to intrinsics file
+ synchronize filter function signatures

this makes any intrinsics filters available for inlining and has the
side-effect of making those filters static, quieting missing-prototype
warnings.

Change-Id: I1cd55c9d52547793ad65aa90c7620f0e426edaa2
2015-05-22 20:13:06 -07:00
James Zern
ef2b3cce50 add vp9/common/x86/convolve.h
collect the vp9_convolve function definition macros there; this will
allow some relocation of functions from vp9_asm_stubs.c

Change-Id: Idadd117fa256dd48748379856973fd985b8204e8
2015-05-22 20:12:16 -07:00
Johann
9e420c01da Merge "Remove unused "alt-tree-layout"" 2015-05-22 23:52:48 +00:00
Jingning Han
96dba4902c Fix integral projection motion search for frame resize
This commit fixes the integral projection motion search crash when
frame resize is used. It fixes issue 994.

Change-Id: Ieeb52619121d7444f7d6b3d0cf09415f990d1506
2015-05-22 15:40:45 -07:00
James Zern
48d8291df4 vp9_subpixel_8t_intrin_ssse3: quiet vs9 warning
reorder includes to avoid:
warning C4985: 'ceil': attributes not present on previous declaration.

this is the same workaround used in vp9/common/vp9_systemdependent.h

Change-Id: Ia10dd63de24f96fa1507a6179220e9d6ec774db6
2015-05-22 12:05:02 -07:00
Johann
7875e1d8ad Remove unused "alt-tree-layout"
Change-Id: Idddefb2fd3e4441421f61bd246479c627020a652
2015-05-22 11:35:37 -07:00
Scott LaVarnway
b962646fc5 Re-worked header files
Various header/test files had to be re-worked in order to
build "Remove cm parameter from vp9_decode_block_tokens()".

This patch reverts the "Remove cm" part and only contains
the re-worked header files.

Change-Id: I520958a88d1991fee988a3c784d0eac40e117a32
2015-05-22 11:19:51 -07:00
Marco
7ca17435d5 Fix to visual studio build error.
Change-Id: Ide080141ebc064584574c861fb324fe64cc572cc
2015-05-21 14:08:32 -07:00
paulwilkins
da8c3bf218 Merge "Fix issues with mixed ARF and GF groups." 2015-05-21 12:08:15 +00:00
James Zern
a492bcef87 vp9_mvref_common.c: fix compile warning
string literal to int within an assert

Change-Id: Ifd7acc717e01ee1bb3955ef830ec0d1645942459
2015-05-20 16:45:16 -07:00
Minghai Shang
9843e7c635 [svc] Disable tiles for spatial svc case
Change-Id: I8655a6760ab61947c09f337ddd9f4c1baf803a56
2015-05-20 14:31:49 -07:00
Marco
6f41e29064 vp8_drop_encodedframe_overshoot: fix to return setting.
Make sure force_maxqp is set to 0 for return 0.

Change-Id: Ie7c57842637226e932a390e7080e5ebb99996da3
2015-05-20 12:26:08 -07:00
Minghai Shang
48bfee8797 [decoder] Optimize context buffer re-allocation
1. Check existing buffer sizes when re-allocating context buffers.
2. There is no need to set mi buffers to 0 during setup_mi.

Change-Id: I6b48b0e077a4d804312b605ad0dc34aec5795a6d
2015-05-20 11:05:22 -07:00
Minghai Shang
e2c6a633fb [svc] Turn on frame_parallel_decoding_mode
Change-Id: I33b0384ee87f83950e03be6c999bc5f193055fd3
2015-05-20 10:56:48 -07:00
paulwilkins
883fdd45cf Fast feedback of bits on undershoot.
This patch provides a partial rapid feedback of bits
resulting from extreme undershoot.

Some improvement on some problem animated material,
but in its current form only a small impact on the metrics results
of our standard test sets.

Change-Id: Ie03036ea8123bc2553437cb8c8c9e7a9fc5dac5d
2015-05-20 16:47:34 +01:00
paulwilkins
ade9693a30 Fix issues with mixed ARF and GF groups.
This patch addresses two issues that can occur when the
encoder chooses to use a mixture of ARF and GF groups.

The first issue relates to a failure to reset the "ARF active" flag
correctly when transitioning from coding ARF groups to coding
GF groups. This caused some golden frames to be  encoded
with an incorrect bit rate target as if they were ARF overlay frames.

The second issue relates to the encoding of a single short GF group
just before a key frame.  Where the last group before a key frame
is an ARF group we expect the final frame before the key frame  to
be an low data rate overlay frame. However, when the last  group
is a GF group, the final frame before the key frame should be a normal
frame with a normal bit allocation. This issue had the potential to cause
a single poorly coded frame just before a key frame. If that key frame
were a forced key frame rather than a real scene cut, this might cause
pulsing.

Change-Id: Idf1eb5eaf63a231495a74de7899236e1ead9fb00
2015-05-20 16:46:44 +01:00
Marco
976f7f42c1 VP8: For high overshoot, force drop frame and max-out QP.
This allows rate control to react to the content of the current frame being encoded.
The feature is enabled via the setting screen_content_mode = 2.

Change-Id: Ib2c6670551d96f4907495d5b7b76bb8c49e673db
2015-05-19 08:19:21 -07:00
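A minimal sketch of enabling this from the application side, assuming the setting is exposed through the VP8E_SET_SCREEN_CONTENT_MODE encoder control (value 2 per the message above; error handling omitted):

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    static void enable_screen_content_overshoot_handling(vpx_codec_ctx_t *codec) {
      /* 2 = screen content mode with drop-frame/max-QP reaction to overshoot. */
      vpx_codec_control(codec, VP8E_SET_SCREEN_CONTENT_MODE, 2);
    }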
James Zern
37d03809bf Merge "rename vp9_dct_impl_sse2.c to vp9_dct_sse2_impl.h" 2015-05-19 00:01:57 +00:00
James Zern
1bcf7a35e9 Merge "rename vp9_dct32x32_sse2.c to vp9_dct32x32_sse2_impl.h" 2015-05-19 00:01:22 +00:00
James Zern
0f854e9ab5 Merge "rename vp9_dct32x32_avx2.c to vp9_dct32x32_avx2_impl.h" 2015-05-19 00:01:07 +00:00
James Zern
3373f0e93b md5_helper.h: fix type conversion warning
add a cast in Add() from size_t -> uint32; quiets a visual studio build
warning

Change-Id: I0d87a3e460faf1fe2d4fb44df5f4042ef7500190
2015-05-17 11:21:22 -07:00
James Zern
8089f1cf13 test_intra_pred_speed: reuse test_libvpx's main()
this allows test_libvpx's simd caps check to be used; it also fixes a
link error on OS X with -fcommon.

Change-Id: I1a62a3e74ba06b8f3b37a22fcfdebf90c04ab289
2015-05-16 17:16:27 -07:00
James Zern
129a25c76d test_libvpx: add <arch>.* to negative filter
in addition to <arch>/*. this will pick up tests defined with TEST()
instead of INSTANTIATE_TEST_CASE_P()

Change-Id: I0917741baac89d9ce857f4d4aa53790e8a0c6c12
2015-05-16 11:52:59 -07:00
James Zern
85076fc5ab dec_build_inter_predictors: don't return a void fn
split call of extend_and_predict() and return, fixes visual studio build
warning since:
0a80164 Move mc_buf to cut down size of MACROBLOCKD.

Change-Id: I7cdf712941ef773a07f038539cb8080dc27861cd
2015-05-16 10:33:57 -07:00
James Zern
a989c66b84 rename vp9_dct_impl_sse2.c to vp9_dct_sse2_impl.h
this file shouldn't be built directly, it is included in vp9_dct_sse2.c
to create a non-high-bitdepth and a high-bitdepth version

silences missing prototype warnings for the unused FDCT* functions

Change-Id: Ide6ff8c24ab31bdb0f833260505ae33660a1ad5b
2015-05-15 17:01:19 -07:00
James Zern
587a71f1d6 rename vp9_dct32x32_sse2.c to vp9_dct32x32_sse2_impl.h
this file shouldn't be built directly, it is included in vp9_dct_sse2.c
to create a non-high-bitdepth and a high-bitdepth version

silences missing prototype warnings for the unused FDCT32x32* functions

Change-Id: I0e38f16dae5ea1728de184ee2c89287d48675c51
2015-05-15 16:59:52 -07:00
James Zern
4ec47249bc rename vp9_dct32x32_avx2.c to vp9_dct32x32_avx2_impl.h
this file shouldn't be built directly, it is included in vp9_dct_avx2.c
to create a non-high-bitdepth and a high-bitdepth version

silences missing prototype warnings for the unused FDCT32x32* functions

Change-Id: I4c19935c0e035b393be513bde735e9a78064a494
2015-05-15 16:47:51 -07:00
James Zern
cdf16c22ba Merge "tests: add test_intra_pred_speed" 2015-05-15 23:34:06 +00:00
James Zern
985f19bc6b Merge changes from topic 'missing-proto'
* changes:
  vp9_subexp.h: add a missing prototype
  vp9: add some missing includes
  vp9 intrinsics: add vp9_rtcd include
  vp9: correct some function signatures
  vp9_variance_sse2: sync function signatures
  vp9/encoder: make some functions static
  vp9_dct_sse2: make some functions static
  vp9_decodeframe.c: make a function static
2015-05-15 23:08:15 +00:00
Marco
beec69cfe2 Merge "Change tx_size_search_method setting for non-rd speed 5." 2015-05-15 21:29:01 +00:00
Marco
e88de49faa Change tx_size_search_method setting for non-rd speed 5.
Use the same setting as in speed >= 6.
This will use the same logic for tx_size selection as in speed >= 6,
which limits the transform size and reduces ringing artifacts.
Also metrics go up on average with this change: ~2% for PSNR, ~10% for SSIM.

Change-Id: Ia2d50db236ae1cc72f742bfa6c9ec5ea50ff0e0a
2015-05-15 11:12:47 -07:00
James Zern
ca5a54113f vp9_subexp.h: add a missing prototype
+ include the .h in the .c
silences missing prototype warnings

Change-Id: Ia87366dccb4bf4e9f2ffa5d3ab51ac6ca5488c91
2015-05-15 10:43:48 -07:00
James Zern
97db651ce0 vp9: add some missing includes
mostly: <file>.c should include <file>.h
silences missing prototype warnings

Change-Id: Ic05ec32c6f7b2224b78825904d96d73aacad6000
2015-05-15 10:43:47 -07:00
James Zern
330fba41e2 vp9 intrinsics: add vp9_rtcd include
silences a missing declaration warning

Change-Id: I59a34e1a1377cf3529b678d7ec0122bd43ab1bf1
2015-05-15 10:43:47 -07:00
James Zern
18b60af27c vp9: correct some function signatures
silences missing prototype warnings

Change-Id: Idaf68d83d2cb03847f3ee002c4d00c2ac79da604
2015-05-15 10:43:47 -07:00
James Zern
43d5cc7fe1 vp9_variance_sse2: sync function signatures
+ include vp9_rtcd.h
silences missing prototype warnings

Change-Id: I77902f07a454029baad4fe5fe6fc37c65644e6f7
2015-05-15 10:43:47 -07:00
James Zern
700b7fd0a9 vp9/encoder: make some functions static
silences missing prototype warnings

Change-Id: I3338fcaa67b5dcdf6bf237e8b374db3befd18753
2015-05-15 10:43:47 -07:00
James Zern
8515e62e6b vp9_dct_sse2: make some functions static
silences missing prototype warnings

Change-Id: I773b6a6b5bd7c57db18c3b17c519534f80e131de
2015-05-15 10:43:47 -07:00
James Zern
f3bf5f2029 vp9_decodeframe.c: make a function static
silences a missing declaration warning

Change-Id: I2f49ebca9ba7a47f3c48f5fe919b90cd4114a9bc
2015-05-15 10:43:47 -07:00
Adrian Grange
8371c897dc Merge "Fix illegal memory access when stream starts w/ invisible frame." 2015-05-15 17:36:41 +00:00
James Zern
8dfeece878 Merge changes from topic 'missing-proto'
* changes:
  vp8/rdopt.h+onyx_int.h: add some missing prototypes
  vp8: add some missing includes
  vp8: make some functions static
  vp8/common/variance*: add vp8_rtcd include
  vp8_copy32xn: sync function signature
2015-05-15 17:19:50 +00:00
James Zern
0e5d0f7485 Merge changes from topic 'missing-proto'
* changes:
  sad*_avx2.c: sync function signatures
  vpx_scale: add missing rtcd + vpx_scale includes
2015-05-15 17:17:13 +00:00
James Zern
1fb9a8eed2 Merge changes from topic 'missing-proto'
* changes:
  fdct8x8_test: move functions to an anonymous namespace
  md5_utils.c: make a function static
  vpx_config.c: add vpx_codec include
2015-05-15 17:16:52 +00:00
Frank Galligan
d610ead258 Merge "Move mc_buf to cut down size of MACROBLOCKD." 2015-05-15 15:20:39 +00:00
paulwilkins
4f569e8485 Merge "Revert "Skip the last frame update for some frame repeats."" 2015-05-15 09:17:19 +00:00
James Zern
60b9f685cb vp8/rdopt.h+onyx_int.h: add some missing prototypes
silences missing prototype warnings

Change-Id: Icd477e37b502205d0a60e7389e51b1ba17d8888e
2015-05-14 22:41:26 -07:00
James Zern
62ad8baa40 vp8: add some missing includes
silences missing prototype warnings

Change-Id: Ib62e4743532b871e63bc99732875fff20501b8ac
2015-05-14 22:41:25 -07:00
James Zern
632177fa7f vp8: make some functions static
silences missing prototype warnings

Change-Id: I9f24a3214c832c982ca0dc5a032316eba48472ff
2015-05-14 22:41:25 -07:00
James Zern
f80bbc0efb vp8/common/variance*: add vp8_rtcd include
silences missing prototype warnings

Change-Id: I5ca198b56a5ff0cf5b93c89957526f243c04e9c8
2015-05-14 22:41:25 -07:00
James Zern
6eb1016301 vp8_copy32xn: sync function signature
+ include vp8_rtcd.h in copy_c.c
silences missing prototype warnings

Change-Id: Iecc279c695b08a26b231dedb41e3b84c551703f3
2015-05-14 22:41:13 -07:00
James Zern
4be50c5289 sad*_avx2.c: sync function signatures
+ include vpx_dsp_rtcd.h
silences missing prototype warnings

Change-Id: Ifa1780bcf72b1fa2b153025d0d78d91ad38774c3
2015-05-14 20:58:56 -07:00
James Zern
ad8bae3c2d vpx_scale: add missing rtcd + vpx_scale includes
silences missing prototype warnings

Change-Id: I33320f66c789b8c5c51d69f9dc0e017f9e06b0d0
2015-05-14 20:58:44 -07:00
James Zern
c47d868d99 fdct8x8_test: move functions to an anonymous namespace
silences missing declaration warnings

Change-Id: I9486c13c973df3d25fcdc2c3de9a0da783ce8fd6
2015-05-14 20:51:17 -07:00
James Zern
26453e7b67 md5_utils.c: make a function static
silences a missing prototype warning

Change-Id: I982c9c70e0e9f94a5201559300cadb8bd22441f9
2015-05-14 20:51:14 -07:00
James Zern
db4afa6bd2 vpx_config.c: add vpx_codec include
silences a missing prototype warning

Change-Id: I31a11c8390ae58e9a99ecc7cdf2a36334fac3f65
2015-05-14 20:51:08 -07:00
Frank Galligan
0a80164c94 Move mc_buf to cut down size of MACROBLOCKD.
Change-Id: Icea64b9e5632b41aaa7cd7018c501d6add9b7a7f
2015-05-14 19:10:02 -07:00
James Zern
0d2f348392 tests: add test_intra_pred_speed
useful for speed testing / verifying individual function optimizations;
currently tests non-high-bitdepth VP9 intra predictors

Change-Id: Ibd247765e43a31894697d43f1d39d312e0ba2090
2015-05-14 16:20:21 -07:00
Adrian Grange
f480c1256d Fix illegal memory access when stream starts w/ invisible frame.
Add a check to make sure we have a decoded frame available
before copying its 'corrupt' flag.

(Originally submitted to the old repository by Alexander Voronov
as: https://gerrit.chromium.org/gerrit/#/c/74305/).

Change-Id: Iceb4686c785afb437b668015bf8818b18d60e0ce
2015-05-14 15:49:10 -07:00
Tom Finegan
0de534b725 Merge "Remove BUILD_LIBVPX_SO variable from the build system." 2015-05-14 20:47:43 +00:00
Tom Finegan
f9a5b96fd2 Merge "Remove the BUILD_LIBVPX variable from the build system." 2015-05-14 19:08:06 +00:00
Tom Finegan
556ff27e2e Remove BUILD_LIBVPX_SO variable from the build system.
Was just an additional layer of indirection for CONFIG_SHARED, so
use CONFIG_SHARED directly.

Change-Id: I7c022b9f926a63c2c5884def5f03dfd648329afc
2015-05-14 11:20:34 -07:00
Tom Finegan
d104b16b56 Remove the BUILD_LIBVPX variable from the build system.
Replace it with its value (yes), which is constant.

Change-Id: Ifb0c0408a53ecadf81156da6a64fa583b2ada746
2015-05-14 10:46:44 -07:00
paulwilkins
eb8faf1c89 Revert "Skip the last frame update for some frame repeats."
Testing on another rate control patch reveals that in some
situations, where the encoder is flipping in and out of arf
mode, we get an encoder decoder mismatch.

Whilst it is still not clear why, skipping the last buffer
update seems to trigger the problem. Until I can establish
why, or if there is another underlying cause, I am reverting
this change.

This reverts commit e5112b3ae3.

Change-Id: I315c5200414de89458015823344b7367e9dd75ba
2015-05-14 17:21:44 +01:00
Johann
cafae5b544 Merge "Relocate memory operations for common code" 2015-05-13 19:47:24 +00:00
Johann
1d7ccd5325 Relocate memory operations for common code
With the sad functions, and hopefully the variance functions soon,
moving to the vpx_dsp location, place the defines used in the
reference C code in a common location.

Change-Id: I4c8ce7778eb38a0a3ee674d2f1c488eda01cfeca
2015-05-13 11:41:15 -07:00
Tom Finegan
3007db0b45 Avoid failed include when Makefile is processed with no target.
Basically just a warning, but disconcerting nonetheless. Removes this
output from the build:
Makefile:59: -x86_64-darwin13-gcc.mk: No such file or directory

Change-Id: Ibb379506352b2f613ef4a7b1ac47e9c95d0d1580
2015-05-13 11:16:09 -07:00
Tom Finegan
ae14b37431 Merge "Remove claims of universal target support from the build system." 2015-05-13 17:05:46 +00:00
Parag Salasakar
686616a989 Merge "mips msa vp9 idct 8x8 optimization" 2015-05-13 04:36:34 +00:00
Tom Finegan
3d7063d4b0 Remove claims of universal target support from the build system.
These targets no longer build (PPC support was removed from
libvpx). Remove the dead code and misleading help output.

BUG=https://code.google.com/p/webm/issues/detail?id=997

Change-Id: Ib35614806adeae970f3821da0d8dbcc54ab8d868
2015-05-12 19:16:49 -07:00
Yunqing Wang
f72af26305 Merge "Remove unneeded variable declaration" 2015-05-12 23:33:31 +00:00
Yaowu Xu
a8015e217e Merge "Protect new metric computation with use_highbitdepth flag" 2015-05-12 23:20:35 +00:00
Yaowu Xu
3f42d10805 Protect new metric computation with use_highbitdepth flag
The computation of new metrics is not supported yet in highbitdepth
mode. This commit adds protection to make sure the computation is
done only when highbitdepth is not on. This protection shall be
revised when support of highbitdepth computation is added.

This resolves the encoder crash when configured with both
--enable-internal-stats
--enable-vp9-highbitdepth

Change-Id: Id9f4bcc4fa26d9ca0e9eabade83f3f88a5b212e6
2015-05-12 15:12:05 -07:00
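A hedged sketch of the guard being added; the flag, struct, and function names below are illustrative stand-ins, not the actual libvpx symbols. The point is simply that the extra stats are computed only when high bit depth is not in use.

#include <stdio.h>

#define CONFIG_HIGHBITDEPTH 1  /* stand-in for the real build-time flag */

struct common_stub { int use_highbitdepth; };

static void compute_internal_stats_stub(void) { puts("internal stats"); }

static void maybe_compute_stats(const struct common_stub *cm) {
#if CONFIG_HIGHBITDEPTH
  if (cm->use_highbitdepth) return;  /* not supported yet in this mode */
#endif
  compute_internal_stats_stub();
}

int main(void) {
  struct common_stub cm = {1};
  maybe_compute_stats(&cm);  /* skipped while high bit depth is active */
  cm.use_highbitdepth = 0;
  maybe_compute_stats(&cm);  /* computed */
  return 0;
}
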
Yunqing Wang
8ba2d2d5a0 Remove unneeded variable declaration
This patch fixed the following warning:
src\third_party\libvpx\source\libvpx\vp9\encoder\vp9_pickmode.c(1607) :
warning C6246: Local declaration of 'this_mode' hides declaration of the
same name in outer scope.

Change-Id: I1d93c4a47a13cb13089fec5bd61e8b58e6cd8d58
2015-05-12 15:01:40 -07:00
James Zern
5c02f88a81 Merge "libs.mk: simplify test_libvpx target" 2015-05-12 21:10:43 +00:00
James Zern
f237849862 Merge changes from topic 'missing-proto'
* changes:
  vpxenc: make some functions static
  vpxdec: make some functions static
  tools_common.h: fix get_vpx_decoder_count() proto
  tools_common.h: fix get_vpx_encoder_count() proto
  tools_common.h: fix usage_exit() prototype
2015-05-12 19:02:05 +00:00
Johann
73de6ad522 Merge "Remove reference to compatibility layer" 2015-05-12 18:44:24 +00:00
Adrian Grange
17fc3e94c9 Merge "Recompute tile params on frame resize" 2015-05-12 17:48:16 +00:00
Johann
4aaf4661c3 Remove reference to compatibility layer
The compatibility layer was removed before the 1.4.0 release.

Change-Id: I268513ee9b3a2640ec33c4a25a5c5614fee7b3b2
2015-05-12 10:40:13 -07:00
James Zern
5c337fd08a vpxenc: make some functions static
silences missing-prototype warnings

Change-Id: I641a61b21f9de135e59e3769a2063d5a94f34746
2015-05-11 20:55:03 -07:00
James Zern
6a2e0f029a vpxdec: make some functions static
silences missing-prototype warnings

Change-Id: I080d2a24eeb6faa8c160683cc1fbf30d3b0b49e5
2015-05-11 20:55:02 -07:00
James Zern
d1999cb234 tools_common.h: fix get_vpx_decoder_count() proto
silences a missing-prototype warning

Change-Id: I99f8b4a9b7df836208e93170159733c5b5c2d881
2015-05-11 20:55:02 -07:00
James Zern
5a73bbdb83 tools_common.h: fix get_vpx_encoder_count() proto
silences a missing-prototype warning

Change-Id: Icf5c7f1f3e8ae9792276068fb3c0fd04b40fc7ad
2015-05-11 20:55:01 -07:00
James Zern
59e7a47c41 tools_common.h: fix usage_exit() prototype
+ the definitions in the examples

silences a missing-prototype warning

Change-Id: I7c064cacd6d2073a6107de6c3e2cc46ef8a68411
2015-05-11 20:54:55 -07:00
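For context on the fix (the diffs below show the same change in the examples): in C an empty parameter list leaves the arguments unspecified, so a declaration written as usage_exit() does not actually constrain the definitions; writing (void) gives a real zero-argument prototype. A small compilable sketch:

#include <stdio.h>
#include <stdlib.h>

/* Old style, no prototype: void usage_exit(); */

/* Fixed: an explicit zero-argument prototype matching the definitions. */
void usage_exit(void);

void usage_exit(void) {
  fprintf(stderr, "Usage: example <infile> <outfile>\n");
  exit(EXIT_FAILURE);
}

int main(int argc, char **argv) {
  (void)argv;
  if (argc < 3) usage_exit();
  return 0;
}
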
James Zern
b0789cd299 libs.mk: simplify test_libvpx target
rename LIBVPX_TEST_BINS to LIBVPX_TEST_BIN and remove foreach usage.
this was a leftover from having multiple targets with their own (single)
object list; the use of LIBVPX_TEST_OBJS so widely makes extending these
loops difficult.

Change-Id: I61bda1b91acb43145609f04b8fe6e45ec4483e22
2015-05-11 19:13:38 -07:00
James Zern
465ce0e420 Merge "build_intra_predictors*: reduce above_data size" 2015-05-11 23:48:58 +00:00
Adrian Grange
5f0dc57653 Merge "Reduce border extension by 1 line" 2015-05-11 22:47:49 +00:00
Adrian Grange
65b768fdf9 Recompute tile params on frame resize
When the frame size changes we must recompute details
of the tile dimensions.

Change-Id: Ie519bd6da47b5cd43933c0bcfc0f2429bcb01986
2015-05-11 15:45:26 -07:00
Jingning Han
2dcfd16fbd Merge "Sort variables dependency in read_uncompressed_header" 2015-05-11 19:11:56 +00:00
Jingning Han
2b2b461d39 Sort variables dependency in read_uncompressed_header
Remove a few repeated data structure reads from
read_uncompressed_header.

Change-Id: I6eb741b39f9415ad0aa4631dfbf4a1ace4eba56a
2015-05-11 10:07:55 -07:00
Marco
913862be8c Fix rate control issue with layers and aq-mode=3.
When aq-mode=3 is enabled, only for base layer frames should the
qp of the frame incorporate the segment delta-qp.

This was causing more rate mismatch for the enhancement layer frames
when running temporal layers with aq-mode=3 on.

Change-Id: I1c5e69d1ef8a51188af8696753c17fd8f67699b3
2015-05-11 10:04:18 -07:00
paulwilkins
aa5c1b4c5d Merge "Skip the last frame update for some frame repeats." 2015-05-11 12:28:45 +00:00
James Zern
a5e4ca8390 build_intra_predictors*: reduce above_data size
currently this needs to be 2x (NEED_ABOVERIGHT) the size of the largest
block (32) + 1 (for above_left). reduce the buffer size from 128 + 16
(alignment) to 64 + 16.

Change-Id: Idaca1806c7e1214e9437de24e15edc2ebf18f95d
2015-05-08 20:17:20 -07:00
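The sizing argument, worked through as a self-contained check (the array here is a local stand-in, not the actual declaration): the largest block is 32 pixels, the above-right extension doubles that to 64, and one more byte holds the above-left pixel, so a 64 + 16 byte padded buffer is sufficient where 128 + 16 was allocated before.

#include <assert.h>
#include <stdint.h>

int main(void) {
  enum {
    MAX_BLOCK = 32,              /* largest intra block size */
    NEEDED = 2 * MAX_BLOCK + 1   /* above-right extension + above-left = 65 */
  };
  uint8_t above_data[64 + 16];   /* 80 bytes: enough, vs. the old 128 + 16 */
  above_data[0] = 0;             /* touch the buffer so it is not unused */
  assert(NEEDED <= (int)sizeof(above_data));
  return above_data[0];
}
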
James Zern
6d22713722 Merge "build_intra_predictors*: reduce left_col size" 2015-05-09 00:53:55 +00:00
hkuang
d53fb0fda5 Fix clang ioc warning due to NULL mi pointer.
The warning only happens in the VP9 encoder's first pass because src_mi
is not set up yet. But it will not fail the encoder, as left_mi and
above_mi are not used in the first_pass and they will be set up again
in the second pass.

Change-Id: I0713b4660d71e229e196654cb0970ba6b1574f28
2015-05-08 15:42:50 -07:00
Johann Koenig
f003f77b8c Merge "Fix ndk build." 2015-05-08 18:56:21 +00:00
hkuang
f5574fb44c Merge "Add more sse2 code for intra prediction." 2015-05-08 17:26:30 +00:00
paulwilkins
e5112b3ae3 Skip the last frame update for some frame repeats.
Where a frame appears to be a repeat of an earlier
frame or frame buffer, but the first pass code
does not anticipate this (usually because it is matching
the GF or ARF buffer, not the last frame buffer), do not
update the last frame buffer.

This helps ensure that the content of the last frame buffer
is kept "different" where possible, and not updated to
match the GF or ARF. This is particularly helpful in some
animated sequences where there are groups of repeating
frames. Here it has quite a big impact. However, in most
of our standard test clips it has little or no impact.

Change-Id: I77332ee1a69f9ffc0c6080bfeb811c43fd8828e6
2015-05-08 17:51:26 +01:00
Parag Salasakar
7c5f00f868 mips msa vp9 idct 8x8 optimization
average improvement ~4x-6x

Change-Id: I5edf713721b9e24c7e0ce2e69d8fc3ecab625d91
2015-05-08 12:23:27 +05:30
Parag Salasakar
a8a9c2bb45 Merge "mips msa vp9 idct 32x32 optimization" 2015-05-08 04:27:44 +00:00
Frank Galligan
a4bcc8c318 Fix ndk build.
Android.mk should depend on vpx_dsp_rtcd.h

Change-Id: Ib614af195a9574fd849c28b695f9f4b3b2bd125c
2015-05-07 18:22:29 -07:00
Johann
11a4a3c065 Merge "Remove only remaining uses of 'fast_unaligned'" 2015-05-07 23:32:18 +00:00
James Zern
7e55ff1593 build_intra_predictors*: reduce left_col size
this should only need to be the size of the largest block, i.e., 32, not
64.

Change-Id: Ib8cb2424771fdd2a64c55379597248b2722a5ceb
2015-05-07 16:16:42 -07:00
Johann
802e1d84cc Remove only remaining uses of 'fast_unaligned'
Use memcpy instead of casting.

Change-Id: Ieca725cc628883985bde23c7d742af8781c5dbb5
2015-05-07 14:39:37 -07:00
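A minimal sketch of the replacement pattern: reading a multi-byte value through memcpy instead of a pointer cast does not assume the hardware tolerates unaligned loads, and compilers typically lower the memcpy to a single load where that is safe.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t load_u32(const uint8_t *p) {
  uint32_t v;
  /* Old pattern, removed along with fast_unaligned:
     return *(const uint32_t *)p; */
  memcpy(&v, p, sizeof(v));  /* safe regardless of alignment */
  return v;
}

int main(void) {
  const uint8_t buf[5] = {0, 1, 2, 3, 4};
  printf("0x%08x\n", (unsigned)load_u32(buf + 1));  /* unaligned source */
  return 0;
}
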
James Zern
fd3658b0e4 replace DECLARE_ALIGNED_ARRAY w/DECLARE_ALIGNED
this macro was used inconsistently and only differs in behavior from
DECLARE_ALIGNED when an alignment attribute is unavailable. this macro
is used with calls to assembly, while generic c-code doesn't rely on it,
so in a c-only build without an alignment attribute the code will
function as expected.

Change-Id: Ie9d06d4028c0de17c63b3a27e6c1b0491cc4ea79
2015-05-07 11:55:08 -07:00
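For reference, the general shape of such an alignment macro; this is an approximation, not a verbatim copy of vpx_ports/mem.h. When no alignment attribute is available it degrades to a plain declaration, which is the c-only case the message above refers to.

#include <stdint.h>

/* Approximate shape of the macro (not the exact libvpx definition). */
#if defined(__GNUC__)
#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(n, typ, val) __declspec(align(n)) typ val
#else
#define DECLARE_ALIGNED(n, typ, val) typ val  /* no attribute: plain array */
#endif

int main(void) {
  DECLARE_ALIGNED(16, uint8_t, scratch[64]);  /* 16-byte aligned, as asm expects */
  scratch[0] = 0;
  return scratch[0];
}
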
Johann
76a08210b6 Merge "Move shared SAD code to vpx_dsp" 2015-05-07 18:33:06 +00:00
hkuang
086934136b Merge "Remove an unnecessary check." 2015-05-07 15:51:11 +00:00
Marco
97307af21a Merge "Remove EIGHTTAP_SHARP filter check for non-rd mode." 2015-05-07 15:40:11 +00:00
paulwilkins
aecb1770d5 Merge "Image size restriction to rd auto partition search." 2015-05-07 14:12:14 +00:00
Parag Salasakar
1601c1385a mips msa vp9 idct 32x32 optimization
average improvement ~4x-6x

Change-Id: Idaba7e49fbd7f388caee0d73773ccf6e4807ef17
2015-05-07 12:42:23 +05:30
hkuang
7153b822ed Add more sse2 code for intra prediction.
vp9_dc_left_predictor_16x16
vp9_dc_top_predictor_32x32
vp9_dc_left_predictor_32x32
vp9_dc_128_predictor_32x32

Change-Id: Ib9861deefd01c3527235b92ff6b3d571ef6b4bc6
2015-05-06 17:17:00 -07:00
Marco
76fe5dfc67 Remove EIGHTTAP_SHARP filter check for non-rd mode.
Using EIGHTTAP and EIGHTTAP_SMOOTH seems sufficient.
Hard to see any visual gain from allowing EIGHTTAP_SHARP, and it is
rarely selected.

PSNR/SSIM metrics go up by ~0.18/0.14%.

Change-Id: I96fa0d98f9321b913e3ebcd464d4ff3c63018791
2015-05-06 17:08:34 -07:00
Johann
d5d9289800 Move shared SAD code to vpx_dsp
Create a new component, vpx_dsp, for code that can be shared
between codecs. Move the SAD code into the component.

This reduces the size of vpxenc/dec by 36k on x86_64 builds.

Change-Id: I73f837ddaecac6b350bf757af0cfe19c4ab9327a
2015-05-06 16:58:20 -07:00
Adrian Grange
0af5ff49bd Reduce border extension by 1 line
The code was using one row too many.

Change-Id: Ie0c05d02c22ae3d0d13d3b6565c40de3bc4fa17a
2015-05-06 14:44:24 -07:00
hkuang
240767b29d Remove an unnecessary check.
Change-Id: Id0f224ac4667dd173363b0f05711678448291d4e
2015-05-06 14:15:00 -07:00
hkuang
92b199061a Correct the inter prediction coordinate calculation, which greatly reduces the
number of border extensions.

Change-Id: I8e5bd590cc696ee71cfe1f4cc66c12fb24aaf44e
2015-05-06 10:55:56 -07:00
hkuang
623e6eed5e Merge "Optimize the read_partition." 2015-05-06 17:29:52 +00:00
Yunqing Wang
7dbdada49f Merge "Add intra mode early termination in non-rd mode" 2015-05-06 17:12:11 +00:00
Parag Salasakar
d1cdda88bd Merge "mips msa vp9 idct 16x16 optimization" 2015-05-06 06:40:56 +00:00
Yunqing Wang
36eabb1c3c Add intra mode early termination in non-rd mode
Added the intra mode early termination in order to
speed up the mode search in the non-rd case, since we
started to include more intra modes in the search
list. Borg tests (rtc set) showed a 0.048% PSNR gain
and a 0.061 SSIM gain. No speed change.

Change-Id: I6f255fe534dc50b736e6a66a726ad458eb9b4443
2015-05-05 16:31:36 -07:00
hkuang
4c1a8be29d Optimize the read_partition.
Change-Id: I5a796425ce5706824a2fc17c6f24f983c5b9e43b
2015-05-05 15:51:04 -07:00
James Zern
ccae5d99d2 fix and enable vp9_dc_128_predictor_16x16
widen the loads and stores to 128-bit.

this was added, but not enabled in:
493a857 Add some sse2 code for intra prediction.

Change-Id: I277d7db608a7db7d75cc0bde86f48fa66ad487e4
2015-05-05 11:40:13 -07:00
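A hedged sketch of what widening the stores looks like, with a simplified signature rather than the exact libvpx prototype: a DC-128 predictor fills the 16x16 block with the constant 128, so each row can be written with a single 128-bit SSE2 store.

#include <emmintrin.h>  /* SSE2 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void dc_128_predictor_16x16(uint8_t *dst, ptrdiff_t stride) {
  const __m128i dc = _mm_set1_epi8((char)0x80);  /* sixteen copies of 128 */
  int r;
  for (r = 0; r < 16; ++r) {
    _mm_storeu_si128((__m128i *)dst, dc);  /* one 128-bit store per row */
    dst += stride;
  }
}

int main(void) {
  uint8_t block[16 * 16];
  dc_128_predictor_16x16(block, 16);
  printf("%d\n", block[0]);  /* prints 128 */
  return 0;
}
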
hkuang
e47811ef8f Merge "Add some sse2 code for intra prediction." 2015-05-05 17:11:07 +00:00
paulwilkins
af76953448 Merge "Remove CONSTRAIN_NEIGHBORING_MIN_MAX." 2015-05-05 09:32:11 +00:00
paulwilkins
4cd65e4f19 Merge "Adjust ARF min and max interval." 2015-05-05 09:31:38 +00:00
Parag Salasakar
60052b618f mips msa vp9 idct 16x16 optimization
average improvement ~4x-6x

Change-Id: I55e95b7f2ba403dff11813958dc7c73a900dd022
2015-05-05 12:37:06 +05:30
Marco
b9a72d3c4d Allow for H and V intra modes for non-rd mode.
For non-rd mode (speed >=5): use mask based on prediction block size, and
(for non-screen content mode) allow for checking horiz and vert intra modes
for block sizes < 16x16.

Avg psnr/ssim metrics go up by about ~0.2%.

Only allow H/V intra on block sizes below 16x16 for now, to keep the
encoding time increase very small; when H/V was also allowed on 16x16 blocks,
metrics went down on a few clips, which needs to be further examined.

Change-Id: I8ae0bc8cb2a964f9709612c76c5661acaab1381e
2015-05-04 09:48:41 -07:00
James Zern
670b2c09ce vp9_idct_intrin_sse2: cosmetics: reindent
+ fix some whitespace

Change-Id: Id61b739282014288a7e5d3c17a9d6448d9d4cda2
2015-05-01 16:07:54 -07:00
Yunqing Wang
d31256cd38 Merge "Reduce intra_cost_penalty for BLOCK_8X8" 2015-05-01 18:29:38 +00:00
Yunqing Wang
57fefd5f9a Merge "Adjust the vbp early termination threshold slightly" 2015-05-01 18:29:25 +00:00
paulwilkins
4a7dcf8eb2 Image size restriction to rd auto partition search.
Impose a limit on the rd auto partition search based on
the image format. Smaller formats require that the search
include a smaller minimum block size.

This change is intended to mitigate the visual impact of
ringing in some problem clips, for smaller image formats.

Change-Id: Ie039e5f599ee079bbef5d272f3e40e2e27d8f97b
2015-05-01 16:16:02 +01:00
paulwilkins
287b0c6da9 Remove CONSTRAIN_NEIGHBORING_MIN_MAX.
Remove one of the auto partition size cases.
This case can behave badly in some types of animated content
and was only used for the rd encode path. A subsequent patch
will add additional checks to help further improve visual quality.

Change-Id: I0ebd8da3d45ab8501afa45d7959ced8c2d60ee4e
2015-05-01 15:15:16 +01:00
paulwilkins
e0786c280e Adjust ARF min and max interval.
Previously the limit on the max interval was set to 0.5 seconds.
Though this helped some low frame rate material, it
appears to be a bit too aggressive for some 24 and 25 fps
content. This patch relaxes the limit to 0.75 seconds.

The patch also adds a new minimum interval variable
to replace the current hard wired value. This allows us
to impose a limit on the maximum number of primary
arfs per second for high frame rate (e.g. 50 & 60fps)
content. This is to address concerns regarding playback
performance on some platforms if there is a high base
frame rate and very frequent arfs.

Change-Id: I373e8b6b2a8ef522eced6c6d2cceb234ff763fcf
2015-05-01 15:11:49 +01:00
hkuang
493a8579f1 Add some sse2 code for intra prediction.
Change-Id: I16c0a62e52dab62837c547345df31e7518620ed4
2015-04-30 15:42:57 -07:00
Yunqing Wang
4907c29904 Reduce intra_cost_penalty for BLOCK_8X8
This patch reduced BLOCK_8X8's intra_cost_penalty, which
allows 8x8 blocks to conduct an intra mode search. Borg test
result (rtc set): 0.077% PSNR gain, 0.228% SSIM gain. No speed
changes.

Change-Id: Icfe90c4f6969de24bda8ecacbd3da50330bf22b2
2015-04-30 11:03:06 -07:00
Yunqing Wang
a257e469e1 Adjust the vbp early termination threshold slightly
Calculated cpi->vbp_threshold_sad from this frame's dequant value.
The encoding quality and speed didn't change much. Borg test
result: PSNR: -0.002%, SSIM: -0.003%.

Change-Id: I97c9826986f39582f29910d637d08a69c90afdee
2015-04-30 08:51:02 -07:00
312 changed files with 27441 additions and 16237 deletions


@@ -1,3 +1,8 @@
xxxx-yy-zz v1.4.0 "Changes for next release"
vpxenc is changed to use VP9 by default.
Encoder controls added for 1 pass SVC.
Decoder control to toggle on/off loopfilter.
2015-04-03 v1.4.0 "Indian Runner Duck"
This release includes significant improvements to the VP9 codec.

README

@@ -101,13 +101,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86_64-win64-vs10
x86_64-win64-vs11
x86_64-win64-vs12
universal-darwin8-gcc
universal-darwin9-gcc
universal-darwin10-gcc
universal-darwin11-gcc
universal-darwin12-gcc
universal-darwin13-gcc
universal-darwin14-gcc
generic-gnu
The generic-gnu target, in conjunction with the CROSS environment variable,

args.c

@@ -14,9 +14,7 @@
#include <limits.h>
#include "args.h"
#ifdef _MSC_VER
#define snprintf _snprintf
#endif
#include "vpx_ports/msvc.h"
#if defined(__GNUC__) && __GNUC__
extern void die(const char *fmt, ...) __attribute__((noreturn));


@@ -163,6 +163,7 @@ ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
endif
# Add a dependency to force generation of the RTCD files.
define rtcd_dep_template
ifeq ($(CONFIG_VP8), yes)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
endif
@@ -170,10 +171,14 @@ ifeq ($(CONFIG_VP9), yes)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h
endif
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_dsp_rtcd.h
ifeq ($(TARGET_ARCH_ABI),x86)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
endif
endef
$(eval $(call rtcd_dep_template))
.PHONY: clean
clean:


@@ -22,8 +22,10 @@ clean:: .DEFAULT
exampletest: .DEFAULT
install:: .DEFAULT
test:: .DEFAULT
test-no-data-check:: .DEFAULT
testdata:: .DEFAULT
utiltest: .DEFAULT
exampletest-no-data-check utiltest-no-data-check: .DEFAULT
# Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
@@ -56,13 +58,10 @@ dist:
fi
endif
# Since we invoke make recursively for multiple targets we need to include the
# .mk file for the correct target, but only when $(target) is non-empty.
ifneq ($(target),)
# Normally, we want to build the filename from the target and the toolchain.
# This disambiguates from the $(target).mk file that exists in the source tree.
# However, the toolchain is part of the target in universal builds, so we
# don't want to include TOOLCHAIN in that case. FAT_ARCHS is used to test
# if we're in the universal case.
include $(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk
include $(target)-$(TOOLCHAIN).mk
endif
BUILD_ROOT?=.
VPATH=$(SRC_PATH_BARE)
@@ -116,6 +115,9 @@ test::
testdata::
.PHONY: utiltest
utiltest:
.PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check
test-no-data-check::
exampletest-no-data-check utiltest-no-data-check:
# Add compiler flags for intrinsic files
ifeq ($(TOOLCHAIN), x86-os2-gcc)
@@ -313,18 +315,15 @@ $(1):
$$(filter %.o,$$^) $$(extralibs)
endef
define lipo_lib_template
$(1): $(addsuffix /$(1),$(FAT_ARCHS))
$(if $(quiet),@echo " [LIPO] $$@")
$(qexec)libtool -static -o $$@ $$?
endef
define lipo_bin_template
$(1): $(addsuffix /$(1),$(FAT_ARCHS))
$(if $(quiet),@echo " [LIPO] $$@")
$(qexec)lipo -output $$@ -create $$?
define dll_template
# Not using a pattern rule here because we don't want to generate empty
# archives when they are listed as a dependency in files not responsible
# for creating them.
$(1):
$(if $(quiet),@echo " [LD] $$@")
$(qexec)$$(LD) -Zdll $$(LDFLAGS) \
-o $$@ \
$$(filter %.o,$$^) $$(extralibs) $$(EXPORTS_FILE)
endef
@@ -385,6 +384,7 @@ LIBS=$(call enabled,LIBS)
$(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
$(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dll,$(LIBS)),$(eval $(call dll_template,$(lib))))
INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
ifeq ($(MAKECMDGOALS),dist)


@@ -390,7 +390,7 @@ write_common_config_banner() {
write_common_config_targets() {
for t in ${all_targets}; do
if enabled ${t}; then
if enabled universal || enabled child; then
if enabled child; then
fwrite config.mk "ALL_TARGETS += ${t}-${toolchain}"
else
fwrite config.mk "ALL_TARGETS += ${t}"
@@ -647,14 +647,6 @@ process_common_toolchain() {
# detect tgt_os
case "$gcctarget" in
*darwin8*)
tgt_isa=universal
tgt_os=darwin8
;;
*darwin9*)
tgt_isa=universal
tgt_os=darwin9
;;
*darwin10*)
tgt_isa=x86_64
tgt_os=darwin10
@@ -736,6 +728,13 @@ process_common_toolchain() {
# Handle darwin variants. Newer SDKs allow targeting older
# platforms, so use the newest one available.
case ${toolchain} in
arm*-darwin*)
ios_sdk_dir="$(show_darwin_sdk_path iphoneos)"
if [ -d "${ios_sdk_dir}" ]; then
add_cflags "-isysroot ${ios_sdk_dir}"
add_ldflags "-isysroot ${ios_sdk_dir}"
fi
;;
*-darwin*)
osx_sdk_dir="$(show_darwin_sdk_path macosx)"
if [ -d "${osx_sdk_dir}" ]; then
@@ -789,7 +788,6 @@ process_common_toolchain() {
case ${toolchain} in
sparc-solaris-*)
add_extralibs -lposix4
disable_feature fast_unaligned
;;
*-solaris-*)
add_extralibs -lposix4
@@ -812,12 +810,17 @@ process_common_toolchain() {
if disabled neon && enabled neon_asm; then
die "Disabling neon while keeping neon-asm is not supported"
fi
soft_enable media
soft_enable fast_unaligned
case ${toolchain} in
*-darwin*)
# Neon is guaranteed on iOS 6+ devices, while old media extensions
# no longer assemble with iOS 9 SDK
;;
*)
soft_enable media
esac
;;
armv6)
soft_enable media
soft_enable fast_unaligned
;;
esac
@@ -1033,7 +1036,6 @@ EOF
tune_cflags="-mtune="
if enabled dspr2; then
check_add_cflags -mips32r2 -mdspr2
disable_feature fast_unaligned
fi
if enabled runtime_cpu_detect; then
@@ -1060,8 +1062,6 @@ EOF
add_cflags -mmsa
add_asflags -mmsa
add_ldflags -mmsa
disable_feature fast_unaligned
fi
fi
@@ -1221,7 +1221,7 @@ EOF
;;
esac
;;
universal*|*-gcc|generic-gnu)
*-gcc|generic-gnu)
link_with_cc=gcc
enable_feature gcc
setup_gnu_toolchain


@@ -263,8 +263,8 @@ case "$target" in
;;
arm*)
platforms[0]="ARM"
asm_Debug_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
asm_Release_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
asm_Debug_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
asm_Release_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
;;
*) die "Unsupported target $target!"
;;

configure

@@ -31,8 +31,6 @@ Advanced options:
--size-limit=WxH max size to allow in the decoder
--as={yasm|nasm|auto} use specified assembler [auto, yasm preferred]
--sdk-path=PATH path to root of sdk (android builds only)
${toggle_fast_unaligned} don't use unaligned accesses, even when
supported by hardware [auto]
${toggle_codec_srcs} in/exclude codec library source code
${toggle_debug_libs} in/exclude debug version of libraries
${toggle_static_msvcrt} use static MSVCRT (VS builds only)
@@ -150,13 +148,6 @@ all_platforms="${all_platforms} x86_64-win64-vs9"
all_platforms="${all_platforms} x86_64-win64-vs10"
all_platforms="${all_platforms} x86_64-win64-vs11"
all_platforms="${all_platforms} x86_64-win64-vs12"
all_platforms="${all_platforms} universal-darwin8-gcc"
all_platforms="${all_platforms} universal-darwin9-gcc"
all_platforms="${all_platforms} universal-darwin10-gcc"
all_platforms="${all_platforms} universal-darwin11-gcc"
all_platforms="${all_platforms} universal-darwin12-gcc"
all_platforms="${all_platforms} universal-darwin13-gcc"
all_platforms="${all_platforms} universal-darwin14-gcc"
all_platforms="${all_platforms} generic-gnu"
# all_targets is a list of all targets that can be configured
@@ -193,6 +184,10 @@ if [ ${doxy_major:-0} -ge 1 ]; then
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
fi
# disable codecs when their source directory does not exist
[ -d "${source_path}/vp8" ] || disable_feature vp8
[ -d "${source_path}/vp9" ] || disable_feature vp9
# install everything except the sources, by default. sources will have
# to be enabled when doing dist builds, since that's no longer a common
# case.
@@ -203,37 +198,21 @@ enable_feature install_libs
enable_feature static
enable_feature optimizations
enable_feature dependency_tracking
enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
enable_feature spatial_resampling
enable_feature multithread
enable_feature os_support
enable_feature temporal_denoising
[ -d "${source_path}/../include" ] && enable_feature alt_tree_layout
for d in vp8 vp9; do
[ -d "${source_path}/${d}" ] && disable_feature alt_tree_layout;
done
if ! enabled alt_tree_layout; then
# development environment
[ -d "${source_path}/vp8" ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
[ -d "${source_path}/vp9" ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
else
# customer environment
[ -f "${source_path}/../include/vpx/vp8cx.h" ] && CODECS="${CODECS} vp8_encoder"
[ -f "${source_path}/../include/vpx/vp8dx.h" ] && CODECS="${CODECS} vp8_decoder"
[ -f "${source_path}/../include/vpx/vp9cx.h" ] && CODECS="${CODECS} vp9_encoder"
[ -f "${source_path}/../include/vpx/vp9dx.h" ] && CODECS="${CODECS} vp9_decoder"
[ -f "${source_path}/../include/vpx/vp8cx.h" ] || disable_feature vp8_encoder
[ -f "${source_path}/../include/vpx/vp8dx.h" ] || disable_feature vp8_decoder
[ -f "${source_path}/../include/vpx/vp9cx.h" ] || disable_feature vp9_encoder
[ -f "${source_path}/../include/vpx/vp9dx.h" ] || disable_feature vp9_decoder
[ -f "${source_path}/../lib/*/*mt.lib" ] && soft_enable static_msvcrt
fi
CODECS="$(echo ${CODECS} | tr ' ' '\n')"
CODEC_FAMILIES="$(for c in ${CODECS}; do echo ${c%_*}; done | sort | uniq)"
CODECS="
vp8_encoder
vp8_decoder
vp9_encoder
vp9_decoder
"
CODEC_FAMILIES="
vp8
vp9
"
ARCH_LIST="
arm
@@ -265,7 +244,6 @@ HAVE_LIST="
${ARCH_EXT_LIST}
vpx_ports
stdint_h
alt_tree_layout
pthread_h
sys_mman_h
unistd_h
@@ -294,7 +272,6 @@ CONFIG_LIST="
codec_srcs
debug_libs
fast_unaligned
dequant_tokens
dc_recon
@@ -356,7 +333,6 @@ CMDLINE_SELECT="
libc
as
size_limit
fast_unaligned
codec_srcs
debug_libs
@@ -441,22 +417,8 @@ post_process_cmdline() {
process_targets() {
enabled child || write_common_config_banner
enabled universal || write_common_target_config_h ${BUILD_PFX}vpx_config.h
# For fat binaries, call configure recursively to configure for each
# binary architecture to be included.
if enabled universal; then
# Call configure (ourselves) for each subarchitecture
for arch in $fat_bin_archs; do
BUILD_PFX=${arch}/ toolchain=${arch} $self --child $cmdline_args || exit $?
done
fi
# The write_common_config (config.mk) logic is deferred until after the
# recursive calls to configure complete, because we want our universal
# targets to be executed last.
write_common_target_config_h ${BUILD_PFX}vpx_config.h
write_common_config_targets
enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk
# Calculate the default distribution name, based on the enabled features
cf=""
@@ -532,11 +494,11 @@ process_detect() {
# Can only build shared libs on a subset of platforms. Doing this check
# here rather than at option parse time because the target auto-detect
# magic happens after the command line has been parsed.
if ! enabled linux; then
if ! enabled linux && ! enabled os2; then
if enabled gnu; then
echo "--enable-shared is only supported on ELF; assuming this is OK"
else
die "--enable-shared only supported on ELF for now"
die "--enable-shared only supported on ELF and OS/2 for now"
fi
fi
fi
@@ -601,24 +563,6 @@ EOF
process_toolchain() {
process_common_toolchain
# Handle universal binaries for this architecture
case $toolchain in
universal-darwin*)
darwin_ver=${tgt_os##darwin}
# Tiger (10.4/darwin8) brought support for x86
if [ $darwin_ver -ge 8 ]; then
fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
fi
# Leopard (10.5/darwin9) brought 64 bit support
if [ $darwin_ver -ge 9 ]; then
fat_bin_archs="$fat_bin_archs x86_64-${tgt_os}-${tgt_cc}"
fi
;;
esac
# Enable some useful compiler flags
if enabled gcc; then
enabled werror && check_add_cflags -Werror
@@ -706,7 +650,7 @@ process_toolchain() {
esac
# Other toolchain specific defaults
case $toolchain in x86*|universal*) soft_enable postproc;; esac
case $toolchain in x86*) soft_enable postproc;; esac
if enabled postproc_visualizer; then
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
@@ -770,6 +714,7 @@ CONFIGURE_ARGS="$@"
process "$@"
print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */"
cat <<EOF >> ${BUILD_PFX}vpx_config.c
#include "vpx/vpx_codec.h"
static const char* const cfg = "$CONFIGURE_ARGS";
const char *vpx_codec_build_config(void) {return cfg;}
EOF


@@ -56,6 +56,7 @@ UTILS-$(CONFIG_DECODERS) += vpxdec.c
vpxdec.SRCS += md5_utils.c md5_utils.h
vpxdec.SRCS += vpx_ports/mem_ops.h
vpxdec.SRCS += vpx_ports/mem_ops_aligned.h
vpxdec.SRCS += vpx_ports/msvc.h
vpxdec.SRCS += vpx_ports/vpx_timer.h
vpxdec.SRCS += vpx/vpx_integer.h
vpxdec.SRCS += args.c args.h
@@ -80,6 +81,7 @@ vpxenc.SRCS += tools_common.c tools_common.h
vpxenc.SRCS += warnings.c warnings.h
vpxenc.SRCS += vpx_ports/mem_ops.h
vpxenc.SRCS += vpx_ports/mem_ops_aligned.h
vpxenc.SRCS += vpx_ports/msvc.h
vpxenc.SRCS += vpx_ports/vpx_timer.h
vpxenc.SRCS += vpxstats.c vpxstats.h
ifeq ($(CONFIG_LIBYUV),yes)
@@ -98,6 +100,7 @@ ifeq ($(CONFIG_SPATIAL_SVC),yes)
vp9_spatial_svc_encoder.SRCS += tools_common.c tools_common.h
vp9_spatial_svc_encoder.SRCS += video_common.h
vp9_spatial_svc_encoder.SRCS += video_writer.h video_writer.c
vp9_spatial_svc_encoder.SRCS += vpx_ports/msvc.h
vp9_spatial_svc_encoder.SRCS += vpxstats.c vpxstats.h
vp9_spatial_svc_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
@@ -112,6 +115,7 @@ vpx_temporal_svc_encoder.SRCS += ivfenc.c ivfenc.h
vpx_temporal_svc_encoder.SRCS += tools_common.c tools_common.h
vpx_temporal_svc_encoder.SRCS += video_common.h
vpx_temporal_svc_encoder.SRCS += video_writer.h video_writer.c
vpx_temporal_svc_encoder.SRCS += vpx_ports/msvc.h
vpx_temporal_svc_encoder.GUID = B18C08F2-A439-4502-A78E-849BE3D60947
vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder
EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c
@@ -122,6 +126,7 @@ simple_decoder.SRCS += video_common.h
simple_decoder.SRCS += video_reader.h video_reader.c
simple_decoder.SRCS += vpx_ports/mem_ops.h
simple_decoder.SRCS += vpx_ports/mem_ops_aligned.h
simple_decoder.SRCS += vpx_ports/msvc.h
simple_decoder.DESCRIPTION = Simplified decoder loop
EXAMPLES-$(CONFIG_DECODERS) += postproc.c
postproc.SRCS += ivfdec.h ivfdec.c
@@ -130,6 +135,7 @@ postproc.SRCS += video_common.h
postproc.SRCS += video_reader.h video_reader.c
postproc.SRCS += vpx_ports/mem_ops.h
postproc.SRCS += vpx_ports/mem_ops_aligned.h
postproc.SRCS += vpx_ports/msvc.h
postproc.GUID = 65E33355-F35E-4088-884D-3FD4905881D7
postproc.DESCRIPTION = Decoder postprocessor control
EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c
@@ -140,6 +146,7 @@ decode_to_md5.SRCS += video_common.h
decode_to_md5.SRCS += video_reader.h video_reader.c
decode_to_md5.SRCS += vpx_ports/mem_ops.h
decode_to_md5.SRCS += vpx_ports/mem_ops_aligned.h
decode_to_md5.SRCS += vpx_ports/msvc.h
decode_to_md5.GUID = 59120B9B-2735-4BFE-B022-146CA340FE42
decode_to_md5.DESCRIPTION = Frame by frame MD5 checksum
EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c
@@ -147,6 +154,7 @@ simple_encoder.SRCS += ivfenc.h ivfenc.c
simple_encoder.SRCS += tools_common.h tools_common.c
simple_encoder.SRCS += video_common.h
simple_encoder.SRCS += video_writer.h video_writer.c
simple_encoder.SRCS += vpx_ports/msvc.h
simple_encoder.GUID = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
simple_encoder.DESCRIPTION = Simplified encoder loop
EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_lossless_encoder.c
@@ -154,6 +162,7 @@ vp9_lossless_encoder.SRCS += ivfenc.h ivfenc.c
vp9_lossless_encoder.SRCS += tools_common.h tools_common.c
vp9_lossless_encoder.SRCS += video_common.h
vp9_lossless_encoder.SRCS += video_writer.h video_writer.c
vp9_lossless_encoder.SRCS += vpx_ports/msvc.h
vp9_lossless_encoder.GUID = B63C7C88-5348-46DC-A5A6-CC151EF93366
vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder
EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c
@@ -161,6 +170,7 @@ twopass_encoder.SRCS += ivfenc.h ivfenc.c
twopass_encoder.SRCS += tools_common.h tools_common.c
twopass_encoder.SRCS += video_common.h
twopass_encoder.SRCS += video_writer.h video_writer.c
twopass_encoder.SRCS += vpx_ports/msvc.h
twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8
twopass_encoder.DESCRIPTION = Two-pass encoder loop
EXAMPLES-$(CONFIG_DECODERS) += decode_with_drops.c
@@ -170,6 +180,7 @@ decode_with_drops.SRCS += video_common.h
decode_with_drops.SRCS += video_reader.h video_reader.c
decode_with_drops.SRCS += vpx_ports/mem_ops.h
decode_with_drops.SRCS += vpx_ports/mem_ops_aligned.h
decode_with_drops.SRCS += vpx_ports/msvc.h
decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
decode_with_drops.DESCRIPTION = Drops frames while decoding
EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c
@@ -177,6 +188,7 @@ set_maps.SRCS += ivfenc.h ivfenc.c
set_maps.SRCS += tools_common.h tools_common.c
set_maps.SRCS += video_common.h
set_maps.SRCS += video_writer.h video_writer.c
set_maps.SRCS += vpx_ports/msvc.h
set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
set_maps.DESCRIPTION = Set active and ROI maps
EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c
@@ -184,6 +196,7 @@ vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c
vp8cx_set_ref.SRCS += tools_common.h tools_common.c
vp8cx_set_ref.SRCS += video_common.h
vp8cx_set_ref.SRCS += video_writer.h video_writer.c
vp8cx_set_ref.SRCS += vpx_ports/msvc.h
vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
@@ -194,6 +207,7 @@ EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_multi_resolution_encoder.c
vp8_multi_resolution_encoder.SRCS += ivfenc.h ivfenc.c
vp8_multi_resolution_encoder.SRCS += tools_common.h tools_common.c
vp8_multi_resolution_encoder.SRCS += video_writer.h video_writer.c
vp8_multi_resolution_encoder.SRCS += vpx_ports/msvc.h
vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS)
vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de
vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
@@ -254,14 +268,6 @@ CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS))
$(foreach ex,$(ALL_EXAMPLES),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) examples.mk))
# If this is a universal (fat) binary, then all the subarchitectures have
# already been built and our job is to stitch them together. The
# BUILD_OBJS variable indicates whether we should be building
# (compiling, linking) the library. The LIPO_OBJS variable indicates
# that we're stitching.
$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_OBJS,BUILD_OBJS):=yes)
# Create build/install dependencies for all examples. The common case
# is handled here. The MSVS case is handled below.
NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
@@ -269,24 +275,28 @@ DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX)))
INSTALL-BINS-$(NOT_MSVS) += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX)))
DIST-SRCS-yes += $(ALL_SRCS)
INSTALL-SRCS-yes += $(UTIL_SRCS)
OBJS-$(NOT_MSVS) += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
OBJS-$(NOT_MSVS) += $(call objs,$(ALL_SRCS))
BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX)))
# Instantiate linker template for all examples.
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
SHARED_LIB_SUF=$(if $(filter darwin%,$(TGT_OS)),.dylib,.so)
ifneq ($(filter darwin%,$(TGT_OS)),)
SHARED_LIB_SUF=.dylib
else
ifneq ($(filter os2%,$(TGT_OS)),)
SHARED_LIB_SUF=_dll.a
else
SHARED_LIB_SUF=.so
endif
endif
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
$(foreach bin,$(BINS-yes),\
$(if $(BUILD_OBJS),$(eval $(bin):\
$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
$(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
$(eval $(bin):$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF))\
$(eval $(call linker_template,$(bin),\
$(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
-l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\
)))\
$(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\
)
)))
# The following pairs define a mapping of locations in the distribution
# tree to locations in the source/build trees.


@@ -71,7 +71,7 @@ static void print_md5(FILE *stream, unsigned char digest[16]) {
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
exit(EXIT_FAILURE);
}


@@ -65,7 +65,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
exit(EXIT_FAILURE);
}


@@ -52,7 +52,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
exit(EXIT_FAILURE);
}


@@ -15,6 +15,7 @@
#include <stdlib.h>
#include <string.h>
#include "../tools_common.h"
#include "../vp9/encoder/vp9_resize.h"
static const char *exec_name = NULL;
@@ -26,7 +27,7 @@ static void usage() {
printf("<output_yuv> [<frames>]\n");
}
void usage_exit() {
void usage_exit(void) {
usage();
exit(EXIT_FAILURE);
}


@@ -55,7 +55,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
exec_name);
exit(EXIT_FAILURE);


@@ -88,7 +88,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
exit(EXIT_FAILURE);
}


@@ -106,7 +106,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr,
"Usage: %s <codec> <width> <height> <infile> <outfile> "
"<keyframe-interval> [<error-resilient>]\nSee comments in "


@@ -58,7 +58,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
exec_name);
exit(EXIT_FAILURE);


@@ -37,15 +37,14 @@
#include <unistd.h>
#endif
#include "vpx_ports/vpx_timer.h"
#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"
#include "vpx_ports/mem_ops.h"
#include "./tools_common.h"
#include "../tools_common.h"
#define interface (vpx_codec_vp8_cx())
#define fourcc 0x30385056
void usage_exit() {
void usage_exit(void) {
exit(EXIT_FAILURE);
}


@@ -58,7 +58,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n",
exec_name);
exit(EXIT_FAILURE);


@@ -20,7 +20,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "vp9_lossless_encoder: Example demonstrating VP9 lossless "
"encoding feature. Supports raw input only.\n");
fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);


@@ -14,11 +14,13 @@
* that benefit from a scalable bitstream.
*/
#include <math.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "../args.h"
#include "../tools_common.h"
#include "../video_writer.h"
@@ -27,11 +29,18 @@
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "../vpxstats.h"
#define OUTPUT_RC_STATS 1
static const arg_def_t skip_frames_arg =
ARG_DEF("s", "skip-frames", 1, "input frames to skip");
static const arg_def_t frames_arg =
ARG_DEF("f", "frames", 1, "number of frames to encode");
static const arg_def_t threads_arg =
ARG_DEF("th", "threads", 1, "number of threads to use");
#if OUTPUT_RC_STATS
static const arg_def_t output_rc_stats_arg =
ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
#endif
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
static const arg_def_t timebase_arg =
@@ -42,6 +51,9 @@ static const arg_def_t spatial_layers_arg =
ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
static const arg_def_t temporal_layers_arg =
ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
static const arg_def_t temporal_layering_mode_arg =
ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
"VP9E_TEMPORAL_LAYERING_MODE");
static const arg_def_t kf_dist_arg =
ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
static const arg_def_t scale_factors_arg =
@@ -65,6 +77,8 @@ static const arg_def_t lag_in_frame_arg =
"generating any outputs");
static const arg_def_t rc_end_usage_arg =
ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
static const arg_def_t speed_arg =
ARG_DEF("sp", "speed", 1, "speed configuration");
#if CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
@@ -85,10 +99,16 @@ static const arg_def_t *svc_args[] = {
&timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg,
&kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg,
&fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
&max_bitrate_arg, &temporal_layers_arg, &lag_in_frame_arg,
&max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg,
&lag_in_frame_arg, &threads_arg,
#if OUTPUT_RC_STATS
&output_rc_stats_arg,
#endif
#if CONFIG_VP9_HIGHBITDEPTH
&bitdepth_arg,
#endif
&speed_arg,
&rc_end_usage_arg, NULL
};
@@ -102,6 +122,10 @@ static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
static const uint32_t default_temporal_layering_mode = 0;
static const uint32_t default_output_rc_stats = 0;
static const int32_t default_speed = -1; // -1 means use library default.
static const uint32_t default_threads = 0; // zero means use library default.
typedef struct {
const char *input_filename;
@@ -116,7 +140,7 @@ typedef struct {
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
exec_name);
fprintf(stderr, "Options:\n");
@@ -143,6 +167,12 @@ static void parse_command_line(int argc, const char **argv_,
svc_ctx->log_level = SVC_LOG_DEBUG;
svc_ctx->spatial_layers = default_spatial_layers;
svc_ctx->temporal_layers = default_temporal_layers;
svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
#if OUTPUT_RC_STATS
svc_ctx->output_rc_stat = default_output_rc_stats;
#endif
svc_ctx->speed = default_speed;
svc_ctx->threads = default_threads;
// start with default encoder configuration
res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -184,6 +214,20 @@ static void parse_command_line(int argc, const char **argv_,
svc_ctx->spatial_layers = arg_parse_uint(&arg);
} else if (arg_match(&arg, &temporal_layers_arg, argi)) {
svc_ctx->temporal_layers = arg_parse_uint(&arg);
#if OUTPUT_RC_STATS
} else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
svc_ctx->output_rc_stat = arg_parse_uint(&arg);
#endif
} else if (arg_match(&arg, &speed_arg, argi)) {
svc_ctx->speed = arg_parse_uint(&arg);
} else if (arg_match(&arg, &threads_arg, argi)) {
svc_ctx->threads = arg_parse_uint(&arg);
} else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
svc_ctx->temporal_layering_mode =
enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
if (svc_ctx->temporal_layering_mode) {
enc_cfg->g_error_resilient = 1;
}
} else if (arg_match(&arg, &kf_dist_arg, argi)) {
enc_cfg->kf_min_dist = arg_parse_uint(&arg);
enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
@@ -316,6 +360,185 @@ static void parse_command_line(int argc, const char **argv_,
enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
}
#if OUTPUT_RC_STATS
// For rate control encoding stats.
struct RateControlStats {
// Number of input frames per layer.
int layer_input_frames[VPX_MAX_LAYERS];
// Total (cumulative) number of encoded frames per layer.
int layer_tot_enc_frames[VPX_MAX_LAYERS];
// Number of encoded non-key frames per layer.
int layer_enc_frames[VPX_MAX_LAYERS];
// Framerate per layer (cumulative).
double layer_framerate[VPX_MAX_LAYERS];
// Target average frame size per layer (per-frame-bandwidth per layer).
double layer_pfb[VPX_MAX_LAYERS];
// Actual average frame size per layer.
double layer_avg_frame_size[VPX_MAX_LAYERS];
// Average rate mismatch per layer (|target - actual| / target).
double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
// Actual encoding bitrate per layer (cumulative).
double layer_encoding_bitrate[VPX_MAX_LAYERS];
// Average of the short-time encoder actual bitrate.
// TODO(marpan): Should we add these short-time stats for each layer?
double avg_st_encoding_bitrate;
// Variance of the short-time encoder actual bitrate.
double variance_st_encoding_bitrate;
// Window (number of frames) for computing short-time encoding bitrate.
int window_size;
// Number of window measurements.
int window_count;
};
// Note: these rate control stats assume only 1 key frame in the
// sequence (i.e., first frame only).
static void set_rate_control_stats(struct RateControlStats *rc,
vpx_codec_enc_cfg_t *cfg) {
unsigned int sl, tl;
// Set the layer (cumulative) framerate and the target layer (non-cumulative)
// per-frame-bandwidth, for the rate control encoding stats below.
const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
const int layer = sl * cfg->ts_number_layers + tl;
const int tlayer0 = sl * cfg->ts_number_layers;
rc->layer_framerate[layer] =
framerate / cfg->ts_rate_decimator[tl];
if (tl > 0) {
rc->layer_pfb[layer] = 1000.0 *
(cfg->layer_target_bitrate[layer] -
cfg->layer_target_bitrate[layer - 1]) /
(rc->layer_framerate[layer] -
rc->layer_framerate[layer - 1]);
} else {
rc->layer_pfb[tlayer0] = 1000.0 *
cfg->layer_target_bitrate[tlayer0] /
rc->layer_framerate[tlayer0];
}
rc->layer_input_frames[layer] = 0;
rc->layer_enc_frames[layer] = 0;
rc->layer_tot_enc_frames[layer] = 0;
rc->layer_encoding_bitrate[layer] = 0.0;
rc->layer_avg_frame_size[layer] = 0.0;
rc->layer_avg_rate_mismatch[layer] = 0.0;
}
}
rc->window_count = 0;
rc->window_size = 15;
rc->avg_st_encoding_bitrate = 0.0;
rc->variance_st_encoding_bitrate = 0.0;
}
static void printout_rate_control_summary(struct RateControlStats *rc,
vpx_codec_enc_cfg_t *cfg,
int frame_cnt) {
unsigned int sl, tl;
int tot_num_frames = 0;
double perc_fluctuation = 0.0;
printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
cfg->ss_number_layers, cfg->ts_number_layers);
for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
const int layer = sl * cfg->ts_number_layers + tl;
const int num_dropped = (tl > 0) ?
(rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
(rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
if (!sl)
tot_num_frames += rc->layer_input_frames[layer];
rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
rc->layer_encoding_bitrate[layer] / tot_num_frames;
rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
rc->layer_enc_frames[layer];
rc->layer_avg_rate_mismatch[layer] =
100.0 * rc->layer_avg_rate_mismatch[layer] /
rc->layer_enc_frames[layer];
printf("For layer#: sl%d tl%d \n", sl, tl);
printf("Bitrate (target vs actual): %d %f.0 kbps\n",
cfg->layer_target_bitrate[layer],
rc->layer_encoding_bitrate[layer]);
printf("Average frame size (target vs actual): %f %f bits\n",
rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
printf("Average rate_mismatch: %f\n",
rc->layer_avg_rate_mismatch[layer]);
printf("Number of input frames, encoded (non-key) frames, "
"and percent dropped frames: %d %d %f.0 \n",
rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
100.0 * num_dropped / rc->layer_input_frames[layer]);
printf("\n");
}
}
rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
rc->variance_st_encoding_bitrate =
rc->variance_st_encoding_bitrate / rc->window_count -
(rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
rc->avg_st_encoding_bitrate;
printf("Short-time stats, for window of %d frames: \n", rc->window_size);
printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
rc->avg_st_encoding_bitrate,
sqrt(rc->variance_st_encoding_bitrate),
perc_fluctuation);
if (frame_cnt != tot_num_frames)
die("Error: Number of input frames not equal to output encoded frames != "
"%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
}
vpx_codec_err_t parse_superframe_index(const uint8_t *data,
size_t data_sz,
uint32_t sizes[8], int *count) {
// A chunk ending with a byte matching 0xc0 is an invalid chunk unless
// it is a super frame index. If the last byte of real video compression
// data is 0xc0 the encoder must add a 0 byte. If we have the marker but
// not the associated matching marker byte at the front of the index we have
// an invalid bitstream and need to return an error.
uint8_t marker;
marker = *(data + data_sz - 1);
*count = 0;
if ((marker & 0xe0) == 0xc0) {
const uint32_t frames = (marker & 0x7) + 1;
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
const size_t index_sz = 2 + mag * frames;
// This chunk is marked as having a superframe index but doesn't have
// enough data for it, thus it's an invalid superframe index.
if (data_sz < index_sz)
return VPX_CODEC_CORRUPT_FRAME;
{
const uint8_t marker2 = *(data + data_sz - index_sz);
// This chunk is marked as having a superframe index but doesn't have
// the matching marker byte at the front of the index therefore it's an
// invalid chunk.
if (marker != marker2)
return VPX_CODEC_CORRUPT_FRAME;
}
{
// Found a valid superframe index.
uint32_t i, j;
const uint8_t *x = &data[data_sz - index_sz + 1];
for (i = 0; i < frames; ++i) {
uint32_t this_sz = 0;
for (j = 0; j < mag; ++j)
this_sz |= (*x++) << (j * 8);
sizes[i] = this_sz;
}
*count = frames;
}
}
return VPX_CODEC_OK;
}
#endif
int main(int argc, const char **argv) {
AppInput app_input = {0};
VpxVideoWriter *writer = NULL;
@@ -332,7 +555,15 @@ int main(int argc, const char **argv) {
FILE *infile = NULL;
int end_of_stream = 0;
int frames_received = 0;
#if OUTPUT_RC_STATS
VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
struct RateControlStats rc;
vpx_svc_layer_id_t layer_id;
int sl, tl;
double sum_bitrate = 0.0;
double sum_bitrate2 = 0.0;
double framerate = 30.0;
#endif
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
exec_name = argv[0];
@@ -359,6 +590,13 @@ int main(int argc, const char **argv) {
VPX_CODEC_OK)
die("Failed to initialize encoder\n");
#if OUTPUT_RC_STATS
if (svc_ctx.output_rc_stat) {
set_rate_control_stats(&rc, &enc_cfg);
framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
}
#endif
info.codec_fourcc = VP9_FOURCC;
info.time_base.numerator = enc_cfg.g_timebase.num;
info.time_base.denominator = enc_cfg.g_timebase.den;
@@ -370,11 +608,31 @@ int main(int argc, const char **argv) {
if (!writer)
die("Failed to open %s for writing\n", app_input.output_filename);
}
#if OUTPUT_RC_STATS
// For now, just write temporal layer streams.
// TODO(wonkap): do spatial by re-writing superframe.
if (svc_ctx.output_rc_stat) {
for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
char file_name[PATH_MAX];
snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
app_input.output_filename, tl);
outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
if (!outfile[tl])
die("Failed to open %s for writing", file_name);
}
}
#endif
// skip initial frames
for (i = 0; i < app_input.frames_to_skip; ++i)
vpx_img_read(&raw, infile);
if (svc_ctx.speed != -1)
vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
if (svc_ctx.threads)
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
// Encode frames
while (!end_of_stream) {
vpx_codec_iter_t iter = NULL;
@@ -386,7 +644,9 @@ int main(int argc, const char **argv) {
}
res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
pts, frame_duration, VPX_DL_GOOD_QUALITY);
pts, frame_duration, svc_ctx.speed >= 5 ?
VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
printf("%s", vpx_svc_get_message(&svc_ctx));
if (res != VPX_CODEC_OK) {
die_codec(&codec, "Failed to encode frame");
@@ -395,11 +655,90 @@ int main(int argc, const char **argv) {
while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
switch (cx_pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT: {
if (cx_pkt->data.frame.sz > 0)
if (cx_pkt->data.frame.sz > 0) {
#if OUTPUT_RC_STATS
uint32_t sizes[8];
int count = 0;
#endif
vpx_video_writer_write_frame(writer,
cx_pkt->data.frame.buf,
cx_pkt->data.frame.sz,
cx_pkt->data.frame.pts);
#if OUTPUT_RC_STATS
// TODO(marpan/wonkap): Put this (to line728) in separate function.
if (svc_ctx.output_rc_stat) {
vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
parse_superframe_index(cx_pkt->data.frame.buf,
cx_pkt->data.frame.sz, sizes, &count);
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
layer_id.temporal_layer_id];
}
for (tl = layer_id.temporal_layer_id;
tl < enc_cfg.ts_number_layers; ++tl) {
vpx_video_writer_write_frame(outfile[tl],
cx_pkt->data.frame.buf,
cx_pkt->data.frame.sz,
cx_pkt->data.frame.pts);
}
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
for (tl = layer_id.temporal_layer_id;
tl < enc_cfg.ts_number_layers; ++tl) {
const int layer = sl * enc_cfg.ts_number_layers + tl;
++rc.layer_tot_enc_frames[layer];
rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
// Keep count of rate control stats per layer, for non-key
// frames.
if (tl == layer_id.temporal_layer_id &&
!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
rc.layer_avg_rate_mismatch[layer] +=
fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
rc.layer_pfb[layer];
++rc.layer_enc_frames[layer];
}
}
}
// Update for short-time encoding bitrate states, for moving
// window of size rc->window, shifted by rc->window / 2.
// Ignore first window segment, due to key frame.
if (frame_cnt > rc.window_size) {
tl = layer_id.temporal_layer_id;
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
}
if (frame_cnt % rc.window_size == 0) {
rc.window_count += 1;
rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
rc.variance_st_encoding_bitrate +=
(sum_bitrate / rc.window_size) *
(sum_bitrate / rc.window_size);
sum_bitrate = 0.0;
}
}
// Second shifted window.
if (frame_cnt > rc.window_size + rc.window_size / 2) {
tl = layer_id.temporal_layer_id;
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
}
if (frame_cnt > 2 * rc.window_size &&
frame_cnt % rc.window_size == 0) {
rc.window_count += 1;
rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
rc.variance_st_encoding_bitrate +=
(sum_bitrate2 / rc.window_size) *
(sum_bitrate2 / rc.window_size);
sum_bitrate2 = 0.0;
}
}
}
#endif
}
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
@@ -424,25 +763,30 @@ int main(int argc, const char **argv) {
pts += frame_duration;
}
}
printf("Processed %d frames\n", frame_cnt);
fclose(infile);
#if OUTPUT_RC_STATS
if (svc_ctx.output_rc_stat) {
printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
printf("\n");
}
#endif
if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
if (app_input.passes == 2)
stats_close(&app_input.rc_stats, 1);
if (writer) {
vpx_video_writer_close(writer);
}
#if OUTPUT_RC_STATS
if (svc_ctx.output_rc_stat) {
for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
vpx_video_writer_close(outfile[tl]);
}
}
#endif
vpx_img_free(&raw);
// display average size, psnr
printf("%s", vpx_svc_dump_statistics(&svc_ctx));
vpx_svc_release(&svc_ctx);
return EXIT_SUCCESS;
}


@@ -28,7 +28,7 @@
static const char *exec_name;
void usage_exit() {
void usage_exit(void) {
exit(EXIT_FAILURE);
}
@@ -70,6 +70,7 @@ struct RateControlMetrics {
int window_size;
// Number of window measurements.
int window_count;
int layer_target_bitrate[VPX_MAX_LAYERS];
};
// Note: these rate control metrics assume only 1 key frame in the
@@ -85,13 +86,13 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
// per-frame-bandwidth, for the rate control encoding stats below.
const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] /
rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] /
rc->layer_framerate[0];
for (i = 0; i < cfg->ts_number_layers; ++i) {
if (i > 0) {
rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
rc->layer_pfb[i] = 1000.0 *
(cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) /
(rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
}
rc->layer_input_frames[i] = 0;
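The per-frame budget above is cumulative: temporal layer 0 gets 1000 * target / framerate bits per frame, and each higher layer is budgeted from the additional bitrate spread over the additional frames it adds. A small worked example with assumed numbers (30 fps, rate decimators {4, 2, 1}, cumulative targets 200/300/400 kbps; illustration only):

#include <stdio.h>

int main(void) {
  const double framerate = 30.0;
  const int decimator[3] = { 4, 2, 1 };         /* ts_rate_decimator */
  const int target_kbps[3] = { 200, 300, 400 }; /* cumulative layer targets */
  double layer_fps[3], pfb[3];
  int i;
  for (i = 0; i < 3; ++i)
    layer_fps[i] = framerate / decimator[i];    /* 7.5, 15, 30 fps */
  pfb[0] = 1000.0 * target_kbps[0] / layer_fps[0];
  for (i = 1; i < 3; ++i)
    pfb[i] = 1000.0 * (target_kbps[i] - target_kbps[i - 1]) /
             (layer_fps[i] - layer_fps[i - 1]);
  for (i = 0; i < 3; ++i)
    printf("TL%d: %.1f fps, %.0f bits/frame\n", i, layer_fps[i], pfb[i]);
  return 0;  /* approx. 26667, 13333, 6667 bits per frame */
}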
@@ -128,7 +129,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
rc->layer_enc_frames[i];
printf("For layer#: %d \n", i);
printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i],
printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
rc->layer_encoding_bitrate[i]);
printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
rc->layer_avg_frame_size[i]);
@@ -597,13 +598,16 @@ int main(int argc, char **argv) {
for (i = min_args_base;
(int)i < min_args_base + mode_to_num_layers[layering_mode];
++i) {
cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
if (strncmp(encoder->name, "vp8", 3) == 0)
cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
else if (strncmp(encoder->name, "vp9", 3) == 0)
cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
}
// Real time parameters.
cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
cfg.rc_end_usage = VPX_CBR;
cfg.rc_resize_allowed = 0;
cfg.rc_min_quantizer = 2;
cfg.rc_max_quantizer = 56;
if (strncmp(encoder->name, "vp9", 3) == 0)
@@ -614,6 +618,9 @@ int main(int argc, char **argv) {
cfg.rc_buf_optimal_sz = 600;
cfg.rc_buf_sz = 1000;
// Disable dynamic resizing by default.
cfg.rc_resize_allowed = 0;
// Use 1 thread as default.
cfg.g_threads = 1;
@@ -625,6 +632,8 @@ int main(int argc, char **argv) {
// Disable automatic keyframe placement.
cfg.kf_min_dist = cfg.kf_max_dist = 3000;
cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
set_temporal_layer_pattern(layering_mode,
&cfg,
layer_flags,
@@ -633,8 +642,8 @@ int main(int argc, char **argv) {
set_rate_control_metrics(&rc, &cfg);
// Target bandwidth for the whole stream.
// Set to ts_target_bitrate for highest layer (total bitrate).
cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1];
// Set to layer_target_bitrate for highest layer (total bitrate).
cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1];
// Open input file.
if (!(infile = fopen(argv[1], "rb"))) {
@@ -677,15 +686,22 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
} else if (strncmp(encoder->name, "vp9", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) {
die_codec(&codec, "Failed to set SVC");
vpx_svc_extra_cfg_t svc_params;
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
die_codec(&codec, "Failed to set SVC");
for (i = 0; i < cfg.ts_number_layers; ++i) {
svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
}
svc_params.scaling_factor_num[0] = cfg.g_h;
svc_params.scaling_factor_den[0] = cfg.g_h;
vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params);
}
if (strncmp(encoder->name, "vp8", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0);

libs.mk

@@ -25,7 +25,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
@echo " [CREATE] $$@"
$$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \
--sym=$(1) \
--config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
--config=$$(CONFIG_DIR)$$(target)-$$(TOOLCHAIN).mk \
$$(RTCD_OPTIONS) $$^ > $$@
CLEAN-OBJS += $$(BUILD_PFX)$(1).h
RTCD += $$(BUILD_PFX)$(1).h
@@ -34,13 +34,6 @@ endef
CODEC_SRCS-yes += CHANGELOG
CODEC_SRCS-yes += libs.mk
# If this is a universal (fat) binary, then all the subarchitectures have
# already been built and our job is to stitch them together. The
# BUILD_LIBVPX variable indicates whether we should be building
# (compiling, linking) the library. The LIPO_LIBVPX variable indicates
# that we're stitching.
$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS))
@@ -54,6 +47,9 @@ CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk
CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))
include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk
CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
VP8_PREFIX=vp8/
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
@@ -137,18 +133,18 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%)
INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%)
endif
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
CODEC_SRCS-yes += build/make/version.sh
CODEC_SRCS-yes += build/make/rtcd.pl
CODEC_SRCS-yes += vpx_ports/emmintrin_compat.h
CODEC_SRCS-yes += vpx_ports/mem_ops.h
CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h
CODEC_SRCS-yes += vpx_ports/vpx_once.h
CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
endif
CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
CODEC_EXPORTS-yes += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
@@ -215,7 +211,7 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
$(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \
--src-path-bare="$(SRC_PATH_BARE)" \
PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX)
PROJECTS-yes += vpx.$(VCPROJ_SFX)
vpx.$(VCPROJ_SFX): vpx_config.asm
vpx.$(VCPROJ_SFX): $(RTCD)
@@ -223,31 +219,39 @@ vpx.$(VCPROJ_SFX): $(RTCD)
endif
else
LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
OBJS-yes += $(LIBVPX_OBJS)
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
SO_VERSION_MAJOR := 2
SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
SHARED_LIB_SUF := .dylib
EXPORT_FILE := libvpx.syms
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.dylib )
else
ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll
SHARED_LIB_SUF := _dll.a
EXPORT_FILE := libvpx.def
LIBVPX_SO_SYMLINKS :=
LIBVPX_SO_IMPLIB := libvpx_dll.a
else
LIBVPX_SO := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH)
SHARED_LIB_SUF := .so
EXPORT_FILE := libvpx.ver
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \
libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
endif
endif
LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
$(notdir $(LIBVPX_SO_SYMLINKS))
LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
$(notdir $(LIBVPX_SO_SYMLINKS)) \
$(if $(LIBVPX_SO_IMPLIB), $(BUILD_PFX)$(LIBVPX_SO_IMPLIB))
$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE)
$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR)
@@ -265,6 +269,19 @@ libvpx.syms: $(call enabled,CODEC_EXPORTS)
$(qexec)awk '{print "_"$$2}' $^ >$@
CLEAN-OBJS += libvpx.syms
libvpx.def: $(call enabled,CODEC_EXPORTS)
@echo " [CREATE] $@"
$(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@
$(qexec)echo "DATA MULTIPLE NONSHARED" >> $@
$(qexec)echo "EXPORTS" >> $@
$(qexec)awk '!/vpx_svc_*/ {print "_"$$2}' $^ >>$@
CLEAN-OBJS += libvpx.def
libvpx_dll.a: $(LIBVPX_SO)
@echo " [IMPLIB] $@"
$(qexec)emximp -o $@ $<
CLEAN-OBJS += libvpx_dll.a
define libvpx_symlink_template
$(1): $(2)
@echo " [LN] $(2) $$@"
@@ -280,11 +297,12 @@ $(eval $(call libvpx_symlink_template,\
$(LIBVPX_SO)))
INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS)
INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBSUBDIR)/$(LIBVPX_SO)
INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
INSTALL-LIBS-$(CONFIG_SHARED) += $(if $(LIBVPX_SO_IMPLIB),$(LIBSUBDIR)/$(LIBVPX_SO_IMPLIB))
LIBS-$(BUILD_LIBVPX) += vpx.pc
LIBS-yes += vpx.pc
vpx.pc: config.mk libs.mk
@echo " [CREATE] $@"
$(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@
@@ -310,9 +328,6 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
CLEAN-OBJS += vpx.pc
endif
LIBS-$(LIPO_LIBVPX) += libvpx.a
$(eval $(if $(LIPO_LIBVPX),$(call lipo_lib_template,libvpx.a)))
#
# Rule to make assembler configuration file from C configuration file
#
@@ -351,11 +366,15 @@ LIBVPX_TEST_DATA_PATH ?= .
include $(SRC_PATH_BARE)/test/test.mk
LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
LIBVPX_TEST_BINS=./test_libvpx$(EXE_SFX)
LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
$(call enabled,LIBVPX_TEST_DATA))
libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
libvpx_test_srcs.txt:
@echo " [CREATE] $@"
@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
@@ -419,7 +438,25 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_
PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX)
LIBVPX_TEST_BINS := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS)))
LIBVPX_TEST_BIN := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BIN)))
ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
PROJECTS-$(CONFIG_MSVS) += test_intra_pred_speed.$(VCPROJ_SFX)
test_intra_pred_speed.$(VCPROJ_SFX): $(TEST_INTRA_PRED_SPEED_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)
@echo " [CREATE] $@"
$(qexec)$(GEN_VCPROJ) \
--exe \
--target=$(TOOLCHAIN) \
--name=test_intra_pred_speed \
-D_VARIADIC_MAX=10 \
--proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
--ver=$(CONFIG_VS_VERSION) \
--src-path-bare="$(SRC_PATH_BARE)" \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
--out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
-L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^
endif # TEST_INTRA_PRED_SPEED
endif
else
@@ -430,45 +467,54 @@ ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
# Disabling pthreads globally will cause issues on darwin and possibly elsewhere
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
endif
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
GTEST_INCLUDES := -I$(SRC_PATH_BARE)/third_party/googletest/src
GTEST_INCLUDES += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
OBJS-yes += $(GTEST_OBJS)
LIBS-yes += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
$(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS)
LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS)))
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS)
BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS)
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
OBJS-yes += $(LIBVPX_TEST_OBJS)
BINS-yes += $(LIBVPX_TEST_BIN)
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
$(foreach bin,$(LIBVPX_TEST_BINS),\
$(if $(BUILD_LIBVPX),$(eval $(bin): \
lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
$(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
$(LIBVPX_TEST_OBJS) \
-L. -lvpx -lgtest $(extralibs) -lm)\
)))\
$(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
TEST_LIBS := lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a
$(LIBVPX_TEST_BIN): $(TEST_LIBS)
$(eval $(call linkerxx_template,$(LIBVPX_TEST_BIN), \
$(LIBVPX_TEST_OBJS) \
-L. -lvpx -lgtest $(extralibs) -lm))
endif
ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS)
BINS-yes += $(TEST_INTRA_PRED_SPEED_BIN)
$(TEST_INTRA_PRED_SPEED_BIN): $(TEST_LIBS)
$(eval $(call linkerxx_template,$(TEST_INTRA_PRED_SPEED_BIN), \
$(TEST_INTRA_PRED_SPEED_OBJS) \
-L. -lvpx -lgtest $(extralibs) -lm))
endif # TEST_INTRA_PRED_SPEED
endif # CONFIG_UNIT_TESTS
# Install test sources only if codec source is included
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
$(shell find $(SRC_PATH_BARE)/third_party/googletest -type f))
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS)
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(TEST_INTRA_PRED_SPEED_SRCS)
define test_shard_template
test:: test_shard.$(1)
test_shard.$(1): $(LIBVPX_TEST_BINS) testdata
test-no-data-check:: test_shard_ndc.$(1)
test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN)
@set -e; \
for t in $(LIBVPX_TEST_BINS); do \
export GTEST_SHARD_INDEX=$(1); \
export GTEST_TOTAL_SHARDS=$(2); \
$$$$t; \
done
export GTEST_SHARD_INDEX=$(1); \
export GTEST_TOTAL_SHARDS=$(2); \
$(LIBVPX_TEST_BIN)
test_shard.$(1): testdata
.PHONY: test_shard.$(1)
endef
@@ -513,15 +559,16 @@ ifeq ($(CONFIG_MSVS),yes)
# TODO(tomfinegan): Support running the debug versions of tools?
TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH))
endif
utiltest: testdata
utiltest utiltest-no-data-check:
$(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(TEST_BIN_PATH)
$(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(TEST_BIN_PATH)
utiltest: testdata
else
utiltest:
utiltest utiltest-no-data-check:
@echo Unit tests must be enabled to make the utiltest target.
endif
@@ -539,11 +586,12 @@ ifeq ($(CONFIG_MSVS),yes)
# TODO(tomfinegan): Support running the debug versions of tools?
EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
endif
exampletest: examples testdata
exampletest exampletest-no-data-check: examples
$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(EXAMPLES_BIN_PATH)
exampletest: testdata
else
exampletest:
exampletest exampletest-no-data-check:
@echo Unit tests must be enabled to make the exampletest target.
endif


@@ -24,7 +24,7 @@
#include "md5_utils.h"
void
static void
byteSwap(UWORD32 *buf, unsigned words) {
md5byte *p;


@@ -88,6 +88,9 @@ void update_rate_histogram(struct rate_hist *hist,
if (now < cfg->rc_buf_initial_sz)
return;
if (!cfg->rc_target_bitrate)
return;
then = now;
/* Sum the size over the past rc_buf_sz ms */


@@ -51,4 +51,6 @@ include $(LOCAL_PATH)/test/test.mk
LOCAL_C_INCLUDES := $(BINDINGS_DIR)
FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
# some test files depend on *_rtcd.h, ensure they're generated first.
$(eval $(call rtcd_dep_template))
include $(BUILD_EXECUTABLE)


@@ -1818,9 +1818,9 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
#if HAVE_MSA
const ConvolveFunctions convolve8_msa(
vp9_convolve_copy_msa, vp9_convolve_avg_msa,
vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_c,
vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_c,
vp9_convolve8_msa, vp9_convolve8_avg_c, 0);
vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_msa,
vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_msa,
vp9_convolve8_msa, vp9_convolve8_avg_msa, 0);
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_msa),


@@ -14,6 +14,7 @@
#include "test/i420_video_source.h"
#include "test/util.h"
#include "test/y4m_video_source.h"
#include "vpx/vpx_codec.h"
namespace {
@@ -371,9 +372,7 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
encoder->Control(VP9E_SET_SVC, 1);
}
vpx_svc_layer_id_t layer_id;
#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
layer_id.spatial_layer_id = 0;
#endif
frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
layer_id.temporal_layer_id = SetLayerId(video->frame(),
cfg_.ts_number_layers);
@@ -565,6 +564,8 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
cfg_.ts_rate_decimator[0] = 2;
cfg_.ts_rate_decimator[1] = 1;
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
if (deadline_ == VPX_DL_REALTIME)
cfg_.g_error_resilient = 1;
@@ -574,14 +575,14 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
cfg_.rc_target_bitrate = i;
ResetModel();
// 60-40 bitrate allocation for 2 temporal layers.
cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
<< " The datarate for the file is lower than target by too much, "
"for layer: " << j;
ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
<< " The datarate for the file is greater than target by too much, "
"for layer: " << j;
}
@@ -606,25 +607,27 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) {
cfg_.ts_rate_decimator[1] = 2;
cfg_.ts_rate_decimator[2] = 1;
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
for (int i = 200; i <= 800; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
// 40-20-40 bitrate allocation for 3 temporal layers.
cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
// TODO(yaowu): Work out more stable rc control strategy and
// Adjust the thresholds to be tighter than .75.
ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75)
<< " The datarate for the file is lower than target by too much, "
"for layer: " << j;
// TODO(yaowu): Work out more stable rc control strategy and
// Adjust the thresholds to be tighter than 1.25.
ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25)
<< " The datarate for the file is greater than target by too much, "
"for layer: " << j;
}
@@ -652,20 +655,22 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
cfg_.ts_rate_decimator[1] = 2;
cfg_.ts_rate_decimator[2] = 1;
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
cfg_.rc_target_bitrate = 200;
ResetModel();
// 40-20-40 bitrate allocation for 3 temporal layers.
cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
<< " The datarate for the file is lower than target by too much, "
"for layer: " << j;
ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
<< " The datarate for the file is greater than target by too much, "
"for layer: " << j;
// Expect some frame drops in this test: for this 200 frames test,
@@ -737,9 +742,178 @@ TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
}
#endif // CONFIG_VP9_TEMPORAL_DENOISING
class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
public:
DatarateOnePassCbrSvc() : EncoderTest(GET_PARAM(0)) {}
virtual ~DatarateOnePassCbrSvc() {}
protected:
virtual void SetUp() {
InitializeConfig();
SetMode(GET_PARAM(1));
speed_setting_ = GET_PARAM(2);
ResetModel();
}
virtual void ResetModel() {
last_pts_ = 0;
bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
frame_number_ = 0;
first_drop_ = 0;
bits_total_ = 0;
duration_ = 0.0;
}
virtual void BeginPassHook(unsigned int /*pass*/) {
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 0) {
int i;
for (i = 0; i < 2; ++i) {
svc_params_.max_quantizers[i] = 63;
svc_params_.min_quantizers[i] = 0;
}
svc_params_.scaling_factor_num[0] = 144;
svc_params_.scaling_factor_den[0] = 288;
svc_params_.scaling_factor_num[1] = 288;
svc_params_.scaling_factor_den[1] = 288;
encoder->Control(VP9E_SET_SVC, 1);
encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
encoder->Control(VP9E_SET_TILE_COLUMNS, 0);
encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
}
const vpx_rational_t tb = video->timebase();
timebase_ = static_cast<double>(tb.num) / tb.den;
duration_ = 0;
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
if (last_pts_ == 0)
duration = 1;
bits_in_buffer_model_ += static_cast<int64_t>(
duration * timebase_ * cfg_.rc_target_bitrate * 1000);
const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY)
? true: false;
if (!key_frame) {
ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
<< pkt->data.frame.pts;
}
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
bits_in_buffer_model_ -= frame_size_in_bits;
bits_total_ += frame_size_in_bits;
if (!first_drop_ && duration > 1)
first_drop_ = last_pts_ + 1;
last_pts_ = pkt->data.frame.pts;
bits_in_last_frame_ = frame_size_in_bits;
++frame_number_;
}
virtual void EndPassHook(void) {
if (bits_total_) {
const double file_size_in_kb = bits_total_ / 1000.;  // in kilobits
duration_ = (last_pts_ + 1) * timebase_;
effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0
/ (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
file_datarate_ = file_size_in_kb / duration_;
}
}
vpx_codec_pts_t last_pts_;
int64_t bits_in_buffer_model_;
double timebase_;
int frame_number_;
vpx_codec_pts_t first_drop_;
int64_t bits_total_;
double duration_;
double file_datarate_;
double effective_datarate_;
size_t bits_in_last_frame_;
vpx_svc_extra_cfg_t svc_params_;
int speed_setting_;
};
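FramePktHook() above maintains a simple leaky-bucket model: the buffer is credited at the target bitrate for the duration since the previous packet and debited by each encoded frame, and a negative balance on a non-key frame is flagged as an underrun. A minimal sketch of that update (assumed helper, not part of the test harness):

#include <stddef.h>
#include <stdint.h>

static int64_t update_buffer_model(int64_t bits_in_buffer,
                                   double duration_sec,
                                   unsigned int target_kbps,
                                   size_t frame_size_bytes) {
  /* Credit the bucket at the target rate, then drain it by the frame size. */
  bits_in_buffer += (int64_t)(duration_sec * target_kbps * 1000.0);
  bits_in_buffer -= (int64_t)(frame_size_bytes * 8);
  return bits_in_buffer;  /* negative on a non-key frame => underrun */
}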
static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
const vpx_svc_extra_cfg_t *svc_params,
int spatial_layers,
int temporal_layers,
int temporal_layering_mode,
unsigned int total_rate) {
int sl, spatial_layer_target;
float total = 0;
float alloc_ratio[VPX_MAX_LAYERS] = {0};
for (sl = 0; sl < spatial_layers; ++sl) {
if (svc_params->scaling_factor_den[sl] > 0) {
alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] *
1.0 / svc_params->scaling_factor_den[sl]);
total += alloc_ratio[sl];
}
}
for (sl = 0; sl < spatial_layers; ++sl) {
enc_cfg->ss_target_bitrate[sl] = spatial_layer_target =
(unsigned int)(enc_cfg->rc_target_bitrate *
alloc_ratio[sl] / total);
const int index = sl * temporal_layers;
if (temporal_layering_mode == 3) {
enc_cfg->layer_target_bitrate[index] =
spatial_layer_target >> 1;
enc_cfg->layer_target_bitrate[index + 1] =
(spatial_layer_target >> 1) + (spatial_layer_target >> 2);
enc_cfg->layer_target_bitrate[index + 2] =
spatial_layer_target;
} else if (temporal_layering_mode == 2) {
enc_cfg->layer_target_bitrate[index] =
spatial_layer_target * 2 / 3;
enc_cfg->layer_target_bitrate[index + 1] =
spatial_layer_target;
}
}
}
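assign_layer_bitrates() above splits the total target across spatial layers in proportion to their scaling factors and then gives each spatial layer cumulative temporal targets (1/2, 3/4, and all of its share for three temporal layers). A standalone sketch reproducing that arithmetic for one of the rates used in the test below (800 kbps, scaling factors 144/288 and 288/288; illustration only):

#include <stdio.h>

int main(void) {
  const unsigned int total_rate = 800;  /* kbps, rc_target_bitrate */
  const float alloc_ratio[2] = { 144.f / 288.f, 288.f / 288.f };
  const float total = alloc_ratio[0] + alloc_ratio[1];  /* 1.5 */
  int sl;
  for (sl = 0; sl < 2; ++sl) {
    const unsigned int spatial =
        (unsigned int)(total_rate * alloc_ratio[sl] / total);
    /* temporal_layering_mode == 3: cumulative targets 1/2, 3/4, full. */
    printf("SL%d: %u kbps -> TL targets %u, %u, %u kbps\n", sl, spatial,
           spatial >> 1, (spatial >> 1) + (spatial >> 2), spatial);
  }
  return 0;  /* SL0: 266 -> 133, 199, 266; SL1: 533 -> 266, 399, 533 */
}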
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
// 3 temporal layers.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 63;
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
cfg_.ss_number_layers = 2;
cfg_.ts_number_layers = 3;
cfg_.ts_rate_decimator[0] = 4;
cfg_.ts_rate_decimator[1] = 2;
cfg_.ts_rate_decimator[2] = 1;
cfg_.g_error_resilient = 1;
cfg_.temporal_layering_mode = 3;
svc_params_.scaling_factor_num[0] = 144;
svc_params_.scaling_factor_den[0] = 288;
svc_params_.scaling_factor_num[1] = 288;
svc_params_.scaling_factor_den[1] = 288;
// TODO(wonkap/marpan): No frame drop for now, we need to implement correct
// frame dropping for SVC.
cfg_.rc_dropframe_thresh = 0;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
// TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
// layer target_bitrate. Also check if test can pass at lower bitrate (~200k).
for (int i = 400; i <= 800; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
cfg_.rc_target_bitrate);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
}
}
VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
::testing::Range(2, 7));
VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
::testing::Values(::libvpx_test::kRealTime),
::testing::Range(5, 8));
} // namespace


@@ -20,8 +20,10 @@
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;
@@ -356,13 +358,13 @@ class Trans16x16TestBase {
int64_t total_error = 0;
const int count_test_block = 10000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif
// Initialize a test block with input range [-mask_, mask_].
@@ -416,9 +418,9 @@ class Trans16x16TestBase {
void RunCoeffCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-mask_, mask_].
@@ -437,15 +439,13 @@ class Trans16x16TestBase {
void RunMemCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-mask_, mask_].
for (int j = 0; j < kNumCoeffs; ++j) {
input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
}
if (i == 0) {
@@ -472,24 +472,19 @@ class Trans16x16TestBase {
void RunQuantCheck(int dc_thred, int ac_thred) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 100000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-mask_, mask_].
for (int j = 0; j < kNumCoeffs; ++j) {
if (bit_depth_ == VPX_BITS_8)
input_block[j] = rnd.Rand8() - rnd.Rand8();
else
input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
}
if (i == 0)
@@ -539,13 +534,13 @@ class Trans16x16TestBase {
void RunInvAccuracyCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
for (int i = 0; i < count_test_block; ++i) {
@@ -599,12 +594,12 @@ class Trans16x16TestBase {
const int count_test_block = 10000;
const int eob = 10;
const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
for (int i = 0; i < count_test_block; ++i) {
@@ -933,4 +928,20 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&idct16x16_12,
&idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MSA, Trans16x16DCT,
::testing::Values(
make_tuple(&vp9_fdct16x16_msa,
&vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
MSA, Trans16x16HT,
::testing::Values(
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
VPX_BITS_8)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace


@@ -23,6 +23,7 @@
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;
@@ -119,13 +120,13 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
uint32_t max_error = 0;
int64_t total_error = 0;
const int count_test_block = 10000;
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif
for (int i = 0; i < count_test_block; ++i) {
@@ -184,9 +185,9 @@ TEST_P(Trans32x32Test, CoeffCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
for (int i = 0; i < count_test_block; ++i) {
for (int j = 0; j < kNumCoeffs; ++j)
@@ -212,15 +213,13 @@ TEST_P(Trans32x32Test, MemCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 2000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-mask_, mask_].
for (int j = 0; j < kNumCoeffs; ++j) {
input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
}
if (i == 0) {
@@ -257,13 +256,13 @@ TEST_P(Trans32x32Test, MemCheck) {
TEST_P(Trans32x32Test, InverseAccuracy) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif
for (int i = 0; i < count_test_block; ++i) {
@@ -382,4 +381,14 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fdct32x32_rd_avx2,
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MSA, Trans32x32Test,
::testing::Values(
make_tuple(&vp9_fdct32x32_msa,
&vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8),
make_tuple(&vp9_fdct32x32_rd_msa,
&vp9_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace


@@ -133,6 +133,10 @@ class Encoder {
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
void Control(int ctrl_id, vpx_active_map_t *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);


@@ -22,6 +22,7 @@
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;
@@ -102,13 +103,13 @@ class Trans4x4TestBase {
int64_t total_error = 0;
const int count_test_block = 10000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif
// Initialize a test block with input range [-255, 255].
@@ -142,6 +143,7 @@ class Trans4x4TestBase {
const uint32_t diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
ASSERT_EQ(VPX_BITS_8, bit_depth_);
const uint32_t diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
@@ -163,9 +165,9 @@ class Trans4x4TestBase {
void RunCoeffCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 5000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-mask_, mask_].
@@ -184,15 +186,13 @@ class Trans4x4TestBase {
void RunMemCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 5000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-mask_, mask_].
for (int j = 0; j < kNumCoeffs; ++j) {
input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
}
if (i == 0) {
@@ -219,13 +219,13 @@ class Trans4x4TestBase {
void RunInvAccuracyCheck(int limit) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif
for (int i = 0; i < count_test_block; ++i) {
@@ -536,4 +536,18 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MSA, Trans4x4DCT,
::testing::Values(
make_tuple(&vp9_fdct4x4_msa, &vp9_idct4x4_16_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
MSA, Trans4x4HT,
::testing::Values(
make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace


@@ -20,11 +20,32 @@
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;
namespace {
const int kNumCoeffs = 64;
const double kPi = 3.141592653589793238462643383279502884;
const int kSignBiasMaxDiff255 = 1500;
const int kSignBiasMaxDiff15 = 10000;
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
int tx_type);
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
const double kInvSqrt2 = 0.707106781186547524400844362104;
for (int k = 0; k < 8; k++) {
@@ -59,23 +80,6 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
}
}
using libvpx_test::ACMRandom;
namespace {
const int kSignBiasMaxDiff255 = 1500;
const int kSignBiasMaxDiff15 = 10000;
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
int tx_type);
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
vp9_fdct8x8_c(in, out, stride);
@@ -139,8 +143,8 @@ class FwdTrans8x8TestBase {
void RunSignBiasCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_output_block, 64);
DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
int count_sign_block[64][2];
const int count_test_block = 100000;
@@ -210,13 +214,13 @@ class FwdTrans8x8TestBase {
int max_error = 0;
int total_error = 0;
const int count_test_block = 100000;
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
DECLARE_ALIGNED(16, uint8_t, dst[64]);
DECLARE_ALIGNED(16, uint8_t, src[64]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
DECLARE_ALIGNED(16, uint16_t, dst16[64]);
DECLARE_ALIGNED(16, uint16_t, src16[64]);
#endif
for (int i = 0; i < count_test_block; ++i) {
@@ -287,14 +291,14 @@ class FwdTrans8x8TestBase {
int total_error = 0;
int total_coeff_error = 0;
const int count_test_block = 100000;
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_temp_block, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
DECLARE_ALIGNED(16, uint8_t, dst[64]);
DECLARE_ALIGNED(16, uint8_t, src[64]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
DECLARE_ALIGNED(16, uint16_t, dst16[64]);
DECLARE_ALIGNED(16, uint16_t, src16[64]);
#endif
for (int i = 0; i < count_test_block; ++i) {
@@ -376,13 +380,13 @@ class FwdTrans8x8TestBase {
void RunInvAccuracyCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
#endif
for (int i = 0; i < count_test_block; ++i) {
@@ -434,9 +438,9 @@ class FwdTrans8x8TestBase {
void RunFwdAccuracyCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_r, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
for (int i = 0; i < count_test_block; ++i) {
double out_r[kNumCoeffs];
@@ -464,12 +468,12 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
const int eob = 12;
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif
const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
@@ -777,4 +781,18 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
VPX_BITS_8)));
#endif
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MSA, FwdTrans8x8DCT,
::testing::Values(
make_tuple(&vp9_fdct8x8_msa, &vp9_idct8x8_64_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
MSA, FwdTrans8x8HT,
::testing::Values(
make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace


@@ -137,6 +137,20 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON_ASM
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
public:
virtual ~Loop8Test6Param() {}
@@ -182,11 +196,11 @@ TEST_P(Loop8Test6Param, OperationCheck) {
const int count_test_block = number_of_iterations;
#if CONFIG_VP9_HIGHBITDEPTH
int32_t bd = bit_depth_;
DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
#else
DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
int err_count_total = 0;
int first_failure = -1;
@@ -267,11 +281,11 @@ TEST_P(Loop8Test6Param, ValueCheck) {
const int count_test_block = number_of_iterations;
#if CONFIG_VP9_HIGHBITDEPTH
const int32_t bd = bit_depth_;
DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
#else
DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
int err_count_total = 0;
int first_failure = -1;
@@ -338,11 +352,11 @@ TEST_P(Loop8Test9Param, OperationCheck) {
const int count_test_block = number_of_iterations;
#if CONFIG_VP9_HIGHBITDEPTH
const int32_t bd = bit_depth_;
DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
#else
DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
int err_count_total = 0;
int first_failure = -1;
@@ -440,11 +454,11 @@ TEST_P(Loop8Test9Param, ValueCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = number_of_iterations;
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
#else
DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
int err_count_total = 0;
int first_failure = -1;
@@ -676,4 +690,27 @@ INSTANTIATE_TEST_CASE_P(
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test9Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_4_dual_msa,
&vp9_lpf_horizontal_4_dual_c, 8),
make_tuple(&vp9_lpf_horizontal_8_dual_msa,
&vp9_lpf_horizontal_8_dual_c, 8),
make_tuple(&vp9_lpf_vertical_4_dual_msa,
&vp9_lpf_vertical_4_dual_c, 8),
make_tuple(&vp9_lpf_vertical_8_dual_msa,
&vp9_lpf_vertical_8_dual_c, 8)));
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
} // namespace


@@ -42,6 +42,10 @@ class MD5 {
}
}
void Add(const uint8_t *data, size_t size) {
MD5Update(&md5_, data, static_cast<uint32_t>(size));
}
const char *Get(void) {
static const char hex[16] = {
'0', '1', '2', '3', '4', '5', '6', '7',


@@ -74,16 +74,16 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
FAIL() << "Wrong Size!";
break;
}
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
const int count_test_block = 1000;
const int block_size = size * size;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kMaxNumCoeffs);
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
int max_error = 0;
for (int i = 0; i < count_test_block; ++i) {
@@ -153,10 +153,10 @@ TEST_P(PartialIDctTest, ResultsMatch) {
FAIL() << "Wrong Size!";
break;
}
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
const int count_test_block = 1000;
const int max_coeff = 32766 / 4;
const int block_size = size * size;
@@ -305,4 +305,38 @@ INSTANTIATE_TEST_CASE_P(
TX_8X8, 12)));
#endif
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MSA, PartialIDctTest,
::testing::Values(
make_tuple(&vp9_fdct32x32_c,
&vp9_idct32x32_1024_add_c,
&vp9_idct32x32_34_add_msa,
TX_32X32, 34),
make_tuple(&vp9_fdct32x32_c,
&vp9_idct32x32_1024_add_c,
&vp9_idct32x32_1_add_msa,
TX_32X32, 1),
make_tuple(&vp9_fdct16x16_c,
&vp9_idct16x16_256_add_c,
&vp9_idct16x16_10_add_msa,
TX_16X16, 10),
make_tuple(&vp9_fdct16x16_c,
&vp9_idct16x16_256_add_c,
&vp9_idct16x16_1_add_msa,
TX_16X16, 1),
make_tuple(&vp9_fdct8x8_c,
&vp9_idct8x8_64_add_c,
&vp9_idct8x8_12_add_msa,
TX_8X8, 10),
make_tuple(&vp9_fdct8x8_c,
&vp9_idct8x8_64_add_c,
&vp9_idct8x8_1_add_msa,
TX_8X8, 1),
make_tuple(&vp9_fdct4x4_c,
&vp9_idct4x4_16_add_c,
&vp9_idct4x4_1_add_msa,
TX_4X4, 1)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace

File diff suppressed because it is too large


@@ -453,6 +453,7 @@ TEST_F(SvcTest, OnePassEncodeOneFrame) {
TEST_F(SvcTest, OnePassEncodeThreeFrames) {
codec_enc_.g_pass = VPX_RC_ONE_PASS;
codec_enc_.g_lag_in_frames = 0;
vpx_fixed_buf outputs[3];
memset(&outputs[0], 0, sizeof(outputs));
Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]);


@@ -12,6 +12,7 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m

File diff suppressed because it is too large


@@ -66,6 +66,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_skip_loopfilter_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += decode_api_test.cc
@@ -163,6 +164,9 @@ endif # VP9
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9_DECODER) := test_intra_pred_speed.cc
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9_DECODER) += ../md5_utils.h ../md5_utils.c
endif # CONFIG_SHARED
include $(SRC_PATH_BARE)/test/test-data.mk


@@ -0,0 +1,384 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Test and time VP9 intra-predictor functions
#include <stdio.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vp9_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/md5_helper.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
// -----------------------------------------------------------------------------
namespace {
typedef void (*VpxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
const uint8_t *above, const uint8_t *left);
const int kNumVp9IntraPredFuncs = 13;
const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = {
"DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED", "H_PRED",
"D45_PRED", "D135_PRED", "D117_PRED", "D153_PRED", "D207_PRED", "D63_PRED",
"TM_PRED"
};
void TestIntraPred(const char name[], VpxPredFunc const *pred_funcs,
const char *const pred_func_names[], int num_funcs,
const char *const signatures[], int block_size,
int num_pixels_per_test) {
libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
const int kBPS = 32;
const int kTotalPixels = 32 * kBPS;
DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]);
DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]);
DECLARE_ALIGNED(16, uint8_t, left[kBPS]);
DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]);
uint8_t *const above = above_mem + 16;
for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8();
for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8();
for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8();
const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
// some code assumes the top row has been extended:
// d45/d63 C-code, for instance, but not the assembly.
// TODO(jzern): this style of extension isn't strictly necessary.
ASSERT_LE(block_size, kBPS);
memset(above + block_size, above[block_size - 1], 2 * kBPS - block_size);
for (int k = 0; k < num_funcs; ++k) {
if (pred_funcs[k] == NULL) continue;
memcpy(src, ref_src, sizeof(src));
vpx_usec_timer timer;
vpx_usec_timer_start(&timer);
for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
pred_funcs[k](src, kBPS, above, left);
}
libvpx_test::ClearSystemState();
vpx_usec_timer_mark(&timer);
const int elapsed_time =
static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
libvpx_test::MD5 md5;
md5.Add(src, sizeof(src));
printf("Mode %s[%12s]: %5d ms MD5: %s\n", name, pred_func_names[k],
elapsed_time, md5.Get());
EXPECT_STREQ(signatures[k], md5.Get());
}
}
void TestIntraPred4(VpxPredFunc const *pred_funcs) {
static const int kNumVp9IntraFuncs = 13;
static const char *const kSignatures[kNumVp9IntraFuncs] = {
"4334156168b34ab599d9b5b30f522fe9",
"bc4649d5ba47c7ff178d92e475960fb0",
"8d316e5933326dcac24e1064794b5d12",
"a27270fed024eafd762c95de85f4da51",
"c33dff000d4256c2b8f3bf9e9bab14d2",
"44d8cddc2ad8f79b8ed3306051722b4f",
"eb54839b2bad6699d8946f01ec041cd0",
"ecb0d56ae5f677ea45127ce9d5c058e4",
"0b7936841f6813da818275944895b574",
"9117972ef64f91a58ff73e1731c81db2",
"c56d5e8c729e46825f46dd5d3b5d508a",
"c0889e2039bcf7bcb5d2f33cdca69adc",
"309a618577b27c648f9c5ee45252bc8f",
};
TestIntraPred("Intra4", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
kSignatures, 4, 4 * 4 * kNumVp9IntraFuncs);
}
void TestIntraPred8(VpxPredFunc const *pred_funcs) {
static const int kNumVp9IntraFuncs = 13;
static const char *const kSignatures[kNumVp9IntraFuncs] = {
"7694ddeeefed887faf9d339d18850928",
"7d726b1213591b99f736be6dec65065b",
"19c5711281357a485591aaf9c96c0a67",
"ba6b66877a089e71cd938e3b8c40caac",
"802440c93317e0f8ba93fab02ef74265",
"9e09a47a15deb0b9d8372824f9805080",
"b7c2d8c662268c0c427da412d7b0311d",
"78339c1c60bb1d67d248ab8c4da08b7f",
"5c97d70f7d47de1882a6cd86c165c8a9",
"8182bf60688b42205acd95e59e967157",
"08323400005a297f16d7e57e7fe1eaac",
"95f7bfc262329a5849eda66d8f7c68ce",
"815b75c8e0d91cc1ae766dc5d3e445a3",
};
TestIntraPred("Intra8", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
kSignatures, 8, 8 * 8 * kNumVp9IntraFuncs);
}
void TestIntraPred16(VpxPredFunc const *pred_funcs) {
static const int kNumVp9IntraFuncs = 13;
static const char *const kSignatures[kNumVp9IntraFuncs] = {
"b40dbb555d5d16a043dc361e6694fe53",
"fb08118cee3b6405d64c1fd68be878c6",
"6c190f341475c837cc38c2e566b64875",
"db5c34ccbe2c7f595d9b08b0dc2c698c",
"a62cbfd153a1f0b9fed13e62b8408a7a",
"143df5b4c89335e281103f610f5052e4",
"d87feb124107cdf2cfb147655aa0bb3c",
"7841fae7d4d47b519322e6a03eeed9dc",
"f6ebed3f71cbcf8d6d0516ce87e11093",
"3cc480297dbfeed01a1c2d78dd03d0c5",
"b9f69fa6532b372c545397dcb78ef311",
"a8fe1c70432f09d0c20c67bdb6432c4d",
"b8a41aa968ec108af447af4217cba91b",
};
TestIntraPred("Intra16", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
kSignatures, 16, 16 * 16 * kNumVp9IntraFuncs);
}
void TestIntraPred32(VpxPredFunc const *pred_funcs) {
static const int kNumVp9IntraFuncs = 13;
static const char *const kSignatures[kNumVp9IntraFuncs] = {
"558541656d84f9ae7896db655826febe",
"b3587a1f9a01495fa38c8cd3c8e2a1bf",
"4c6501e64f25aacc55a2a16c7e8f0255",
"b3b01379ba08916ef6b1b35f7d9ad51c",
"0f1eb38b6cbddb3d496199ef9f329071",
"911c06efb9ed1c3b4c104b232b55812f",
"9225beb0ddfa7a1d24eaa1be430a6654",
"0a6d584a44f8db9aa7ade2e2fdb9fc9e",
"b01c9076525216925f3456f034fb6eee",
"d267e20ad9e5cd2915d1a47254d3d149",
"ed012a4a5da71f36c2393023184a0e59",
"f162b51ed618d28b936974cff4391da5",
"9e1370c6d42e08d357d9612c93a71cfc",
};
TestIntraPred("Intra32", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
kSignatures, 32, 32 * 32 * kNumVp9IntraFuncs);
}
} // namespace
// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
#define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \
d45, d135, d117, d153, d207, d63, tm) \
TEST(arch, test_func) { \
static const VpxPredFunc vp9_intra_pred[] = { \
dc, dc_left, dc_top, dc_128, v, h, d45, \
d135, d117, d153, d207, d63, tm}; \
test_func(vp9_intra_pred); \
}
// -----------------------------------------------------------------------------
// 4x4
INTRA_PRED_TEST(C, TestIntraPred4, vp9_dc_predictor_4x4_c,
vp9_dc_left_predictor_4x4_c, vp9_dc_top_predictor_4x4_c,
vp9_dc_128_predictor_4x4_c, vp9_v_predictor_4x4_c,
vp9_h_predictor_4x4_c, vp9_d45_predictor_4x4_c,
vp9_d135_predictor_4x4_c, vp9_d117_predictor_4x4_c,
vp9_d153_predictor_4x4_c, vp9_d207_predictor_4x4_c,
vp9_d63_predictor_4x4_c, vp9_tm_predictor_4x4_c)
#if HAVE_SSE
INTRA_PRED_TEST(SSE, TestIntraPred4, vp9_dc_predictor_4x4_sse,
vp9_dc_left_predictor_4x4_sse, vp9_dc_top_predictor_4x4_sse,
vp9_dc_128_predictor_4x4_sse, vp9_v_predictor_4x4_sse, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_4x4_sse)
#endif // HAVE_SSE
#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
vp9_h_predictor_4x4_ssse3, vp9_d45_predictor_4x4_ssse3, NULL,
NULL, vp9_d153_predictor_4x4_ssse3,
vp9_d207_predictor_4x4_ssse3, vp9_d63_predictor_4x4_ssse3, NULL)
#endif // HAVE_SSSE3
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred4, vp9_dc_predictor_4x4_dspr2, NULL, NULL,
NULL, NULL, vp9_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
NULL, NULL, vp9_tm_predictor_4x4_dspr2)
#endif // HAVE_DSPR2
#if HAVE_NEON
INTRA_PRED_TEST(NEON, TestIntraPred4, vp9_dc_predictor_4x4_neon,
vp9_dc_left_predictor_4x4_neon, vp9_dc_top_predictor_4x4_neon,
vp9_dc_128_predictor_4x4_neon, vp9_v_predictor_4x4_neon,
vp9_h_predictor_4x4_neon, vp9_d45_predictor_4x4_neon,
vp9_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
vp9_tm_predictor_4x4_neon)
#endif // HAVE_NEON
#if HAVE_MSA
INTRA_PRED_TEST(MSA, TestIntraPred4, vp9_dc_predictor_4x4_msa,
vp9_dc_left_predictor_4x4_msa, vp9_dc_top_predictor_4x4_msa,
vp9_dc_128_predictor_4x4_msa, vp9_v_predictor_4x4_msa,
vp9_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL,
NULL, vp9_tm_predictor_4x4_msa)
#endif // HAVE_MSA
// -----------------------------------------------------------------------------
// 8x8
INTRA_PRED_TEST(C, TestIntraPred8, vp9_dc_predictor_8x8_c,
vp9_dc_left_predictor_8x8_c, vp9_dc_top_predictor_8x8_c,
vp9_dc_128_predictor_8x8_c, vp9_v_predictor_8x8_c,
vp9_h_predictor_8x8_c, vp9_d45_predictor_8x8_c,
vp9_d135_predictor_8x8_c, vp9_d117_predictor_8x8_c,
vp9_d153_predictor_8x8_c, vp9_d207_predictor_8x8_c,
vp9_d63_predictor_8x8_c, vp9_tm_predictor_8x8_c)
#if HAVE_SSE
INTRA_PRED_TEST(SSE, TestIntraPred8, vp9_dc_predictor_8x8_sse,
vp9_dc_left_predictor_8x8_sse, vp9_dc_top_predictor_8x8_sse,
vp9_dc_128_predictor_8x8_sse, vp9_v_predictor_8x8_sse, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL)
#endif // HAVE_SSE
#if HAVE_SSE2
INTRA_PRED_TEST(SSE2, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_sse2)
#endif // HAVE_SSE2
#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
vp9_h_predictor_8x8_ssse3, vp9_d45_predictor_8x8_ssse3, NULL,
NULL, vp9_d153_predictor_8x8_ssse3,
vp9_d207_predictor_8x8_ssse3, vp9_d63_predictor_8x8_ssse3, NULL)
#endif // HAVE_SSSE3
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred8, vp9_dc_predictor_8x8_dspr2, NULL, NULL,
NULL, NULL, vp9_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
NULL, NULL, vp9_tm_predictor_8x8_c)
#endif // HAVE_DSPR2
#if HAVE_NEON
INTRA_PRED_TEST(NEON, TestIntraPred8, vp9_dc_predictor_8x8_neon,
vp9_dc_left_predictor_8x8_neon, vp9_dc_top_predictor_8x8_neon,
vp9_dc_128_predictor_8x8_neon, vp9_v_predictor_8x8_neon,
vp9_h_predictor_8x8_neon, vp9_d45_predictor_8x8_neon, NULL,
NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_neon)
#endif // HAVE_NEON
#if HAVE_MSA
INTRA_PRED_TEST(MSA, TestIntraPred8, vp9_dc_predictor_8x8_msa,
vp9_dc_left_predictor_8x8_msa, vp9_dc_top_predictor_8x8_msa,
vp9_dc_128_predictor_8x8_msa, vp9_v_predictor_8x8_msa,
vp9_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL,
NULL, vp9_tm_predictor_8x8_msa)
#endif // HAVE_MSA
// -----------------------------------------------------------------------------
// 16x16
INTRA_PRED_TEST(C, TestIntraPred16, vp9_dc_predictor_16x16_c,
vp9_dc_left_predictor_16x16_c, vp9_dc_top_predictor_16x16_c,
vp9_dc_128_predictor_16x16_c, vp9_v_predictor_16x16_c,
vp9_h_predictor_16x16_c, vp9_d45_predictor_16x16_c,
vp9_d135_predictor_16x16_c, vp9_d117_predictor_16x16_c,
vp9_d153_predictor_16x16_c, vp9_d207_predictor_16x16_c,
vp9_d63_predictor_16x16_c, vp9_tm_predictor_16x16_c)
#if HAVE_SSE2
INTRA_PRED_TEST(SSE2, TestIntraPred16, vp9_dc_predictor_16x16_sse2,
vp9_dc_left_predictor_16x16_sse2,
vp9_dc_top_predictor_16x16_sse2,
vp9_dc_128_predictor_16x16_sse2, vp9_v_predictor_16x16_sse2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
vp9_tm_predictor_16x16_sse2)
#endif // HAVE_SSE2
#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL,
vp9_h_predictor_16x16_ssse3, vp9_d45_predictor_16x16_ssse3,
NULL, NULL, vp9_d153_predictor_16x16_ssse3,
vp9_d207_predictor_16x16_ssse3, vp9_d63_predictor_16x16_ssse3,
NULL)
#endif // HAVE_SSSE3
#if HAVE_DSPR2
INTRA_PRED_TEST(DSPR2, TestIntraPred16, vp9_dc_predictor_16x16_dspr2, NULL,
NULL, NULL, NULL, vp9_h_predictor_16x16_dspr2, NULL, NULL, NULL,
NULL, NULL, NULL, NULL)
#endif // HAVE_DSPR2
#if HAVE_NEON
INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon,
vp9_dc_left_predictor_16x16_neon,
vp9_dc_top_predictor_16x16_neon,
vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
#endif // HAVE_NEON
#if HAVE_MSA
INTRA_PRED_TEST(MSA, TestIntraPred16, vp9_dc_predictor_16x16_msa,
vp9_dc_left_predictor_16x16_msa, vp9_dc_top_predictor_16x16_msa,
vp9_dc_128_predictor_16x16_msa, vp9_v_predictor_16x16_msa,
vp9_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL,
NULL, vp9_tm_predictor_16x16_msa)
#endif // HAVE_MSA
// -----------------------------------------------------------------------------
// 32x32
INTRA_PRED_TEST(C, TestIntraPred32, vp9_dc_predictor_32x32_c,
vp9_dc_left_predictor_32x32_c, vp9_dc_top_predictor_32x32_c,
vp9_dc_128_predictor_32x32_c, vp9_v_predictor_32x32_c,
vp9_h_predictor_32x32_c, vp9_d45_predictor_32x32_c,
vp9_d135_predictor_32x32_c, vp9_d117_predictor_32x32_c,
vp9_d153_predictor_32x32_c, vp9_d207_predictor_32x32_c,
vp9_d63_predictor_32x32_c, vp9_tm_predictor_32x32_c)
#if HAVE_SSE2
#if ARCH_X86_64
INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
vp9_dc_left_predictor_32x32_sse2,
vp9_dc_top_predictor_32x32_sse2,
vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
vp9_tm_predictor_32x32_sse2)
#else
INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
vp9_dc_left_predictor_32x32_sse2,
vp9_dc_top_predictor_32x32_sse2,
vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
#endif // ARCH_X86_64
#endif // HAVE_SSE2
#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
vp9_h_predictor_32x32_ssse3, vp9_d45_predictor_32x32_ssse3,
NULL, NULL, vp9_d153_predictor_32x32_ssse3,
vp9_d207_predictor_32x32_ssse3, vp9_d63_predictor_32x32_ssse3,
NULL)
#endif // HAVE_SSSE3
#if HAVE_NEON
INTRA_PRED_TEST(NEON, TestIntraPred32, vp9_dc_predictor_32x32_neon,
vp9_dc_left_predictor_32x32_neon,
vp9_dc_top_predictor_32x32_neon,
vp9_dc_128_predictor_32x32_neon, vp9_v_predictor_32x32_neon,
vp9_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
vp9_tm_predictor_32x32_neon)
#endif // HAVE_NEON
#if HAVE_MSA
INTRA_PRED_TEST(MSA, TestIntraPred32, vp9_dc_predictor_32x32_msa,
vp9_dc_left_predictor_32x32_msa, vp9_dc_top_predictor_32x32_msa,
vp9_dc_128_predictor_32x32_msa, vp9_v_predictor_32x32_msa,
vp9_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL,
NULL, vp9_tm_predictor_32x32_msa)
#endif // HAVE_MSA
#include "test/test_libvpx.cc"


@@ -19,6 +19,7 @@ extern void vp8_rtcd();
#if CONFIG_VP9
extern void vp9_rtcd();
#endif // CONFIG_VP9
extern void vpx_dsp_rtcd();
extern void vpx_scale_rtcd();
}
#include "third_party/googletest/src/include/gtest/gtest.h"
@@ -37,21 +38,21 @@ int main(int argc, char **argv) {
#if ARCH_X86 || ARCH_X86_64
const int simd_caps = x86_simd_caps();
if (!(simd_caps & HAS_MMX))
append_negative_gtest_filter(":MMX/*");
append_negative_gtest_filter(":MMX.*:MMX/*");
if (!(simd_caps & HAS_SSE))
append_negative_gtest_filter(":SSE/*");
append_negative_gtest_filter(":SSE.*:SSE/*");
if (!(simd_caps & HAS_SSE2))
append_negative_gtest_filter(":SSE2/*");
append_negative_gtest_filter(":SSE2.*:SSE2/*");
if (!(simd_caps & HAS_SSE3))
append_negative_gtest_filter(":SSE3/*");
append_negative_gtest_filter(":SSE3.*:SSE3/*");
if (!(simd_caps & HAS_SSSE3))
append_negative_gtest_filter(":SSSE3/*");
append_negative_gtest_filter(":SSSE3.*:SSSE3/*");
if (!(simd_caps & HAS_SSE4_1))
append_negative_gtest_filter(":SSE4_1/*");
append_negative_gtest_filter(":SSE4_1.*:SSE4_1/*");
if (!(simd_caps & HAS_AVX))
append_negative_gtest_filter(":AVX/*");
append_negative_gtest_filter(":AVX.*:AVX/*");
if (!(simd_caps & HAS_AVX2))
append_negative_gtest_filter(":AVX2/*");
append_negative_gtest_filter(":AVX2.*:AVX2/*");
#endif
#if !CONFIG_SHARED
@@ -64,6 +65,7 @@ int main(int argc, char **argv) {
#if CONFIG_VP9
vp9_rtcd();
#endif // CONFIG_VP9
vpx_dsp_rtcd();
vpx_scale_rtcd();
#endif // !CONFIG_SHARED


@@ -165,7 +165,10 @@ const char *const kVP9TestVectors[] = {
"vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
"vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
"vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
#if !CONFIG_SIZE_LIMIT || \
(DECODE_WIDTH_LIMIT >= 20400 && DECODE_HEIGHT_LIMIT >= 120)
"vp90-2-13-largescaling.webm",
#endif
"vp90-2-14-resize-fp-tiles-1-16.webm",
"vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
"vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm",


@@ -408,6 +408,9 @@ YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
YUV_RAW_INPUT_WIDTH=352
YUV_RAW_INPUT_HEIGHT=288
Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
# Setup a trap function to clean up after tests complete.
trap cleanup EXIT
@@ -429,6 +432,7 @@ vlog "$(basename "${0%.*}") test configuration:
VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
YUV_RAW_INPUT=${YUV_RAW_INPUT}
YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}"
YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"
fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.

File diff suppressed because it is too large


@@ -52,13 +52,13 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
// mc_avg_block is the denoised reference block,
// avg_block_c is the denoised result from C code,
// avg_block_sse2 is the denoised result from SSE2 code.
DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block_c, kNumPixels);
DECLARE_ALIGNED(16, uint8_t, sig_block_c[kNumPixels]);
// Since in VP8 denoiser, the source signal will be changed,
// we need another copy of the source signal as the input of sse2 code.
DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block_sse2, kNumPixels);
DECLARE_ALIGNED_ARRAY(16, uint8_t, mc_avg_block, kNumPixels);
DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_c, kNumPixels);
DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_sse2, kNumPixels);
DECLARE_ALIGNED(16, uint8_t, sig_block_sse2[kNumPixels]);
DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);
for (int i = 0; i < count_test_block; ++i) {
// Generate random motion magnitude, 20% of which exceed the threshold.


@@ -121,6 +121,79 @@ class AverageTest
}
};
typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
const int ref_stride, const int height);
typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
class IntProRowTest
: public AverageTestBase,
public ::testing::WithParamInterface<IntProRowParam> {
public:
IntProRowTest()
: AverageTestBase(16, GET_PARAM(0)),
hbuf_asm_(NULL),
hbuf_c_(NULL) {
asm_func_ = GET_PARAM(1);
c_func_ = GET_PARAM(2);
}
protected:
virtual void SetUp() {
hbuf_asm_ = reinterpret_cast<int16_t*>(
vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
hbuf_c_ = reinterpret_cast<int16_t*>(
vpx_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16));
}
virtual void TearDown() {
vpx_free(hbuf_c_);
hbuf_c_ = NULL;
vpx_free(hbuf_asm_);
hbuf_asm_ = NULL;
}
void RunComparison() {
ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_));
ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_));
EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16))
<< "Output mismatch";
}
private:
IntProRowFunc asm_func_;
IntProRowFunc c_func_;
int16_t *hbuf_asm_;
int16_t *hbuf_c_;
};
typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
class IntProColTest
: public AverageTestBase,
public ::testing::WithParamInterface<IntProColParam> {
public:
IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) {
asm_func_ = GET_PARAM(1);
c_func_ = GET_PARAM(2);
}
protected:
void RunComparison() {
ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_));
ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_));
EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch";
}
private:
IntProColFunc asm_func_;
IntProColFunc c_func_;
int16_t sum_asm_;
int16_t sum_c_;
};
uint8_t* AverageTestBase::source_data_ = NULL;
@@ -143,6 +216,36 @@ TEST_P(AverageTest, Random) {
}
}
TEST_P(IntProRowTest, MinValue) {
FillConstant(0);
RunComparison();
}
TEST_P(IntProRowTest, MaxValue) {
FillConstant(255);
RunComparison();
}
TEST_P(IntProRowTest, Random) {
FillRandom();
RunComparison();
}
TEST_P(IntProColTest, MinValue) {
FillConstant(0);
RunComparison();
}
TEST_P(IntProColTest, MaxValue) {
FillConstant(255);
RunComparison();
}
TEST_P(IntProColTest, Random) {
FillRandom();
RunComparison();
}
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
@@ -151,7 +254,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, AverageTest,
@@ -163,6 +265,17 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2),
make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2)));
INSTANTIATE_TEST_CASE_P(
SSE2, IntProRowTest, ::testing::Values(
make_tuple(16, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
make_tuple(32, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
make_tuple(64, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c)));
INSTANTIATE_TEST_CASE_P(
SSE2, IntProColTest, ::testing::Values(
make_tuple(16, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
make_tuple(32, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
make_tuple(64, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c)));
#endif
#if HAVE_NEON
@@ -175,4 +288,16 @@ INSTANTIATE_TEST_CASE_P(
#endif
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(
MSA, AverageTest,
::testing::Values(
make_tuple(16, 16, 0, 8, &vp9_avg_8x8_msa),
make_tuple(16, 16, 5, 8, &vp9_avg_8x8_msa),
make_tuple(32, 32, 15, 8, &vp9_avg_8x8_msa),
make_tuple(16, 16, 0, 4, &vp9_avg_4x4_msa),
make_tuple(16, 16, 5, 4, &vp9_avg_4x4_msa),
make_tuple(32, 32, 15, 4, &vp9_avg_4x4_msa)));
#endif
} // namespace


@@ -52,10 +52,10 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
// mc_avg_block is the denoised reference block,
// avg_block_c is the denoised result from C code,
// avg_block_sse2 is the denoised result from SSE2 code.
DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block, kNumPixels);
DECLARE_ALIGNED_ARRAY(16, uint8_t, mc_avg_block, kNumPixels);
DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_c, kNumPixels);
DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_sse2, kNumPixels);
DECLARE_ALIGNED(16, uint8_t, sig_block[kNumPixels]);
DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);
for (int i = 0; i < count_test_block; ++i) {
// Generate random motion magnitude, 20% of which exceed the threshold.


@@ -21,6 +21,7 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
@@ -57,8 +58,8 @@ class ErrorBlockTest
TEST_P(ErrorBlockTest, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, 4096);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
int err_count_total = 0;
int first_failure = -1;
intptr_t block_size;
@@ -90,8 +91,8 @@ TEST_P(ErrorBlockTest, OperationCheck) {
TEST_P(ErrorBlockTest, ExtremeValues) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, 4096);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
int err_count_total = 0;
int first_failure = -1;
intptr_t block_size;


@@ -120,10 +120,10 @@ class VP9IntraPredTest
TEST_P(VP9IntraPredTest, IntraPredTests) {
// max block size is 32
DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 2*32);
DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 2*32+32);
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst, 3 * 32 * 32);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_dst, 3 * 32 * 32);
DECLARE_ALIGNED(16, uint16_t, left_col[2*32]);
DECLARE_ALIGNED(16, uint16_t, above_data[2*32+32]);
DECLARE_ALIGNED(16, uint16_t, dst[3 * 32 * 32]);
DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 32 * 32]);
RunTest(left_col, above_data, dst, ref_dst);
}


@@ -21,6 +21,8 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
@@ -80,18 +82,18 @@ class VP9Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
TEST_P(VP9QuantizeTest, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
int err_count_total = 0;
int first_failure = -1;
for (int i = 0; i < number_of_iterations; ++i) {
@@ -139,18 +141,18 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
TEST_P(VP9Quantize32Test, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
int err_count_total = 0;
int first_failure = -1;
for (int i = 0; i < number_of_iterations; ++i) {
@@ -198,18 +200,18 @@ TEST_P(VP9Quantize32Test, OperationCheck) {
TEST_P(VP9QuantizeTest, EOBCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
int err_count_total = 0;
int first_failure = -1;
for (int i = 0; i < number_of_iterations; ++i) {
@@ -262,18 +264,18 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
TEST_P(VP9Quantize32Test, EOBCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
int err_count_total = 0;
int first_failure = -1;
for (int i = 0; i < number_of_iterations; ++i) {


@@ -0,0 +1,180 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string>
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/md5_helper.h"
#include "test/util.h"
#include "test/webm_video_source.h"
namespace {
const char kVp9TestFile[] = "vp90-2-08-tile_1x8_frame_parallel.webm";
const char kVp9Md5File[] = "vp90-2-08-tile_1x8_frame_parallel.webm.md5";
// Class for testing shutting off the loop filter.
class SkipLoopFilterTest {
public:
SkipLoopFilterTest()
: video_(NULL),
decoder_(NULL),
md5_file_(NULL) {}
~SkipLoopFilterTest() {
if (md5_file_ != NULL)
fclose(md5_file_);
delete decoder_;
delete video_;
}
// If |threads| > 0 then set the decoder with that number of threads.
void Init(int num_threads) {
expected_md5_[0] = '\0';
junk_[0] = '\0';
video_ = new libvpx_test::WebMVideoSource(kVp9TestFile);
ASSERT_TRUE(video_ != NULL);
video_->Init();
video_->Begin();
vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
if (num_threads > 0)
cfg.threads = num_threads;
decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
ASSERT_TRUE(decoder_ != NULL);
OpenMd5File(kVp9Md5File);
}
// Set the VP9 skipLoopFilter control value.
void SetSkipLoopFilter(int value, vpx_codec_err_t expected_value) {
decoder_->Control(VP9_SET_SKIP_LOOP_FILTER, value, expected_value);
}
vpx_codec_err_t DecodeOneFrame() {
const vpx_codec_err_t res =
decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
if (res == VPX_CODEC_OK) {
ReadMd5();
video_->Next();
}
return res;
}
vpx_codec_err_t DecodeRemainingFrames() {
for (; video_->cxdata() != NULL; video_->Next()) {
const vpx_codec_err_t res =
decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
if (res != VPX_CODEC_OK)
return res;
ReadMd5();
}
return VPX_CODEC_OK;
}
// Checks if MD5 matches or doesn't.
void CheckMd5(bool matches) {
libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
const vpx_image_t *img = dec_iter.Next();
CheckMd5Vpx(*img, matches);
}
private:
// TODO(fgalligan): Move the MD5 testing code into another class.
void OpenMd5File(const std::string &md5_file_name) {
md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name);
ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
<< md5_file_name;
}
// Reads the next line of the MD5 file.
void ReadMd5() {
ASSERT_TRUE(md5_file_ != NULL);
const int res = fscanf(md5_file_, "%s %s", expected_md5_, junk_);
ASSERT_NE(EOF, res) << "Read md5 data failed";
expected_md5_[32] = '\0';
}
// Checks if the last read MD5 matches |img| or doesn't.
void CheckMd5Vpx(const vpx_image_t &img, bool matches) {
::libvpx_test::MD5 md5_res;
md5_res.Add(&img);
const char *const actual_md5 = md5_res.Get();
// Check MD5.
if (matches)
ASSERT_STREQ(expected_md5_, actual_md5) << "MD5 checksums don't match";
else
ASSERT_STRNE(expected_md5_, actual_md5) << "MD5 checksums match";
}
libvpx_test::WebMVideoSource *video_;
libvpx_test::VP9Decoder *decoder_;
FILE *md5_file_;
char expected_md5_[33];
char junk_[128];
};
TEST(SkipLoopFilterTest, ShutOffLoopFilter) {
const int non_zero_value = 1;
const int num_threads = 0;
SkipLoopFilterTest skip_loop_filter;
skip_loop_filter.Init(num_threads);
skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
skip_loop_filter.CheckMd5(false);
}
TEST(SkipLoopFilterTest, ShutOffLoopFilterSingleThread) {
const int non_zero_value = 1;
const int num_threads = 1;
SkipLoopFilterTest skip_loop_filter;
skip_loop_filter.Init(num_threads);
skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
skip_loop_filter.CheckMd5(false);
}
TEST(SkipLoopFilterTest, ShutOffLoopFilter8Threads) {
const int non_zero_value = 1;
const int num_threads = 8;
SkipLoopFilterTest skip_loop_filter;
skip_loop_filter.Init(num_threads);
skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
skip_loop_filter.CheckMd5(false);
}
TEST(SkipLoopFilterTest, WithLoopFilter) {
const int non_zero_value = 1;
const int num_threads = 0;
SkipLoopFilterTest skip_loop_filter;
skip_loop_filter.Init(num_threads);
skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
skip_loop_filter.SetSkipLoopFilter(0, VPX_CODEC_OK);
ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
skip_loop_filter.CheckMd5(true);
}
TEST(SkipLoopFilterTest, ToggleLoopFilter) {
const int num_threads = 0;
SkipLoopFilterTest skip_loop_filter;
skip_loop_filter.Init(num_threads);
for (int i = 0; i < 10; ++i) {
skip_loop_filter.SetSkipLoopFilter(i % 2, VPX_CODEC_OK);
ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeOneFrame());
}
ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
skip_loop_filter.CheckMd5(false);
}
} // namespace


@@ -23,6 +23,13 @@ vpxenc_verify_environment() {
elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH."
return 1
fi
if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then
elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in"
elog "LIBVPX_TEST_DATA_PATH."
return 1
fi
fi
if [ -z "$(vpx_tool_path vpxenc)" ]; then
elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent."
return 1
@@ -49,6 +56,14 @@ yuv_input_hantro_collage() {
--height="${YUV_RAW_INPUT_HEIGHT}""
}
y4m_input_non_square_par() {
echo ""${Y4M_NOSQ_PAR_INPUT}""
}
y4m_input_720p() {
echo ""${Y4M_720P_INPUT}""
}
# Echo default vpxenc real time encoding params. $1 is the codec, which defaults
# to vp8 if unspecified.
vpxenc_rt_params() {
@@ -57,7 +72,7 @@ vpxenc_rt_params() {
--buf-initial-sz=500
--buf-optimal-sz=600
--buf-sz=1000
--cpu-used=-5
--cpu-used=-6
--end-usage=cbr
--error-resilient=1
--kf-max-dist=90000
@@ -247,6 +262,63 @@ vpxenc_vp9_webm_rt() {
fi
}
vpxenc_vp9_webm_rt_multithread_tiled() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_multithread_tiled.webm"
local readonly tilethread_min=2
local readonly tilethread_max=4
local readonly num_threads="$(seq ${tilethread_min} ${tilethread_max})"
local readonly num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})"
for threads in ${num_threads}; do
for tile_cols in ${num_tile_cols}; do
vpxenc $(y4m_input_720p) \
$(vpxenc_rt_params vp9) \
--threads=${threads} \
--tile-columns=${tile_cols} \
--output="${output}"
done
done
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
return 1
fi
rm "${output}"
fi
}
vpxenc_vp9_webm_rt_multithread_tiled_frameparallel() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_mt_t_fp.webm"
local readonly tilethread_min=2
local readonly tilethread_max=4
local readonly num_threads="$(seq ${tilethread_min} ${tilethread_max})"
local readonly num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})"
for threads in ${num_threads}; do
for tile_cols in ${num_tile_cols}; do
vpxenc $(y4m_input_720p) \
$(vpxenc_rt_params vp9) \
--threads=${threads} \
--tile-columns=${tile_cols} \
--frame-parallel=1 \
--output="${output}"
done
done
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
return 1
fi
rm "${output}"
fi
}
vpxenc_vp9_webm_2pass() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
@@ -320,6 +392,23 @@ vpxenc_vp9_webm_lag10_frames20() {
fi
}
# TODO(fgalligan): Test that DisplayWidth is different than video width.
vpxenc_vp9_webm_non_square_par() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_non_square_par.webm"
vpxenc $(y4m_input_non_square_par) \
--codec=vp9 \
--limit="${TEST_FRAMES}" \
--output="${output}"
if [ ! -e "${output}" ]; then
elog "Output file does not exist."
return 1
fi
fi
}
vpxenc_tests="vpxenc_vp8_ivf
vpxenc_vp8_webm
vpxenc_vp8_webm_rt
@@ -329,9 +418,12 @@ vpxenc_tests="vpxenc_vp8_ivf
vpxenc_vp9_ivf
vpxenc_vp9_webm
vpxenc_vp9_webm_rt
vpxenc_vp9_webm_rt_multithread_tiled
vpxenc_vp9_webm_rt_multithread_tiled_frameparallel
vpxenc_vp9_webm_2pass
vpxenc_vp9_ivf_lossless
vpxenc_vp9_ivf_minq0_maxq0
vpxenc_vp9_webm_lag10_frames20"
vpxenc_vp9_webm_lag10_frames20
vpxenc_vp9_webm_non_square_par"
run_tests vpxenc_verify_environment "${vpxenc_tests}"


@@ -140,7 +140,7 @@ static const VpxInterface vpx_encoders[] = {
#endif
};
int get_vpx_encoder_count() {
int get_vpx_encoder_count(void) {
return sizeof(vpx_encoders) / sizeof(vpx_encoders[0]);
}
@@ -170,7 +170,7 @@ static const VpxInterface vpx_decoders[] = {
#endif
};
int get_vpx_decoder_count() {
int get_vpx_decoder_count(void) {
return sizeof(vpx_decoders) / sizeof(vpx_decoders[0]);
}
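This hunk, and the matching header prototypes a little further down, only add void to empty parameter lists. As a general C note rather than a claim about the committer's intent: an empty list in a declaration says nothing about the arguments, so calls are not checked against it, whereas (void) states that the function takes no arguments. A minimal sketch with illustrative names:

/* Illustrative functions, not libvpx's; only the declared contract differs. */
static int count_old() { return 2; }      /* (): parameters left unspecified */
static int count_new(void) { return 2; }  /* (void): explicitly takes no arguments */

int main(void) {
  /* A call such as count_old(42) requires no diagnostic, because the empty
   * list gives the compiler nothing to check against; count_new(42) must be
   * rejected at compile time. */
  return count_old() + count_new() - 4;
}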


@@ -16,6 +16,7 @@
#include "vpx/vpx_codec.h"
#include "vpx/vpx_image.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/msvc.h"
#if CONFIG_ENCODERS
#include "./y4minput.h"
@@ -34,7 +35,6 @@
#if CONFIG_OS_SUPPORT
#if defined(_MSC_VER)
#include <io.h> /* NOLINT */
#define snprintf _snprintf
#define isatty _isatty
#define fileno _fileno
#else
@@ -89,6 +89,7 @@ struct VpxInputContext {
enum VideoFileType file_type;
uint32_t width;
uint32_t height;
struct VpxRational pixel_aspect_ratio;
vpx_img_fmt_t fmt;
vpx_bit_depth_t bit_depth;
int only_i420;
@@ -119,7 +120,7 @@ void warn(const char *fmt, ...);
void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN;
/* The tool including this file must define usage_exit() */
void usage_exit() VPX_NO_RETURN;
void usage_exit(void) VPX_NO_RETURN;
#undef VPX_NO_RETURN
@@ -131,11 +132,11 @@ typedef struct VpxInterface {
vpx_codec_iface_t *(*const codec_interface)();
} VpxInterface;
int get_vpx_encoder_count();
int get_vpx_encoder_count(void);
const VpxInterface *get_vpx_encoder_by_index(int i);
const VpxInterface *get_vpx_encoder_by_name(const char *name);
int get_vpx_decoder_count();
int get_vpx_decoder_count(void);
const VpxInterface *get_vpx_decoder_by_index(int i);
const VpxInterface *get_vpx_decoder_by_name(const char *name);
const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc);


@@ -10,6 +10,7 @@
#include "vpx_config.h"
#include "alloccommon.h"
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
#include "onyxc_int.h"


@@ -1,154 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_variance16x16_armv6|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr
; r1 int source_stride
; r2 unsigned char *ref_ptr
; r3 int recon_stride
; stack unsigned int *sse
|vp8_variance16x16_armv6| PROC
stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r8, #0 ; initialize sum = 0
mov r11, #0 ; initialize sse = 0
mov r12, #16 ; set loop counter to 16 (=block height)
loop
; 1st 4 pixels
ldr r4, [r0, #0] ; load 4 src pixels
ldr r5, [r2, #0] ; load 4 ref pixels
mov lr, #0 ; constant zero
usub8 r6, r4, r5 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r4, r7, lr ; calculate sum of positive differences
usad8 r5, r6, lr ; calculate sum of negative differences
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
adds r8, r8, r4 ; add positive differences to sum
subs r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
; 2nd 4 pixels
ldr r4, [r0, #4] ; load 4 src pixels
ldr r5, [r2, #4] ; load 4 ref pixels
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
usub8 r6, r4, r5 ; calculate difference
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r4, r7, lr ; calculate sum of positive differences
usad8 r5, r6, lr ; calculate sum of negative differences
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
; 3rd 4 pixels
ldr r4, [r0, #8] ; load 4 src pixels
ldr r5, [r2, #8] ; load 4 ref pixels
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
usub8 r6, r4, r5 ; calculate difference
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r4, r7, lr ; calculate sum of positive differences
usad8 r5, r6, lr ; calculate sum of negative differences
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
; 4th 4 pixels
ldr r4, [r0, #12] ; load 4 src pixels
ldr r5, [r2, #12] ; load 4 ref pixels
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
usub8 r6, r4, r5 ; calculate difference
add r0, r0, r1 ; set src_ptr to next row
sel r7, r6, lr ; select bytes with positive difference
usub8 r9, r5, r4 ; calculate difference with reversed operands
add r2, r2, r3 ; set dst_ptr to next row
sel r6, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r4, r7, lr ; calculate sum of positive differences
usad8 r5, r6, lr ; calculate sum of negative differences
orr r6, r6, r7 ; differences of all 4 pixels
; calculate total sum
add r8, r8, r4 ; add positive differences to sum
sub r8, r8, r5 ; subtract negative differences from sum
; calculate sse
uxtb16 r5, r6 ; byte (two pixels) to halfwords
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
subs r12, r12, #1
bne loop
; return stuff
ldr r6, [sp, #40] ; get address of sse
mul r0, r8, r8 ; sum * sum
str r11, [r6] ; store sse
sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
ldmfd sp!, {r4-r12, pc}
ENDP
END
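Going by its own comments, the ARMv6 routine deleted above accumulates the sum and the sum of squared differences over the 16x16 block and returns sse - ((sum * sum) >> 8); the 8x8 version that follows does the same with >> 6, since it covers 64 pixels instead of 256. A plain C sketch of that computation, using illustrative names rather than the libvpx API:

#include <stdint.h>

/* Scalar equivalent of the removed routine: variance = sse - sum*sum/N,
 * where N is the number of pixels in the block (256 here, 64 for 8x8). */
static unsigned int variance16x16_c(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    unsigned int *sse) {
  int sum = 0;
  unsigned int sse_acc = 0;
  for (int r = 0; r < 16; ++r) {
    for (int c = 0; c < 16; ++c) {
      const int diff = src[c] - ref[c];
      sum += diff;
      sse_acc += (unsigned int)(diff * diff);
    }
    src += src_stride;
    ref += ref_stride;
  }
  *sse = sse_acc;
  return sse_acc - (unsigned int)(((int64_t)sum * sum) >> 8);
}

int main(void) {
  uint8_t a[16 * 16], b[16 * 16];
  unsigned int sse;
  for (int i = 0; i < 16 * 16; ++i) {
    a[i] = (uint8_t)(i & 127);
    b[i] = (uint8_t)((i & 127) + 1);
  }
  /* Every difference is -1: sum = -256, sse = 256, so the variance is
   * 256 - (256 * 256) / 256 = 0. */
  return (int)(variance16x16_c(a, 16, b, 16, &sse) != 0u);
}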


@@ -1,101 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_variance8x8_armv6|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr
; r1 int source_stride
; r2 unsigned char *ref_ptr
; r3 int recon_stride
; stack unsigned int *sse
|vp8_variance8x8_armv6| PROC
push {r4-r10, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r12, #8 ; set loop counter to 8 (=block height)
mov r4, #0 ; initialize sum = 0
mov r5, #0 ; initialize sse = 0
loop
; 1st 4 pixels
ldr r6, [r0, #0x0] ; load 4 src pixels
ldr r7, [r2, #0x0] ; load 4 ref pixels
mov lr, #0 ; constant zero
usub8 r8, r6, r7 ; calculate difference
pld [r0, r1, lsl #1]
sel r10, r8, lr ; select bytes with positive difference
usub8 r9, r7, r6 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r6, r10, lr ; calculate sum of positive differences
usad8 r7, r8, lr ; calculate sum of negative differences
orr r8, r8, r10 ; differences of all 4 pixels
; calculate total sum
add r4, r4, r6 ; add positive differences to sum
sub r4, r4, r7 ; subtract negative differences from sum
; calculate sse
uxtb16 r7, r8 ; byte (two pixels) to halfwords
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
; 2nd 4 pixels
ldr r6, [r0, #0x4] ; load 4 src pixels
ldr r7, [r2, #0x4] ; load 4 ref pixels
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
usub8 r8, r6, r7 ; calculate difference
add r0, r0, r1 ; set src_ptr to next row
sel r10, r8, lr ; select bytes with positive difference
usub8 r9, r7, r6 ; calculate difference with reversed operands
add r2, r2, r3 ; set dst_ptr to next row
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r6, r10, lr ; calculate sum of positive differences
usad8 r7, r8, lr ; calculate sum of negative differences
orr r8, r8, r10 ; differences of all 4 pixels
; calculate total sum
add r4, r4, r6 ; add positive differences to sum
sub r4, r4, r7 ; subtract negative differences from sum
; calculate sse
uxtb16 r7, r8 ; byte (two pixels) to halfwords
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
subs r12, r12, #1 ; next row
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
bne loop
; return stuff
ldr r8, [sp, #32] ; get address of sse
mul r1, r4, r4 ; sum * sum
str r5, [r8] ; store sse
sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6))
pop {r4-r10, pc}
ENDP
END
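Both armv6 kernels above finish with the same closing step: store the accumulated sse and return sse minus the squared sum scaled by the block area (shift 8 for the 16x16 version, shift 6 for this 8x8 version). A one-line C equivalent, with an illustrative name:

/* variance = sse - sum^2 / (w * h); the shift is log2(w * h). Illustrative. */
static unsigned int variance_from_sums(unsigned int sse, int sum, int shift) {
  return sse - (((unsigned int)(sum * sum)) >> shift);
}
/* e.g. the 8x8 kernel above returns variance_from_sums(sse, sum, 6). */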


@@ -99,7 +99,7 @@ void vp8_sixtap_predict4x4_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
@@ -147,7 +147,7 @@ void vp8_sixtap_predict8x8_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
@@ -189,7 +189,7 @@ void vp8_sixtap_predict16x16_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
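The three hunks above replace DECLARE_ALIGNED_ARRAY with DECLARE_ALIGNED, so the element count moves into the array declarator instead of being a separate macro argument. Roughly, and only as an assumption about the macro in vpx_ports/mem.h, the declaration expands to an aligned local array:

/* Hypothetical stand-in for the real macro in vpx_ports/mem.h (GCC syntax). */
#define EXAMPLE_DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))

void sixtap_temp_example(void) {
  EXAMPLE_DECLARE_ALIGNED(4, short, FData[12 * 4]); /* 4-byte aligned temp buffer */
  (void)FData;
}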


@@ -1,184 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
unsigned int vp8_sad8x8_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x8_t d0, d8;
uint16x8_t q12;
uint32x4_t q1;
uint64x2_t q3;
uint32x2_t d5;
int i;
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(d0, d8);
for (i = 0; i < 7; i++) {
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, d0, d8);
}
q1 = vpaddlq_u16(q12);
q3 = vpaddlq_u32(q1);
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
vreinterpret_u32_u64(vget_high_u64(q3)));
return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad8x16_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x8_t d0, d8;
uint16x8_t q12;
uint32x4_t q1;
uint64x2_t q3;
uint32x2_t d5;
int i;
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(d0, d8);
for (i = 0; i < 15; i++) {
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, d0, d8);
}
q1 = vpaddlq_u16(q12);
q3 = vpaddlq_u32(q1);
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
vreinterpret_u32_u64(vget_high_u64(q3)));
return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad4x4_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x8_t d0, d8;
uint16x8_t q12;
uint32x2_t d1;
uint64x1_t d3;
int i;
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(d0, d8);
for (i = 0; i < 3; i++) {
d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, d0, d8);
}
d1 = vpaddl_u16(vget_low_u16(q12));
d3 = vpaddl_u32(d1);
return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
}
unsigned int vp8_sad16x16_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x16_t q0, q4;
uint16x8_t q12, q13;
uint32x4_t q1;
uint64x2_t q3;
uint32x2_t d5;
int i;
q0 = vld1q_u8(src_ptr);
src_ptr += src_stride;
q4 = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
for (i = 0; i < 15; i++) {
q0 = vld1q_u8(src_ptr);
src_ptr += src_stride;
q4 = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
}
q12 = vaddq_u16(q12, q13);
q1 = vpaddlq_u16(q12);
q3 = vpaddlq_u32(q1);
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
vreinterpret_u32_u64(vget_high_u64(q3)));
return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad16x8_neon(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride) {
uint8x16_t q0, q4;
uint16x8_t q12, q13;
uint32x4_t q1;
uint64x2_t q3;
uint32x2_t d5;
int i;
q0 = vld1q_u8(src_ptr);
src_ptr += src_stride;
q4 = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
for (i = 0; i < 7; i++) {
q0 = vld1q_u8(src_ptr);
src_ptr += src_stride;
q4 = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
}
q12 = vaddq_u16(q12, q13);
q1 = vpaddlq_u16(q12);
q3 = vpaddlq_u32(q1);
d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
vreinterpret_u32_u64(vget_high_u64(q3)));
return vget_lane_u32(d5, 0);
}
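All five deleted kernels above follow the same pattern: widen absolute differences with vabdl_u8, accumulate with vabal_u8, then reduce the vector to a scalar. The value they produce is an ordinary sum of absolute differences; a minimal C sketch with an illustrative name:

#include <stdlib.h>

/* Plain-C SAD over a w x h block (illustrative reference, not the patch). */
static unsigned int sad_c(const unsigned char *src, int src_stride,
                          const unsigned char *ref, int ref_stride,
                          int w, int h) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < h; r++) {
    for (c = 0; c < w; c++)
      sad += (unsigned int)abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}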


@@ -1,320 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "vpx_ports/mem.h"
unsigned int vp8_variance16x16_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
uint32x2_t d0u32, d10u32;
int64x1_t d0s64, d1s64;
uint8x16_t q0u8, q1u8, q2u8, q3u8;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int32x4_t q8s32, q9s32, q10s32;
int64x2_t q0s64, q1s64, q5s64;
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 8; i++) {
q0u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
q1u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
__builtin_prefetch(src_ptr);
q2u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
q3u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
__builtin_prefetch(ref_ptr);
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
}
q10s32 = vaddq_s32(q10s32, q9s32);
q0s64 = vpaddlq_s32(q8s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
vreinterpret_s32_s64(d0s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8);
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
return vget_lane_u32(d0u32, 0);
}
unsigned int vp8_variance16x8_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
uint32x2_t d0u32, d10u32;
int64x1_t d0s64, d1s64;
uint8x16_t q0u8, q1u8, q2u8, q3u8;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int32x4_t q8s32, q9s32, q10s32;
int64x2_t q0s64, q1s64, q5s64;
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 4; i++) { // variance16x8_neon_loop
q0u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
q1u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
__builtin_prefetch(src_ptr);
q2u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
q3u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
__builtin_prefetch(ref_ptr);
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
}
q10s32 = vaddq_s32(q10s32, q9s32);
q0s64 = vpaddlq_s32(q8s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
vreinterpret_s32_s64(d0s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
return vget_lane_u32(d0u32, 0);
}
unsigned int vp8_variance8x16_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
uint8x8_t d0u8, d2u8, d4u8, d6u8;
int16x4_t d22s16, d23s16, d24s16, d25s16;
uint32x2_t d0u32, d10u32;
int64x1_t d0s64, d1s64;
uint16x8_t q11u16, q12u16;
int32x4_t q8s32, q9s32, q10s32;
int64x2_t q0s64, q1s64, q5s64;
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 8; i++) { // variance8x16_neon_loop
d0u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d2u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
__builtin_prefetch(src_ptr);
d4u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d6u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
__builtin_prefetch(ref_ptr);
q11u16 = vsubl_u8(d0u8, d4u8);
q12u16 = vsubl_u8(d2u8, d6u8);
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
}
q10s32 = vaddq_s32(q10s32, q9s32);
q0s64 = vpaddlq_s32(q8s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
vreinterpret_s32_s64(d0s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
return vget_lane_u32(d0u32, 0);
}
unsigned int vp8_variance8x8_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
uint32x2_t d0u32, d10u32;
int64x1_t d0s64, d1s64;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int32x4_t q8s32, q9s32, q10s32;
int64x2_t q0s64, q1s64, q5s64;
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 2; i++) { // variance8x8_neon_loop
d0u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d1u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d2u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d3u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d4u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d5u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d6u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d7u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
q11u16 = vsubl_u8(d0u8, d4u8);
q12u16 = vsubl_u8(d1u8, d5u8);
q13u16 = vsubl_u8(d2u8, d6u8);
q14u16 = vsubl_u8(d3u8, d7u8);
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
}
q10s32 = vaddq_s32(q10s32, q9s32);
q0s64 = vpaddlq_s32(q8s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
vreinterpret_s32_s64(d0s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6);
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
return vget_lane_u32(d0u32, 0);
}
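Each of the deleted functions above ends with the same widening horizontal reduction before the variance formula is applied. As a sketch, assuming an ARM target with <arm_neon.h> (the helper name is illustrative), the unsigned flavour of that reduction looks like:

#include <arm_neon.h>

/* Reduce 8 x u16 lanes to one scalar: u16 -> u32 -> u64 pairwise adds,
   then add the two 64-bit halves (illustrative helper). */
static unsigned int horizontal_add_u16x8(uint16x8_t v) {
  const uint32x4_t a = vpaddlq_u16(v);
  const uint64x2_t b = vpaddlq_u32(a);
  const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
                                vreinterpret_u32_u64(vget_high_u64(b)));
  return vget_lane_u32(c, 0);
}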


@@ -12,7 +12,7 @@
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
static const uint16_t bilinear_taps_coeff[8][2] = {
static const uint8_t bilinear_taps_coeff[8][2] = {
{128, 0},
{112, 16},
{ 96, 32},
@@ -32,7 +32,7 @@ unsigned int vp8_sub_pixel_variance16x16_neon_func(
int dst_pixels_per_line,
unsigned int *sse) {
int i;
DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 528);
DECLARE_ALIGNED(16, unsigned char, tmp[528]);
unsigned char *tmpp;
unsigned char *tmpp2;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
@@ -911,12 +911,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_neon(
return vget_lane_u32(d0u32, 0);
}
enum { kWidth8 = 8 };
enum { kHeight8 = 8 };
enum { kHeight8PlusOne = 9 };
enum { kPixelStepOne = 1 };
enum { kAlign16 = 16 };
#define FILTER_BITS 7
static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
@@ -968,8 +962,8 @@ static unsigned int variance8x8_neon(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
unsigned int *sse) {
int sum;
variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum);
return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8));
variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
return *sse - (((int64_t)sum * sum) / (8 * 8));
}
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
@@ -978,9 +972,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const uint16_t *vpx_filter) {
const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]);
const uint8x8_t f1 = vmov_n_u8((uint8_t)vpx_filter[1]);
const uint8_t *vpx_filter) {
const uint8x8_t f0 = vmov_n_u8(vpx_filter[0]);
const uint8x8_t f1 = vmov_n_u8(vpx_filter[1]);
unsigned int i;
for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
@@ -1003,21 +997,21 @@ unsigned int vp8_sub_pixel_variance8x8_neon(
const unsigned char *dst,
int dst_stride,
unsigned int *sse) {
DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8PlusOne * kWidth8);
DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8);
DECLARE_ALIGNED(16, uint8_t, temp2[9 * 8]);
DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]);
if (xoffset == 0) {
var_filter_block2d_bil_w8(src, temp2, src_stride, kWidth8, kHeight8,
kWidth8, bilinear_taps_coeff[yoffset]);
var_filter_block2d_bil_w8(src, temp2, src_stride, 8, 8,
8, bilinear_taps_coeff[yoffset]);
} else if (yoffset == 0) {
var_filter_block2d_bil_w8(src, temp2, src_stride, kPixelStepOne,
kHeight8PlusOne, kWidth8,
var_filter_block2d_bil_w8(src, temp2, src_stride, 1,
9, 8,
bilinear_taps_coeff[xoffset]);
} else {
var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne,
kHeight8PlusOne, kWidth8,
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
9, 8,
bilinear_taps_coeff[xoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8,
kWidth8, bilinear_taps_coeff[yoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
8, bilinear_taps_coeff[yoffset]);
}
return variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse);
return variance8x8_neon(temp2, 8, dst, dst_stride, sse);
}
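The rewritten var_filter_block2d_bil_w8 takes its taps as uint8_t because every entry in bilinear_taps_coeff fits in a byte and each pair of taps sums to 128 (1 << FILTER_BITS). A scalar sketch of one filtered output pixel, assuming the usual rounded shift (names are illustrative):

/* One bilinear output sample: weighted average of two source pixels that are
   pixel_step apart, rounded and scaled back by FILTER_BITS (= 7). Illustrative. */
static unsigned char bilinear_tap(const unsigned char *src, int pixel_step,
                                  const unsigned char *filter) {
  const int acc = src[0] * filter[0] + src[pixel_step] * filter[1];
  return (unsigned char)((acc + 64) >> 7); /* round to nearest, then divide by 128 */
}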


@@ -9,10 +9,14 @@
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/common/variance.h"
#include "vp8/common/filter.h"
// TODO(johannkoenig): Move this to vpx_dsp or vp8/encoder
#if CONFIG_VP8_ENCODER
#if HAVE_MEDIA
#include "vp8/common/arm/bilinearfilter_arm.h"
@@ -40,8 +44,8 @@ unsigned int vp8_sub_pixel_variance8x8_armv6
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
8, 8, 8, VFilter);
return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,
dst_pixels_per_line, sse);
return vpx_variance8x8_media(second_pass, 8, dst_ptr,
dst_pixels_per_line, sse);
}
unsigned int vp8_sub_pixel_variance16x16_armv6
@@ -86,13 +90,13 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
16, 16, 16, VFilter);
var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr,
dst_pixels_per_line, sse);
var = vpx_variance16x16_media(second_pass, 16, dst_ptr,
dst_pixels_per_line, sse);
}
return var;
}
#endif /* HAVE_MEDIA */
#endif // HAVE_MEDIA
#if HAVE_NEON
@@ -129,4 +133,5 @@ unsigned int vp8_sub_pixel_variance16x16_neon
return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
}
#endif
#endif // HAVE_NEON
#endif // CONFIG_VP8_ENCODER

vp8/common/copy_c.c (new file, 32 lines)

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"
/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
unsigned char *dst_ptr, int dst_stride,
int height)
{
int r;
for (r = 0; r < height; r++)
{
memcpy(dst_ptr, src_ptr, 32);
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}
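A hypothetical call site for the new helper, copying a 32-pixel-wide strip (two macroblocks side by side) of height 16 between frame buffers; the wrapper name is made up for illustration:

/* Prototype matches the new vp8/common/copy_c.c above. */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
                    unsigned char *dst_ptr, int dst_stride, int height);

void copy_two_macroblocks(const unsigned char *src, int src_stride,
                          unsigned char *dst, int dst_stride) {
  vp8_copy32xn_c(src, src_stride, dst, dst_stride, 16);
}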


@@ -10,6 +10,7 @@
#include "filter.h"
#include "./vp8_rtcd.h"
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{


@@ -17,6 +17,7 @@
#include "vpx_ports/x86.h"
#endif
#include "vp8/common/onyxc_int.h"
#include "vp8/common/systemdependent.h"
#if CONFIG_MULTITHREAD
#if HAVE_UNISTD_H && !defined(__OS2__)


@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
/****************************************************************************
* Notes:


@@ -17,10 +17,11 @@
* higher quality.
*/
#include "postproc.h"
#include "variance.h"
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/common/postproc.h"
#include "vp8/common/variance.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8_rtcd.h"
#include "vpx_scale/yv12config.h"
#include <limits.h>
@@ -150,36 +151,36 @@ static void multiframe_quality_enhance_block
if (blksize == 16)
{
actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
#ifdef USE_SSD
vp8_variance16x16(y, y_stride, yd, yd_stride, &sse);
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 128)>>8;
vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 32)>>6;
vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 32)>>6;
#else
sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
#endif
}
else /* if (blksize == 8) */
{
actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
#ifdef USE_SSD
vp8_variance8x8(y, y_stride, yd, yd_stride, &sse);
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 32)>>6;
vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 8)>>4;
vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 8)>>4;
#else
sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
#endif
}
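The rounding pattern in the block above is worth spelling out: (x + 128) >> 8 is x/256 rounded to nearest (the per-pixel average over a 16x16 block), and (x + 32) >> 6 is the same thing over 64 pixels. A generic form, with an illustrative name:

/* (x + 2^(n-1)) >> n == round(x / 2^n); n = 8 for 16x16 blocks, 6 for 8x8. */
static unsigned int rounded_shift(unsigned int x, int n) {
  return (x + (1u << (n - 1))) >> n;
}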


@@ -427,7 +427,7 @@ void vp8_de_noise(VP8_COMMON *cm,
}
}
double vp8_gaussian(double sigma, double mu, double x)
static double gaussian(double sigma, double mu, double x)
{
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
(exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
@@ -455,7 +455,7 @@ static void fillrd(struct postproc_state *state, int q, int a)
for (i = -32; i < 32; i++)
{
const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i));
const int v = (int)(.5 + 256 * gaussian(sigma, 0, i));
if (v)
{


@@ -10,6 +10,8 @@
#include <limits.h>
#include <string.h>
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
@@ -30,31 +32,8 @@ void vp8_copy_mem16x16_c(
for (r = 0; r < 16; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
dst[4] = src[4];
dst[5] = src[5];
dst[6] = src[6];
dst[7] = src[7];
dst[8] = src[8];
dst[9] = src[9];
dst[10] = src[10];
dst[11] = src[11];
dst[12] = src[12];
dst[13] = src[13];
dst[14] = src[14];
dst[15] = src[15];
memcpy(dst, src, 16);
#else
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ;
((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ;
#endif
src += src_stride;
dst += dst_stride;
@@ -72,19 +51,8 @@ void vp8_copy_mem8x8_c(
for (r = 0; r < 8; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
dst[4] = src[4];
dst[5] = src[5];
dst[6] = src[6];
dst[7] = src[7];
#else
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
#endif
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;
@@ -102,19 +70,8 @@ void vp8_copy_mem8x4_c(
for (r = 0; r < 4; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
dst[4] = src[4];
dst[5] = src[5];
dst[6] = src[6];
dst[7] = src[7];
#else
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
#endif
memcpy(dst, src, 8);
src += src_stride;
dst += dst_stride;


@@ -215,7 +215,7 @@ $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
# Disable neon while investigating https://code.google.com/p/webm/issues/detail?id=817
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2/;
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
@@ -233,34 +233,10 @@ specialize qw/vp8_bilinear_predict8x4 mmx media neon/;
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
specialize qw/vp8_bilinear_predict4x4 mmx media neon/;
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
specialize qw/vp8_bilinear_predict4x4 mmx media/;
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
#
# Whole-pixel Variance
#
add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp8_variance4x4 mmx sse2/;
$vp8_variance4x4_sse2=vp8_variance4x4_wmt;
add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp8_variance8x8 mmx sse2 media neon/;
$vp8_variance8x8_sse2=vp8_variance8x8_wmt;
$vp8_variance8x8_media=vp8_variance8x8_armv6;
add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp8_variance8x16 mmx sse2 neon/;
$vp8_variance8x16_sse2=vp8_variance8x16_wmt;
add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp8_variance16x8 mmx sse2 neon/;
$vp8_variance16x8_sse2=vp8_variance16x8_wmt;
add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp8_variance16x16 mmx sse2 media neon/;
$vp8_variance16x16_sse2=vp8_variance16x16_wmt;
$vp8_variance16x16_media=vp8_variance16x16_armv6;
#
# Sub-pixel Variance
#
@@ -269,10 +245,9 @@ specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/;
$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon_asm/;
specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media/;
$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
$vp8_sub_pixel_variance8x8_neon_asm=vp8_sub_pixel_variance8x8_neon;
add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
@@ -303,119 +278,16 @@ specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
#
# Single block SAD
#
add_proto qw/unsigned int vp8_sad4x4/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
specialize qw/vp8_sad4x4 mmx sse2 neon/;
$vp8_sad4x4_sse2=vp8_sad4x4_wmt;
add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
specialize qw/vp8_sad8x8 mmx sse2 neon/;
$vp8_sad8x8_sse2=vp8_sad8x8_wmt;
add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
specialize qw/vp8_sad8x16 mmx sse2 neon/;
$vp8_sad8x16_sse2=vp8_sad8x16_wmt;
add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
specialize qw/vp8_sad16x8 mmx sse2 neon/;
$vp8_sad16x8_sse2=vp8_sad16x8_wmt;
add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/;
$vp8_sad16x16_sse2=vp8_sad16x16_wmt;
$vp8_sad16x16_media=vp8_sad16x16_armv6;
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad4x4x3 sse3/;
add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad8x8x3 sse3/;
add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad8x16x3 sse3/;
add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad16x8x3 sse3 ssse3/;
add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad16x16x3 sse3 ssse3/;
# Note the only difference in the following prototypes is that they return into
# an array of short
add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
specialize qw/vp8_sad4x4x8 sse4_1/;
$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4;
add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
specialize qw/vp8_sad8x8x8 sse4_1/;
$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4;
add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
specialize qw/vp8_sad8x16x8 sse4_1/;
$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4;
add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
specialize qw/vp8_sad16x8x8 sse4_1/;
$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4;
add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
specialize qw/vp8_sad16x16x8 sse4_1/;
$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4;
#
# Multi-block SAD, comparing a reference to N independent blocks
#
add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad4x4x4d sse3/;
add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad8x8x4d sse3/;
add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad8x16x4d sse3/;
add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad16x8x4d sse3/;
add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp8_sad16x16x4d sse3/;
#
# Encoder functions below this point.
#
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
#
# Sum of squares (vector)
#
add_proto qw/unsigned int vp8_get_mb_ss/, "const short *";
specialize qw/vp8_get_mb_ss mmx sse2/;
#
# SSE (Sum Squared Error)
#
add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/;
$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp8_mse16x16 mmx sse2 media neon/;
$vp8_mse16x16_sse2=vp8_mse16x16_wmt;
$vp8_mse16x16_media=vp8_mse16x16_armv6;
add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
specialize qw/vp8_get4x4sse_cs mmx neon/;
#
# Block copy
#
if ($opts{arch} =~ /x86/) {
add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n";
add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n";
specialize qw/vp8_copy32xn sse2 sse3/;
}
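For context, add_proto declares a run-time-dispatched entry point and specialize lists the optimized implementations it may resolve to; the $..._media=..._armv6 lines map the generic "media" suffix onto the ARMv6 symbol. Conceptually (this is a sketch, not the generated rtcd header), the result behaves like a function pointer selected by CPU capability:

/* Real prototypes from the table above; the dispatch logic is illustrative. */
unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char *, int, int, int,
                                         const unsigned char *, int,
                                         unsigned int *);
unsigned int vp8_sub_pixel_variance8x8_armv6(const unsigned char *, int, int, int,
                                             const unsigned char *, int,
                                             unsigned int *);

static unsigned int (*vp8_sub_pixel_variance8x8_ptr)(const unsigned char *, int,
                                                     int, int,
                                                     const unsigned char *, int,
                                                     unsigned int *);

static void example_setup_rtcd(int have_media) {
  vp8_sub_pixel_variance8x8_ptr = vp8_sub_pixel_variance8x8_c;
  if (have_media) /* "media" == ARMv6 in the specialize line above */
    vp8_sub_pixel_variance8x8_ptr = vp8_sub_pixel_variance8x8_armv6;
}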


@@ -1,302 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits.h>
#include <stdlib.h>
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad, int m, int n)
{
int r, c;
unsigned int sad = 0;
for (r = 0; r < n; r++)
{
for (c = 0; c < m; c++)
{
sad += abs(src_ptr[c] - ref_ptr[c]);
}
if (sad > max_sad)
break;
src_ptr += src_stride;
ref_ptr += ref_stride;
}
return sad;
}
/* max_sad is provided as an optional optimization point. Alternative
* implementations of these functions are not required to check it.
*/
unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 16);
}
unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 8);
}
unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 16, 8);
}
unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 8, 16);
}
unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int max_sad)
{
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, max_sad, 4, 4);
}
void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
}
void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned short *sad_array)
{
sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 0, ref_stride, UINT_MAX);
sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, UINT_MAX);
sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, UINT_MAX);
sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, UINT_MAX);
sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, UINT_MAX);
sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, UINT_MAX);
sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, UINT_MAX);
sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, UINT_MAX);
}
void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride,
const unsigned char * const ref_ptr[], int ref_stride,
unsigned int *sad_array)
{
sad_array[0] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[0], ref_stride, UINT_MAX);
sad_array[1] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[1], ref_stride, UINT_MAX);
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[2], ref_stride, UINT_MAX);
sad_array[3] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr[3], ref_stride, UINT_MAX);
}
/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride,
unsigned char *dst_ptr, int dst_stride,
int height)
{
int r;
for (r = 0; r < height; r++)
{
#if !(CONFIG_FAST_UNALIGNED)
dst_ptr[0] = src_ptr[0];
dst_ptr[1] = src_ptr[1];
dst_ptr[2] = src_ptr[2];
dst_ptr[3] = src_ptr[3];
dst_ptr[4] = src_ptr[4];
dst_ptr[5] = src_ptr[5];
dst_ptr[6] = src_ptr[6];
dst_ptr[7] = src_ptr[7];
dst_ptr[8] = src_ptr[8];
dst_ptr[9] = src_ptr[9];
dst_ptr[10] = src_ptr[10];
dst_ptr[11] = src_ptr[11];
dst_ptr[12] = src_ptr[12];
dst_ptr[13] = src_ptr[13];
dst_ptr[14] = src_ptr[14];
dst_ptr[15] = src_ptr[15];
dst_ptr[16] = src_ptr[16];
dst_ptr[17] = src_ptr[17];
dst_ptr[18] = src_ptr[18];
dst_ptr[19] = src_ptr[19];
dst_ptr[20] = src_ptr[20];
dst_ptr[21] = src_ptr[21];
dst_ptr[22] = src_ptr[22];
dst_ptr[23] = src_ptr[23];
dst_ptr[24] = src_ptr[24];
dst_ptr[25] = src_ptr[25];
dst_ptr[26] = src_ptr[26];
dst_ptr[27] = src_ptr[27];
dst_ptr[28] = src_ptr[28];
dst_ptr[29] = src_ptr[29];
dst_ptr[30] = src_ptr[30];
dst_ptr[31] = src_ptr[31];
#else
((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0] ;
((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1] ;
((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2] ;
((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3] ;
((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4] ;
((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5] ;
((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6] ;
((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7] ;
#endif
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}


@@ -14,50 +14,42 @@
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef unsigned int(*vp8_sad_fn_t)(
const unsigned char *src_ptr,
typedef unsigned int(*vpx_sad_fn_t)(
const uint8_t *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int ref_stride,
unsigned int max_sad);
const uint8_t *ref_ptr,
int ref_stride);
typedef void (*vp8_copy32xn_fn_t)(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
unsigned char *ref_ptr,
int ref_stride,
int n);
typedef void (*vp8_sad_multi_fn_t)(
typedef void (*vpx_sad_multi_fn_t)(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
const unsigned char *ref_array,
int ref_stride,
unsigned int *sad_array);
typedef void (*vp8_sad_multi1_fn_t)
typedef void (*vpx_sad_multi_d_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int ref_stride,
unsigned short *sad_array
);
typedef void (*vp8_sad_multi_d_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char * const ref_ptr[],
const unsigned char * const ref_array[],
int ref_stride,
unsigned int *sad_array
);
typedef unsigned int (*vp8_variance_fn_t)
typedef unsigned int (*vpx_variance_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
@@ -77,40 +69,17 @@ typedef unsigned int (*vp8_subpixvariance_fn_t)
unsigned int *sse
);
typedef void (*vp8_ssimpf_fn_t)
(
unsigned char *s,
int sp,
unsigned char *r,
int rp,
unsigned long *sum_s,
unsigned long *sum_r,
unsigned long *sum_sq_s,
unsigned long *sum_sq_r,
unsigned long *sum_sxr
);
typedef unsigned int (*vp8_getmbss_fn_t)(const short *);
typedef unsigned int (*vp8_get16x16prederror_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int ref_stride
);
typedef struct variance_vtable
{
vp8_sad_fn_t sdf;
vp8_variance_fn_t vf;
vpx_sad_fn_t sdf;
vpx_variance_fn_t vf;
vp8_subpixvariance_fn_t svf;
vp8_variance_fn_t svf_halfpix_h;
vp8_variance_fn_t svf_halfpix_v;
vp8_variance_fn_t svf_halfpix_hv;
vp8_sad_multi_fn_t sdx3f;
vp8_sad_multi1_fn_t sdx8f;
vp8_sad_multi_d_fn_t sdx4df;
vpx_variance_fn_t svf_halfpix_h;
vpx_variance_fn_t svf_halfpix_v;
vpx_variance_fn_t svf_halfpix_hv;
vpx_sad_multi_fn_t sdx3f;
vpx_sad_multi_fn_t sdx8f;
vpx_sad_multi_d_fn_t sdx4df;
#if ARCH_X86 || ARCH_X86_64
vp8_copy32xn_fn_t copymem;
#endif
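The typedef renames above also change the contract: vpx_sad_fn_t no longer takes the max_sad early-out parameter that vp8_sad_fn_t carried, and the sdx8f slot now reuses vpx_sad_multi_fn_t. A minimal sketch of a call site under the new typedef (names are illustrative):

#include <stdint.h>

typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *src_ptr, int source_stride,
                                     const uint8_t *ref_ptr, int ref_stride);

/* Callers that previously passed UINT_MAX as max_sad now simply omit it. */
static unsigned int run_sad(vpx_sad_fn_t sdf, const uint8_t *src, int src_stride,
                            const uint8_t *ref, int ref_stride) {
  return sdf(src, src_stride, ref, ref_stride);
}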


@@ -8,43 +8,34 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "variance.h"
#include "./vp8_rtcd.h"
#include "filter.h"
#include "variance.h"
unsigned int vp8_get_mb_ss_c
(
const short *src_ptr
)
{
unsigned int i = 0, sum = 0;
do
{
sum += (src_ptr[i] * src_ptr[i]);
i++;
}
while (i < 256);
return sum;
/* This is a bad idea.
* ctz = count trailing zeros */
static int ctz(int a) {
int b = 0;
while (a != 1) {
a >>= 1;
b++;
}
return b;
}
static void variance(
static unsigned int variance(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
int w,
int h,
unsigned int *sse,
int *sum)
unsigned int *sse)
{
int i, j;
int diff;
int diff, sum;
*sum = 0;
sum = 0;
*sse = 0;
for (i = 0; i < h; i++)
@@ -52,114 +43,17 @@ static void variance(
for (j = 0; j < w; j++)
{
diff = src_ptr[j] - ref_ptr[j];
*sum += diff;
sum += diff;
*sse += diff * diff;
}
src_ptr += source_stride;
ref_ptr += recon_stride;
}
return (*sse - (((unsigned int)sum * sum) >> (int)((ctz(w) + ctz(h)))));
}
unsigned int vp8_variance16x16_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 8));
}
unsigned int vp8_variance8x16_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_variance16x8_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_variance8x8_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 6));
}
unsigned int vp8_variance4x4_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 4));
}
unsigned int vp8_mse16x16_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
*sse = var;
return var;
}
/****************************************************************************
*
* ROUTINE : filter_block2d_bil_first_pass
@@ -303,7 +197,7 @@ unsigned int vp8_sub_pixel_variance4x4_c
/* Now filter Vertically */
var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 4, dst_ptr, dst_pixels_per_line, 4, 4, sse);
}
@@ -328,7 +222,7 @@ unsigned int vp8_sub_pixel_variance8x8_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 8, sse);
}
unsigned int vp8_sub_pixel_variance16x16_c
@@ -352,7 +246,7 @@ unsigned int vp8_sub_pixel_variance16x16_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 16, sse);
}
@@ -392,21 +286,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
}
unsigned int vp8_sub_pixel_mse16x16_c
(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
{
vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
return *sse;
}
unsigned int vp8_sub_pixel_variance16x8_c
(
const unsigned char *src_ptr,
@@ -428,7 +307,7 @@ unsigned int vp8_sub_pixel_variance16x8_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 8, sse);
}
unsigned int vp8_sub_pixel_variance8x16_c
@@ -454,5 +333,5 @@ unsigned int vp8_sub_pixel_variance8x16_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 16, sse);
}
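The refactored variance() above folds the per-size shifts into ctz(w) + ctz(h), which for power-of-two block dimensions equals log2(w * h), so it reproduces exactly the shifts the deleted wrappers used (8, 7, 7, 6, 4). A quick self-contained check:

#include <assert.h>

static int ctz(int a) {       /* same helper as in the patch */
  int b = 0;
  while (a != 1) {
    a >>= 1;
    b++;
  }
  return b;
}

int main(void) {
  assert(ctz(16) + ctz(16) == 8);  /* 16x16: >> 8 */
  assert(ctz(8)  + ctz(16) == 7);  /* 8x16 and 16x8: >> 7 */
  assert(ctz(8)  + ctz(8)  == 6);  /* 8x8:  >> 6 */
  assert(ctz(4)  + ctz(4)  == 4);  /* 4x4:  >> 4 */
  return 0;
}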


@@ -0,0 +1,93 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;void vp8_copy32xn_sse2(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM 7
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;dst_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;dst_stride
movsxd rcx, dword ptr arg(4) ;height
.block_copy_sse2_loopx4:
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + 16]
movdqu xmm2, XMMWORD PTR [rsi + rax]
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
lea rsi, [rsi+rax*2]
movdqu xmm4, XMMWORD PTR [rsi]
movdqu xmm5, XMMWORD PTR [rsi + 16]
movdqu xmm6, XMMWORD PTR [rsi + rax]
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
lea rsi, [rsi+rax*2]
movdqa XMMWORD PTR [rdi], xmm0
movdqa XMMWORD PTR [rdi + 16], xmm1
movdqa XMMWORD PTR [rdi + rdx], xmm2
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
lea rdi, [rdi+rdx*2]
movdqa XMMWORD PTR [rdi], xmm4
movdqa XMMWORD PTR [rdi + 16], xmm5
movdqa XMMWORD PTR [rdi + rdx], xmm6
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
lea rdi, [rdi+rdx*2]
sub rcx, 4
cmp rcx, 4
jge .block_copy_sse2_loopx4
cmp rcx, 0
je .copy_is_done
.block_copy_sse2_loop:
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + 16]
lea rsi, [rsi+rax]
movdqa XMMWORD PTR [rdi], xmm0
movdqa XMMWORD PTR [rdi + 16], xmm1
lea rdi, [rdi+rdx]
sub rcx, 1
jne .block_copy_sse2_loop
.copy_is_done:
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
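For orientation, the SSE2 routine above implements a straight 32-byte-wide row copy: four rows per iteration of the unrolled loop, then a single-row tail loop for any remainder. A plain-C sketch of the same behavior, assuming only the prototype given in the comment (not code from this change):
#include <string.h>

static void copy32xn_c_sketch(const unsigned char *src_ptr, int src_stride,
                              unsigned char *dst_ptr, int dst_stride, int height)
{
    int r;
    for (r = 0; r < height; ++r)
        memcpy(dst_ptr + r * dst_stride, src_ptr + r * src_stride, 32);  /* 32 bytes per row */
}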

View File

@@ -0,0 +1,146 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%macro STACK_FRAME_CREATE_X3 0
%if ABI_IS_32BIT
%define src_ptr rsi
%define src_stride rax
%define ref_ptr rdi
%define ref_stride rdx
%define end_ptr rcx
%define ret_var rbx
%define result_ptr arg(4)
%define max_sad arg(4)
%define height dword ptr arg(4)
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
mov rsi, arg(0) ; src_ptr
mov rdi, arg(2) ; ref_ptr
movsxd rax, dword ptr arg(1) ; src_stride
movsxd rdx, dword ptr arg(3) ; ref_stride
%else
%if LIBVPX_YASM_WIN64
SAVE_XMM 7, u
%define src_ptr rcx
%define src_stride rdx
%define ref_ptr r8
%define ref_stride r9
%define end_ptr r10
%define ret_var r11
%define result_ptr [rsp+xmm_stack_space+8+4*8]
%define max_sad [rsp+xmm_stack_space+8+4*8]
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
%else
%define src_ptr rdi
%define src_stride rsi
%define ref_ptr rdx
%define ref_stride rcx
%define end_ptr r9
%define ret_var r10
%define result_ptr r8
%define max_sad r8
%define height r8
%endif
%endif
%endmacro
%macro STACK_FRAME_DESTROY_X3 0
%define src_ptr
%define src_stride
%define ref_ptr
%define ref_stride
%define end_ptr
%define ret_var
%define result_ptr
%define max_sad
%define height
%if ABI_IS_32BIT
pop rbx
pop rdi
pop rsi
pop rbp
%else
%if LIBVPX_YASM_WIN64
RESTORE_XMM
%endif
%endif
ret
%endmacro
;void vp8_copy32xn_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
global sym(vp8_copy32xn_sse3) PRIVATE
sym(vp8_copy32xn_sse3):
STACK_FRAME_CREATE_X3
.block_copy_sse3_loopx4:
lea end_ptr, [src_ptr+src_stride*2]
movdqu xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
movdqu xmm4, XMMWORD PTR [end_ptr]
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
lea src_ptr, [src_ptr+src_stride*4]
lea end_ptr, [ref_ptr+ref_stride*2]
movdqa XMMWORD PTR [ref_ptr], xmm0
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
movdqa XMMWORD PTR [end_ptr], xmm4
movdqa XMMWORD PTR [end_ptr + 16], xmm5
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
lea ref_ptr, [ref_ptr+ref_stride*4]
sub height, 4
cmp height, 4
jge .block_copy_sse3_loopx4
;Check to see if there are more rows that need to be copied.
cmp height, 0
je .copy_is_done
.block_copy_sse3_loop:
movdqu xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
lea src_ptr, [src_ptr+src_stride]
movdqa XMMWORD PTR [ref_ptr], xmm0
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
lea ref_ptr, [ref_ptr+ref_stride]
sub height, 1
jne .block_copy_sse3_loop
.copy_is_done:
STACK_FRAME_DESTROY_X3

View File

@@ -1,410 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;unsigned int vp8_sad16x16_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
global sym(vp8_sad16x16_wmt) PRIVATE
sym(vp8_sad16x16_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SAVE_XMM 6
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
lea rcx, [rsi+rax*8]
lea rcx, [rcx+rax*8]
pxor xmm6, xmm6
.x16x16sad_wmt_loop:
movq xmm0, QWORD PTR [rsi]
movq xmm2, QWORD PTR [rsi+8]
movq xmm1, QWORD PTR [rdi]
movq xmm3, QWORD PTR [rdi+8]
movq xmm4, QWORD PTR [rsi+rax]
movq xmm5, QWORD PTR [rdi+rdx]
punpcklbw xmm0, xmm2
punpcklbw xmm1, xmm3
psadbw xmm0, xmm1
movq xmm2, QWORD PTR [rsi+rax+8]
movq xmm3, QWORD PTR [rdi+rdx+8]
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
punpcklbw xmm4, xmm2
punpcklbw xmm5, xmm3
psadbw xmm4, xmm5
paddw xmm6, xmm0
paddw xmm6, xmm4
cmp rsi, rcx
jne .x16x16sad_wmt_loop
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movq rax, xmm0
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
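As a reference model for the SAD kernels in this file: each routine accumulates the sum of absolute differences over its block, and psadbw sums eight absolute byte differences per quadword. A C sketch of the 16x16 case, assumed from the prototype comments rather than taken from this change:
static unsigned int sad16x16_c_sketch(const unsigned char *src_ptr, int src_stride,
                                      const unsigned char *ref_ptr, int ref_stride)
{
    unsigned int sad = 0;
    int r, c;
    for (r = 0; r < 16; ++r)
    {
        for (c = 0; c < 16; ++c)
        {
            const int d = src_ptr[c] - ref_ptr[c];
            sad += (unsigned int)(d < 0 ? -d : d);
        }
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }
    return sad;
}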
;unsigned int vp8_sad8x16_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int max_sad)
global sym(vp8_sad8x16_wmt) PRIVATE
sym(vp8_sad8x16_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rbx, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
lea rcx, [rsi+rbx*8]
lea rcx, [rcx+rbx*8]
pxor mm7, mm7
.x8x16sad_wmt_loop:
movq rax, mm7
cmp eax, arg(4)
ja .x8x16sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm1, QWORD PTR [rdi]
movq mm2, QWORD PTR [rsi+rbx]
movq mm3, QWORD PTR [rdi+rdx]
psadbw mm0, mm1
psadbw mm2, mm3
lea rsi, [rsi+rbx*2]
lea rdi, [rdi+rdx*2]
paddw mm7, mm0
paddw mm7, mm2
cmp rsi, rcx
jne .x8x16sad_wmt_loop
movq rax, mm7
.x8x16sad_wmt_early_exit:
; begin epilog
pop rdi
pop rsi
pop rbx
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_sad8x8_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
global sym(vp8_sad8x8_wmt) PRIVATE
sym(vp8_sad8x8_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rbx, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
lea rcx, [rsi+rbx*8]
pxor mm7, mm7
.x8x8sad_wmt_loop:
movq rax, mm7
cmp eax, arg(4)
ja .x8x8sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm1, QWORD PTR [rdi]
psadbw mm0, mm1
lea rsi, [rsi+rbx]
add rdi, rdx
paddw mm7, mm0
cmp rsi, rcx
jne .x8x8sad_wmt_loop
movq rax, mm7
.x8x8sad_wmt_early_exit:
; begin epilog
pop rdi
pop rsi
pop rbx
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_sad4x4_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
global sym(vp8_sad4x4_wmt) PRIVATE
sym(vp8_sad4x4_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
movd mm0, DWORD PTR [rsi]
movd mm1, DWORD PTR [rdi]
movd mm2, DWORD PTR [rsi+rax]
movd mm3, DWORD PTR [rdi+rdx]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
psadbw mm0, mm1
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
movd mm4, DWORD PTR [rsi]
movd mm5, DWORD PTR [rdi]
movd mm6, DWORD PTR [rsi+rax]
movd mm7, DWORD PTR [rdi+rdx]
punpcklbw mm4, mm6
punpcklbw mm5, mm7
psadbw mm4, mm5
paddw mm0, mm4
movq rax, mm0
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_sad16x8_wmt(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
global sym(vp8_sad16x8_wmt) PRIVATE
sym(vp8_sad16x8_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rbx, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
lea rcx, [rsi+rbx*8]
pxor mm7, mm7
.x16x8sad_wmt_loop:
movq rax, mm7
cmp eax, arg(4)
ja .x16x8sad_wmt_early_exit
movq mm0, QWORD PTR [rsi]
movq mm2, QWORD PTR [rsi+8]
movq mm1, QWORD PTR [rdi]
movq mm3, QWORD PTR [rdi+8]
movq mm4, QWORD PTR [rsi+rbx]
movq mm5, QWORD PTR [rdi+rdx]
psadbw mm0, mm1
psadbw mm2, mm3
movq mm1, QWORD PTR [rsi+rbx+8]
movq mm3, QWORD PTR [rdi+rdx+8]
psadbw mm4, mm5
psadbw mm1, mm3
lea rsi, [rsi+rbx*2]
lea rdi, [rdi+rdx*2]
paddw mm0, mm2
paddw mm4, mm1
paddw mm7, mm0
paddw mm7, mm4
cmp rsi, rcx
jne .x16x8sad_wmt_loop
movq rax, mm7
.x16x8sad_wmt_early_exit:
; begin epilog
pop rdi
pop rsi
pop rbx
UNSHADOW_ARGS
pop rbp
ret
;void vp8_copy32xn_sse2(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM 7
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;dst_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;dst_stride
movsxd rcx, dword ptr arg(4) ;height
.block_copy_sse2_loopx4:
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + 16]
movdqu xmm2, XMMWORD PTR [rsi + rax]
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
lea rsi, [rsi+rax*2]
movdqu xmm4, XMMWORD PTR [rsi]
movdqu xmm5, XMMWORD PTR [rsi + 16]
movdqu xmm6, XMMWORD PTR [rsi + rax]
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
lea rsi, [rsi+rax*2]
movdqa XMMWORD PTR [rdi], xmm0
movdqa XMMWORD PTR [rdi + 16], xmm1
movdqa XMMWORD PTR [rdi + rdx], xmm2
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
lea rdi, [rdi+rdx*2]
movdqa XMMWORD PTR [rdi], xmm4
movdqa XMMWORD PTR [rdi + 16], xmm5
movdqa XMMWORD PTR [rdi + rdx], xmm6
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
lea rdi, [rdi+rdx*2]
sub rcx, 4
cmp rcx, 4
jge .block_copy_sse2_loopx4
cmp rcx, 0
je .copy_is_done
.block_copy_sse2_loop:
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + 16]
lea rsi, [rsi+rax]
movdqa XMMWORD PTR [rdi], xmm0
movdqa XMMWORD PTR [rdi + 16], xmm1
lea rdi, [rdi+rdx]
sub rcx, 1
jne .block_copy_sse2_loop
.copy_is_done:
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

View File

@@ -1,960 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%macro STACK_FRAME_CREATE_X3 0
%if ABI_IS_32BIT
%define src_ptr rsi
%define src_stride rax
%define ref_ptr rdi
%define ref_stride rdx
%define end_ptr rcx
%define ret_var rbx
%define result_ptr arg(4)
%define max_sad arg(4)
%define height dword ptr arg(4)
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
mov rsi, arg(0) ; src_ptr
mov rdi, arg(2) ; ref_ptr
movsxd rax, dword ptr arg(1) ; src_stride
movsxd rdx, dword ptr arg(3) ; ref_stride
%else
%if LIBVPX_YASM_WIN64
SAVE_XMM 7, u
%define src_ptr rcx
%define src_stride rdx
%define ref_ptr r8
%define ref_stride r9
%define end_ptr r10
%define ret_var r11
%define result_ptr [rsp+xmm_stack_space+8+4*8]
%define max_sad [rsp+xmm_stack_space+8+4*8]
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
%else
%define src_ptr rdi
%define src_stride rsi
%define ref_ptr rdx
%define ref_stride rcx
%define end_ptr r9
%define ret_var r10
%define result_ptr r8
%define max_sad r8
%define height r8
%endif
%endif
%endmacro
%macro STACK_FRAME_DESTROY_X3 0
%define src_ptr
%define src_stride
%define ref_ptr
%define ref_stride
%define end_ptr
%define ret_var
%define result_ptr
%define max_sad
%define height
%if ABI_IS_32BIT
pop rbx
pop rdi
pop rsi
pop rbp
%else
%if LIBVPX_YASM_WIN64
RESTORE_XMM
%endif
%endif
ret
%endmacro
%macro STACK_FRAME_CREATE_X4 0
%if ABI_IS_32BIT
%define src_ptr rsi
%define src_stride rax
%define r0_ptr rcx
%define r1_ptr rdx
%define r2_ptr rbx
%define r3_ptr rdi
%define ref_stride rbp
%define result_ptr arg(4)
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
push rbp
mov rdi, arg(2) ; ref_ptr_base
LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
mov rsi, arg(0) ; src_ptr
movsxd rbx, dword ptr arg(1) ; src_stride
movsxd rbp, dword ptr arg(3) ; ref_stride
xchg rbx, rax
%else
%if LIBVPX_YASM_WIN64
SAVE_XMM 7, u
%define src_ptr rcx
%define src_stride rdx
%define r0_ptr rsi
%define r1_ptr r10
%define r2_ptr r11
%define r3_ptr r8
%define ref_stride r9
%define result_ptr [rsp+xmm_stack_space+16+4*8]
push rsi
LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
%else
%define src_ptr rdi
%define src_stride rsi
%define r0_ptr r9
%define r1_ptr r10
%define r2_ptr r11
%define r3_ptr rdx
%define ref_stride rcx
%define result_ptr r8
LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
%endif
%endif
%endmacro
%macro STACK_FRAME_DESTROY_X4 0
%define src_ptr
%define src_stride
%define r0_ptr
%define r1_ptr
%define r2_ptr
%define r3_ptr
%define ref_stride
%define result_ptr
%if ABI_IS_32BIT
pop rbx
pop rdi
pop rsi
pop rbp
%else
%if LIBVPX_YASM_WIN64
pop rsi
RESTORE_XMM
%endif
%endif
ret
%endmacro
%macro PROCESS_16X2X3 5
%if %1==0
movdqa xmm0, XMMWORD PTR [%2]
lddqu xmm5, XMMWORD PTR [%3]
lddqu xmm6, XMMWORD PTR [%3+1]
lddqu xmm7, XMMWORD PTR [%3+2]
psadbw xmm5, xmm0
psadbw xmm6, xmm0
psadbw xmm7, xmm0
%else
movdqa xmm0, XMMWORD PTR [%2]
lddqu xmm1, XMMWORD PTR [%3]
lddqu xmm2, XMMWORD PTR [%3+1]
lddqu xmm3, XMMWORD PTR [%3+2]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm5, xmm1
paddw xmm6, xmm2
paddw xmm7, xmm3
%endif
movdqa xmm0, XMMWORD PTR [%2+%4]
lddqu xmm1, XMMWORD PTR [%3+%5]
lddqu xmm2, XMMWORD PTR [%3+%5+1]
lddqu xmm3, XMMWORD PTR [%3+%5+2]
%if %1==0 || %1==1
lea %2, [%2+%4*2]
lea %3, [%3+%5*2]
%endif
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm5, xmm1
paddw xmm6, xmm2
paddw xmm7, xmm3
%endmacro
%macro PROCESS_8X2X3 5
%if %1==0
movq mm0, QWORD PTR [%2]
movq mm5, QWORD PTR [%3]
movq mm6, QWORD PTR [%3+1]
movq mm7, QWORD PTR [%3+2]
psadbw mm5, mm0
psadbw mm6, mm0
psadbw mm7, mm0
%else
movq mm0, QWORD PTR [%2]
movq mm1, QWORD PTR [%3]
movq mm2, QWORD PTR [%3+1]
movq mm3, QWORD PTR [%3+2]
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm5, mm1
paddw mm6, mm2
paddw mm7, mm3
%endif
movq mm0, QWORD PTR [%2+%4]
movq mm1, QWORD PTR [%3+%5]
movq mm2, QWORD PTR [%3+%5+1]
movq mm3, QWORD PTR [%3+%5+2]
%if %1==0 || %1==1
lea %2, [%2+%4*2]
lea %3, [%3+%5*2]
%endif
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm5, mm1
paddw mm6, mm2
paddw mm7, mm3
%endmacro
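The PROCESS_16X2X3 and PROCESS_8X2X3 macros above evaluate the same source rows against three horizontally adjacent reference positions (ref, ref+1, ref+2) in one pass. A plain-C model of that "x3" pattern, with hypothetical names (the reference rows must have w+2 readable columns):
static void sad_wxh_x3_sketch(const unsigned char *src, int src_stride,
                              const unsigned char *ref, int ref_stride,
                              int w, int h, unsigned int sad_array[3])
{
    int k, r, c;
    for (k = 0; k < 3; ++k)
    {
        unsigned int sad = 0;
        for (r = 0; r < h; ++r)
        {
            for (c = 0; c < w; ++c)
            {
                const int d = src[r * src_stride + c] - ref[r * ref_stride + c + k];
                sad += (unsigned int)(d < 0 ? -d : d);
            }
        }
        sad_array[k] = sad;
    }
}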
%macro LOAD_X4_ADDRESSES 5
mov %2, [%1+REG_SZ_BYTES*0]
mov %3, [%1+REG_SZ_BYTES*1]
mov %4, [%1+REG_SZ_BYTES*2]
mov %5, [%1+REG_SZ_BYTES*3]
%endmacro
%macro PROCESS_16X2X4 8
%if %1==0
movdqa xmm0, XMMWORD PTR [%2]
lddqu xmm4, XMMWORD PTR [%3]
lddqu xmm5, XMMWORD PTR [%4]
lddqu xmm6, XMMWORD PTR [%5]
lddqu xmm7, XMMWORD PTR [%6]
psadbw xmm4, xmm0
psadbw xmm5, xmm0
psadbw xmm6, xmm0
psadbw xmm7, xmm0
%else
movdqa xmm0, XMMWORD PTR [%2]
lddqu xmm1, XMMWORD PTR [%3]
lddqu xmm2, XMMWORD PTR [%4]
lddqu xmm3, XMMWORD PTR [%5]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm4, xmm1
lddqu xmm1, XMMWORD PTR [%6]
paddw xmm5, xmm2
paddw xmm6, xmm3
psadbw xmm1, xmm0
paddw xmm7, xmm1
%endif
movdqa xmm0, XMMWORD PTR [%2+%7]
lddqu xmm1, XMMWORD PTR [%3+%8]
lddqu xmm2, XMMWORD PTR [%4+%8]
lddqu xmm3, XMMWORD PTR [%5+%8]
psadbw xmm1, xmm0
psadbw xmm2, xmm0
psadbw xmm3, xmm0
paddw xmm4, xmm1
lddqu xmm1, XMMWORD PTR [%6+%8]
paddw xmm5, xmm2
paddw xmm6, xmm3
%if %1==0 || %1==1
lea %2, [%2+%7*2]
lea %3, [%3+%8*2]
lea %4, [%4+%8*2]
lea %5, [%5+%8*2]
lea %6, [%6+%8*2]
%endif
psadbw xmm1, xmm0
paddw xmm7, xmm1
%endmacro
%macro PROCESS_8X2X4 8
%if %1==0
movq mm0, QWORD PTR [%2]
movq mm4, QWORD PTR [%3]
movq mm5, QWORD PTR [%4]
movq mm6, QWORD PTR [%5]
movq mm7, QWORD PTR [%6]
psadbw mm4, mm0
psadbw mm5, mm0
psadbw mm6, mm0
psadbw mm7, mm0
%else
movq mm0, QWORD PTR [%2]
movq mm1, QWORD PTR [%3]
movq mm2, QWORD PTR [%4]
movq mm3, QWORD PTR [%5]
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm4, mm1
movq mm1, QWORD PTR [%6]
paddw mm5, mm2
paddw mm6, mm3
psadbw mm1, mm0
paddw mm7, mm1
%endif
movq mm0, QWORD PTR [%2+%7]
movq mm1, QWORD PTR [%3+%8]
movq mm2, QWORD PTR [%4+%8]
movq mm3, QWORD PTR [%5+%8]
psadbw mm1, mm0
psadbw mm2, mm0
psadbw mm3, mm0
paddw mm4, mm1
movq mm1, QWORD PTR [%6+%8]
paddw mm5, mm2
paddw mm6, mm3
%if %1==0 || %1==1
lea %2, [%2+%7*2]
lea %3, [%3+%8*2]
lea %4, [%4+%8*2]
lea %5, [%5+%8*2]
lea %6, [%6+%8*2]
%endif
psadbw mm1, mm0
paddw mm7, mm1
%endmacro
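The PROCESS_16X2X4 and PROCESS_8X2X4 macros serve the "4d" entry points: LOAD_X4_ADDRESSES reads four reference pointers from ref_ptr_base, and one source block is compared against all four candidates. A C sketch of that contract, with illustrative names and an unsigned result array:
static void sad_wxh_x4d_sketch(const unsigned char *src, int src_stride,
                               const unsigned char *const ref[4], int ref_stride,
                               int w, int h, unsigned int sad_array[4])
{
    int k, r, c;
    for (k = 0; k < 4; ++k)
    {
        unsigned int sad = 0;
        for (r = 0; r < h; ++r)
        {
            for (c = 0; c < w; ++c)
            {
                const int d = src[r * src_stride + c] - ref[k][r * ref_stride + c];
                sad += (unsigned int)(d < 0 ? -d : d);
            }
        }
        sad_array[k] = sad;
    }
}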
;void vp8_sad16x16x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad16x16x3_sse3) PRIVATE
sym(vp8_sad16x16x3_sse3):
STACK_FRAME_CREATE_X3
PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
mov rcx, result_ptr
movq xmm0, xmm5
psrldq xmm5, 8
paddw xmm0, xmm5
movd [rcx], xmm0
;-
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movd [rcx+4], xmm0
;-
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movd [rcx+8], xmm0
STACK_FRAME_DESTROY_X3
;void vp8_sad16x8x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad16x8x3_sse3) PRIVATE
sym(vp8_sad16x8x3_sse3):
STACK_FRAME_CREATE_X3
PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
mov rcx, result_ptr
movq xmm0, xmm5
psrldq xmm5, 8
paddw xmm0, xmm5
movd [rcx], xmm0
;-
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movd [rcx+4], xmm0
;-
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movd [rcx+8], xmm0
STACK_FRAME_DESTROY_X3
;void vp8_sad8x16x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad8x16x3_sse3) PRIVATE
sym(vp8_sad8x16x3_sse3):
STACK_FRAME_CREATE_X3
PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
mov rcx, result_ptr
punpckldq mm5, mm6
movq [rcx], mm5
movd [rcx+8], mm7
STACK_FRAME_DESTROY_X3
;void vp8_sad8x8x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad8x8x3_sse3) PRIVATE
sym(vp8_sad8x8x3_sse3):
STACK_FRAME_CREATE_X3
PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
mov rcx, result_ptr
punpckldq mm5, mm6
movq [rcx], mm5
movd [rcx+8], mm7
STACK_FRAME_DESTROY_X3
;void vp8_sad4x4x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad4x4x3_sse3) PRIVATE
sym(vp8_sad4x4x3_sse3):
STACK_FRAME_CREATE_X3
movd mm0, DWORD PTR [src_ptr]
movd mm1, DWORD PTR [ref_ptr]
movd mm2, DWORD PTR [src_ptr+src_stride]
movd mm3, DWORD PTR [ref_ptr+ref_stride]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
movd mm4, DWORD PTR [ref_ptr+1]
movd mm5, DWORD PTR [ref_ptr+2]
movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
movd mm3, DWORD PTR [ref_ptr+ref_stride+2]
psadbw mm1, mm0
punpcklbw mm4, mm2
punpcklbw mm5, mm3
psadbw mm4, mm0
psadbw mm5, mm0
lea src_ptr, [src_ptr+src_stride*2]
lea ref_ptr, [ref_ptr+ref_stride*2]
movd mm0, DWORD PTR [src_ptr]
movd mm2, DWORD PTR [ref_ptr]
movd mm3, DWORD PTR [src_ptr+src_stride]
movd mm6, DWORD PTR [ref_ptr+ref_stride]
punpcklbw mm0, mm3
punpcklbw mm2, mm6
movd mm3, DWORD PTR [ref_ptr+1]
movd mm7, DWORD PTR [ref_ptr+2]
psadbw mm2, mm0
paddw mm1, mm2
movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
movd mm6, DWORD PTR [ref_ptr+ref_stride+2]
punpcklbw mm3, mm2
punpcklbw mm7, mm6
psadbw mm3, mm0
psadbw mm7, mm0
paddw mm3, mm4
paddw mm7, mm5
mov rcx, result_ptr
punpckldq mm1, mm3
movq [rcx], mm1
movd [rcx+8], mm7
STACK_FRAME_DESTROY_X3
;unsigned int vp8_sad16x16_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int max_sad)
;%define lddqu movdqu
global sym(vp8_sad16x16_sse3) PRIVATE
sym(vp8_sad16x16_sse3):
STACK_FRAME_CREATE_X3
mov end_ptr, 4
pxor xmm7, xmm7
.vp8_sad16x16_sse3_loop:
movdqa xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [ref_ptr]
movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
lea src_ptr, [src_ptr+src_stride*2]
lea ref_ptr, [ref_ptr+ref_stride*2]
movdqa xmm4, XMMWORD PTR [src_ptr]
movdqu xmm5, XMMWORD PTR [ref_ptr]
movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
psadbw xmm0, xmm1
movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
psadbw xmm2, xmm3
psadbw xmm4, xmm5
psadbw xmm6, xmm1
lea src_ptr, [src_ptr+src_stride*2]
lea ref_ptr, [ref_ptr+ref_stride*2]
paddw xmm7, xmm0
paddw xmm7, xmm2
paddw xmm7, xmm4
paddw xmm7, xmm6
sub end_ptr, 1
jne .vp8_sad16x16_sse3_loop
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movq rax, xmm0
STACK_FRAME_DESTROY_X3
;void vp8_copy32xn_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
global sym(vp8_copy32xn_sse3) PRIVATE
sym(vp8_copy32xn_sse3):
STACK_FRAME_CREATE_X3
.block_copy_sse3_loopx4:
lea end_ptr, [src_ptr+src_stride*2]
movdqu xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
movdqu xmm4, XMMWORD PTR [end_ptr]
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
lea src_ptr, [src_ptr+src_stride*4]
lea end_ptr, [ref_ptr+ref_stride*2]
movdqa XMMWORD PTR [ref_ptr], xmm0
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
movdqa XMMWORD PTR [end_ptr], xmm4
movdqa XMMWORD PTR [end_ptr + 16], xmm5
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
lea ref_ptr, [ref_ptr+ref_stride*4]
sub height, 4
cmp height, 4
jge .block_copy_sse3_loopx4
;Check to see if there are more rows that need to be copied.
cmp height, 0
je .copy_is_done
.block_copy_sse3_loop:
movdqu xmm0, XMMWORD PTR [src_ptr]
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
lea src_ptr, [src_ptr+src_stride]
movdqa XMMWORD PTR [ref_ptr], xmm0
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
lea ref_ptr, [ref_ptr+ref_stride]
sub height, 1
jne .block_copy_sse3_loop
.copy_is_done:
STACK_FRAME_DESTROY_X3
;void vp8_sad16x16x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr_base,
; int ref_stride,
; int *results)
global sym(vp8_sad16x16x4d_sse3) PRIVATE
sym(vp8_sad16x16x4d_sse3):
STACK_FRAME_CREATE_X4
PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
%if ABI_IS_32BIT
pop rbp
%endif
mov rcx, result_ptr
movq xmm0, xmm4
psrldq xmm4, 8
paddw xmm0, xmm4
movd [rcx], xmm0
;-
movq xmm0, xmm5
psrldq xmm5, 8
paddw xmm0, xmm5
movd [rcx+4], xmm0
;-
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movd [rcx+8], xmm0
;-
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movd [rcx+12], xmm0
STACK_FRAME_DESTROY_X4
;void vp8_sad16x8x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr_base,
; int ref_stride,
; int *results)
global sym(vp8_sad16x8x4d_sse3) PRIVATE
sym(vp8_sad16x8x4d_sse3):
STACK_FRAME_CREATE_X4
PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
%if ABI_IS_32BIT
pop rbp
%endif
mov rcx, result_ptr
movq xmm0, xmm4
psrldq xmm4, 8
paddw xmm0, xmm4
movd [rcx], xmm0
;-
movq xmm0, xmm5
psrldq xmm5, 8
paddw xmm0, xmm5
movd [rcx+4], xmm0
;-
movq xmm0, xmm6
psrldq xmm6, 8
paddw xmm0, xmm6
movd [rcx+8], xmm0
;-
movq xmm0, xmm7
psrldq xmm7, 8
paddw xmm0, xmm7
movd [rcx+12], xmm0
STACK_FRAME_DESTROY_X4
;void vp8_sad8x16x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad8x16x4d_sse3) PRIVATE
sym(vp8_sad8x16x4d_sse3):
STACK_FRAME_CREATE_X4
PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
%if ABI_IS_32BIT
pop rbp
%endif
mov rcx, result_ptr
punpckldq mm4, mm5
punpckldq mm6, mm7
movq [rcx], mm4
movq [rcx+8], mm6
STACK_FRAME_DESTROY_X4
;void vp8_sad8x8x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad8x8x4d_sse3) PRIVATE
sym(vp8_sad8x8x4d_sse3):
STACK_FRAME_CREATE_X4
PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
%if ABI_IS_32BIT
pop rbp
%endif
mov rcx, result_ptr
punpckldq mm4, mm5
punpckldq mm6, mm7
movq [rcx], mm4
movq [rcx+8], mm6
STACK_FRAME_DESTROY_X4
;void vp8_sad4x4x4d_sse3(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
global sym(vp8_sad4x4x4d_sse3) PRIVATE
sym(vp8_sad4x4x4d_sse3):
STACK_FRAME_CREATE_X4
movd mm0, DWORD PTR [src_ptr]
movd mm1, DWORD PTR [r0_ptr]
movd mm2, DWORD PTR [src_ptr+src_stride]
movd mm3, DWORD PTR [r0_ptr+ref_stride]
punpcklbw mm0, mm2
punpcklbw mm1, mm3
movd mm4, DWORD PTR [r1_ptr]
movd mm5, DWORD PTR [r2_ptr]
movd mm6, DWORD PTR [r3_ptr]
movd mm2, DWORD PTR [r1_ptr+ref_stride]
movd mm3, DWORD PTR [r2_ptr+ref_stride]
movd mm7, DWORD PTR [r3_ptr+ref_stride]
psadbw mm1, mm0
punpcklbw mm4, mm2
punpcklbw mm5, mm3
punpcklbw mm6, mm7
psadbw mm4, mm0
psadbw mm5, mm0
psadbw mm6, mm0
lea src_ptr, [src_ptr+src_stride*2]
lea r0_ptr, [r0_ptr+ref_stride*2]
lea r1_ptr, [r1_ptr+ref_stride*2]
lea r2_ptr, [r2_ptr+ref_stride*2]
lea r3_ptr, [r3_ptr+ref_stride*2]
movd mm0, DWORD PTR [src_ptr]
movd mm2, DWORD PTR [r0_ptr]
movd mm3, DWORD PTR [src_ptr+src_stride]
movd mm7, DWORD PTR [r0_ptr+ref_stride]
punpcklbw mm0, mm3
punpcklbw mm2, mm7
movd mm3, DWORD PTR [r1_ptr]
movd mm7, DWORD PTR [r2_ptr]
psadbw mm2, mm0
%if ABI_IS_32BIT
mov rax, rbp
pop rbp
%define ref_stride rax
%endif
mov rsi, result_ptr
paddw mm1, mm2
movd [rsi], mm1
movd mm2, DWORD PTR [r1_ptr+ref_stride]
movd mm1, DWORD PTR [r2_ptr+ref_stride]
punpcklbw mm3, mm2
punpcklbw mm7, mm1
psadbw mm3, mm0
psadbw mm7, mm0
movd mm2, DWORD PTR [r3_ptr]
movd mm1, DWORD PTR [r3_ptr+ref_stride]
paddw mm3, mm4
paddw mm7, mm5
movd [rsi+4], mm3
punpcklbw mm2, mm1
movd [rsi+8], mm7
psadbw mm2, mm0
paddw mm2, mm6
movd [rsi+12], mm2
STACK_FRAME_DESTROY_X4

View File

@@ -1,353 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%macro PROCESS_16X2X8 1
%if %1
movdqa xmm0, XMMWORD PTR [rsi]
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
movq xmm2, MMWORD PTR [rdi+16]
punpcklqdq xmm1, xmm3
punpcklqdq xmm3, xmm2
movdqa xmm2, xmm1
mpsadbw xmm1, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
psrldq xmm0, 8
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0
mpsadbw xmm4, xmm0, 0x5
paddw xmm1, xmm2
paddw xmm1, xmm3
paddw xmm1, xmm4
%else
movdqa xmm0, XMMWORD PTR [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
movq xmm2, MMWORD PTR [rdi+16]
punpcklqdq xmm5, xmm3
punpcklqdq xmm3, xmm2
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
psrldq xmm0, 8
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0
mpsadbw xmm4, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm5, xmm3
paddw xmm5, xmm4
paddw xmm1, xmm5
%endif
movdqa xmm0, XMMWORD PTR [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
movq xmm2, MMWORD PTR [rdi+ rdx+16]
punpcklqdq xmm5, xmm3
punpcklqdq xmm3, xmm2
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
psrldq xmm0, 8
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0
mpsadbw xmm4, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm5, xmm3
paddw xmm5, xmm4
paddw xmm1, xmm5
%endmacro
%macro PROCESS_8X2X8 1
%if %1
movq xmm0, MMWORD PTR [rsi]
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm1, xmm3
movdqa xmm2, xmm1
mpsadbw xmm1, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
paddw xmm1, xmm2
%else
movq xmm0, MMWORD PTR [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm5, xmm3
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm1, xmm5
%endif
movq xmm0, MMWORD PTR [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
punpcklqdq xmm5, xmm3
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm1, xmm5
%endmacro
%macro PROCESS_4X2X8 1
%if %1
movd xmm0, [rsi]
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm1, xmm3
mpsadbw xmm1, xmm0, 0x0
%else
movd xmm0, [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm5, xmm3
mpsadbw xmm5, xmm0, 0x0
paddw xmm1, xmm5
%endif
movd xmm0, [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
punpcklqdq xmm5, xmm3
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
mpsadbw xmm5, xmm0, 0x0
paddw xmm1, xmm5
%endmacro
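The SSE4 macros above are built on mpsadbw, which compares one 4-byte group against eight overlapping 4-byte windows; the macros combine those partial results so the routines below report SADs at eight consecutive horizontal reference offsets. A C model of that "x8" output, using hypothetical names (the reference rows need w+7 readable columns):
static void sad_wxh_x8_sketch(const unsigned char *src, int src_stride,
                              const unsigned char *ref, int ref_stride,
                              int w, int h, unsigned short sad_array[8])
{
    int k, r, c;
    for (k = 0; k < 8; ++k)
    {
        unsigned int sad = 0;
        for (r = 0; r < h; ++r)
        {
            for (c = 0; c < w; ++c)
            {
                const int d = src[r * src_stride + c] - ref[r * ref_stride + c + k];
                sad += (unsigned int)(d < 0 ? -d : d);
            }
        }
        sad_array[k] = (unsigned short)sad;
    }
}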
;void vp8_sad16x16x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array);
global sym(vp8_sad16x16x8_sse4) PRIVATE
sym(vp8_sad16x16x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_16X2X8 1
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad16x8x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
global sym(vp8_sad16x8x8_sse4) PRIVATE
sym(vp8_sad16x8x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_16X2X8 1
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad8x8x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
global sym(vp8_sad8x8x8_sse4) PRIVATE
sym(vp8_sad8x8x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_8X2X8 1
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad8x16x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
global sym(vp8_sad8x16x8_sse4) PRIVATE
sym(vp8_sad8x16x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_8X2X8 1
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad4x4x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
global sym(vp8_sad4x4x8_sse4) PRIVATE
sym(vp8_sad4x4x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_4X2X8 1
PROCESS_4X2X8 0
mov rdi, arg(4) ;Results
movdqa XMMWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret

View File

@@ -13,393 +13,6 @@
%define xmm_filter_shift 7
;unsigned int vp8_get_mb_ss_sse2
;(
; short *src_ptr
;)
global sym(vp8_get_mb_ss_sse2) PRIVATE
sym(vp8_get_mb_ss_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 1
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
mov rax, arg(0) ;[src_ptr]
mov rcx, 8
pxor xmm4, xmm4
.NEXTROW:
movdqa xmm0, [rax]
movdqa xmm1, [rax+16]
movdqa xmm2, [rax+32]
movdqa xmm3, [rax+48]
pmaddwd xmm0, xmm0
pmaddwd xmm1, xmm1
pmaddwd xmm2, xmm2
pmaddwd xmm3, xmm3
paddd xmm0, xmm1
paddd xmm2, xmm3
paddd xmm4, xmm0
paddd xmm4, xmm2
add rax, 0x40
dec rcx
ja .NEXTROW
movdqa xmm3,xmm4
psrldq xmm4,8
paddd xmm4,xmm3
movdqa xmm3,xmm4
psrldq xmm4,4
paddd xmm4,xmm3
movq rax,xmm4
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
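vp8_get_mb_ss_sse2 above sums the squares of a macroblock's 256 signed 16-bit residuals (pmaddwd squares and pairs them, paddd accumulates, and the lanes of xmm4 are folded together at the end). An equivalent C sketch, assuming, as the vector code does, that the residuals are small enough for the total to fit in 32 bits:
static unsigned int get_mb_ss_c_sketch(const short *src_ptr)
{
    unsigned int ss = 0;
    int i;
    for (i = 0; i < 256; ++i)
        ss += (unsigned int)(src_ptr[i] * src_ptr[i]);
    return ss;
}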
;unsigned int vp8_get16x16var_sse2
;(
; unsigned char * src_ptr,
; int source_stride,
; unsigned char * ref_ptr,
; int recon_stride,
; unsigned int * SSE,
; int * Sum
;)
global sym(vp8_get16x16var_sse2) PRIVATE
sym(vp8_get16x16var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
push rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;[src_ptr]
mov rdi, arg(2) ;[ref_ptr]
movsxd rax, DWORD PTR arg(1) ;[source_stride]
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
; Prefetch data
lea rcx, [rax+rax*2]
prefetcht0 [rsi]
prefetcht0 [rsi+rax]
prefetcht0 [rsi+rax*2]
prefetcht0 [rsi+rcx]
lea rbx, [rsi+rax*4]
prefetcht0 [rbx]
prefetcht0 [rbx+rax]
prefetcht0 [rbx+rax*2]
prefetcht0 [rbx+rcx]
lea rcx, [rdx+rdx*2]
prefetcht0 [rdi]
prefetcht0 [rdi+rdx]
prefetcht0 [rdi+rdx*2]
prefetcht0 [rdi+rcx]
lea rbx, [rdi+rdx*4]
prefetcht0 [rbx]
prefetcht0 [rbx+rdx]
prefetcht0 [rbx+rdx*2]
prefetcht0 [rbx+rcx]
pxor xmm0, xmm0 ; clear xmm0 for unpack
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
mov rcx, 16
.var16loop:
movdqu xmm1, XMMWORD PTR [rsi]
movdqu xmm2, XMMWORD PTR [rdi]
prefetcht0 [rsi+rax*8]
prefetcht0 [rdi+rdx*8]
movdqa xmm3, xmm1
movdqa xmm4, xmm2
punpcklbw xmm1, xmm0
punpckhbw xmm3, xmm0
punpcklbw xmm2, xmm0
punpckhbw xmm4, xmm0
psubw xmm1, xmm2
psubw xmm3, xmm4
paddw xmm7, xmm1
pmaddwd xmm1, xmm1
paddw xmm7, xmm3
pmaddwd xmm3, xmm3
paddd xmm6, xmm1
paddd xmm6, xmm3
add rsi, rax
add rdi, rdx
sub rcx, 1
jnz .var16loop
movdqa xmm1, xmm6
pxor xmm6, xmm6
pxor xmm5, xmm5
punpcklwd xmm6, xmm7
punpckhwd xmm5, xmm7
psrad xmm5, 16
psrad xmm6, 16
paddd xmm6, xmm5
movdqa xmm2, xmm1
punpckldq xmm1, xmm0
punpckhdq xmm2, xmm0
movdqa xmm7, xmm6
paddd xmm1, xmm2
punpckldq xmm6, xmm0
punpckhdq xmm7, xmm0
paddd xmm6, xmm7
movdqa xmm2, xmm1
movdqa xmm7, xmm6
psrldq xmm1, 8
psrldq xmm6, 8
paddd xmm7, xmm6
paddd xmm1, xmm2
mov rax, arg(5) ;[Sum]
mov rdi, arg(4) ;[SSE]
movd DWORD PTR [rax], xmm7
movd DWORD PTR [rdi], xmm1
; begin epilog
pop rdi
pop rsi
pop rbx
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
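The get16x16var/get8x8var kernels share one contract: report the sum of squared differences through *SSE and the signed sum of differences through *Sum, which the C wrappers in this change combine into a variance. A plain-C model of that contract, illustrative only and not code from this change:
static void get_var_c_sketch(const unsigned char *src, int src_stride,
                             const unsigned char *ref, int ref_stride,
                             int w, int h, unsigned int *SSE, int *Sum)
{
    int r, c, sum = 0;
    unsigned int sse = 0;
    for (r = 0; r < h; ++r)
    {
        for (c = 0; c < w; ++c)
        {
            const int d = src[r * src_stride + c] - ref[r * ref_stride + c];
            sum += d;
            sse += (unsigned int)(d * d);
        }
    }
    *SSE = sse;
    *Sum = sum;
}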
;unsigned int vp8_get8x8var_sse2
;(
; unsigned char * src_ptr,
; int source_stride,
; unsigned char * ref_ptr,
; int recon_stride,
; unsigned int * SSE,
; int * Sum
;)
global sym(vp8_get8x8var_sse2) PRIVATE
sym(vp8_get8x8var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
mov rsi, arg(0) ;[src_ptr]
mov rdi, arg(2) ;[ref_ptr]
movsxd rax, DWORD PTR arg(1) ;[source_stride]
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
pxor xmm0, xmm0 ; clear xmm0 for unpack
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
movq xmm1, QWORD PTR [rsi]
movq xmm2, QWORD PTR [rdi]
punpcklbw xmm1, xmm0
punpcklbw xmm2, xmm0
psubsw xmm1, xmm2
paddw xmm7, xmm1
pmaddwd xmm1, xmm1
movq xmm2, QWORD PTR[rsi + rax]
movq xmm3, QWORD PTR[rdi + rdx]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
movq xmm2, QWORD PTR[rsi + rax * 2]
movq xmm3, QWORD PTR[rdi + rdx * 2]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
lea rsi, [rsi + rax * 2]
lea rdi, [rdi + rdx * 2]
movq xmm2, QWORD PTR[rsi + rax]
movq xmm3, QWORD PTR[rdi + rdx]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
movq xmm2, QWORD PTR[rsi + rax *2]
movq xmm3, QWORD PTR[rdi + rdx *2]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
lea rsi, [rsi + rax * 2]
lea rdi, [rdi + rdx * 2]
movq xmm2, QWORD PTR[rsi + rax]
movq xmm3, QWORD PTR[rdi + rdx]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
movq xmm2, QWORD PTR[rsi + rax *2]
movq xmm3, QWORD PTR[rdi + rdx *2]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
lea rsi, [rsi + rax * 2]
lea rdi, [rdi + rdx * 2]
movq xmm2, QWORD PTR[rsi + rax]
movq xmm3, QWORD PTR[rdi + rdx]
punpcklbw xmm2, xmm0
punpcklbw xmm3, xmm0
psubsw xmm2, xmm3
paddw xmm7, xmm2
pmaddwd xmm2, xmm2
paddd xmm1, xmm2
movdqa xmm6, xmm7
punpcklwd xmm6, xmm0
punpckhwd xmm7, xmm0
movdqa xmm2, xmm1
paddw xmm6, xmm7
punpckldq xmm1, xmm0
punpckhdq xmm2, xmm0
movdqa xmm7, xmm6
paddd xmm1, xmm2
punpckldq xmm6, xmm0
punpckhdq xmm7, xmm0
paddw xmm6, xmm7
movdqa xmm2, xmm1
movdqa xmm7, xmm6
psrldq xmm1, 8
psrldq xmm6, 8
paddw xmm7, xmm6
paddd xmm1, xmm2
mov rax, arg(5) ;[Sum]
mov rdi, arg(4) ;[SSE]
movq rdx, xmm7
movsx rcx, dx
mov dword ptr [rax], ecx
movd DWORD PTR [rdi], xmm1
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block2d_bil_var_sse2
;(
; unsigned char *ref_ptr,

View File

@@ -8,19 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
extern unsigned int vp8_get16x16var_sse2
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
extern void vp8_half_horiz_vert_variance16x_h_sse2
(
const unsigned char *ref_ptr,

View File

@@ -127,7 +127,7 @@ void vp8_sixtap_predict4x4_mmx
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
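The DECLARE_ALIGNED_ARRAY to DECLARE_ALIGNED conversion above (repeated in the hunks that follow) declares the aligned temporary directly instead of going through the old array helper. With typical definitions, shown here as an assumption rather than the project's exact macro, the new form expands to a compiler alignment attribute on an ordinary array:
/* Illustrative stand-in for the real macro, named DECLARE_ALIGNED_SKETCH here. */
#if defined(_MSC_VER)
#define DECLARE_ALIGNED_SKETCH(n, typ, val) __declspec(align(n)) typ val
#else
#define DECLARE_ALIGNED_SKETCH(n, typ, val) typ val __attribute__((aligned(n)))
#endif

/* Usage mirroring the diff: a 16-byte aligned filter scratch buffer. */
DECLARE_ALIGNED_SKETCH(16, unsigned short, FData2_example[16 * 16]);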
@@ -148,7 +148,7 @@ void vp8_sixtap_predict16x16_mmx
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
@@ -180,7 +180,7 @@ void vp8_sixtap_predict8x8_mmx
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
@@ -206,7 +206,7 @@ void vp8_sixtap_predict8x4_mmx
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
@@ -252,7 +252,7 @@ void vp8_sixtap_predict16x16_sse2
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
@@ -292,7 +292,7 @@ void vp8_sixtap_predict8x8_sse2
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
if (xoffset)
@@ -330,7 +330,7 @@ void vp8_sixtap_predict8x4_sse2
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data buffer used in filtering */
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
const short *HFilter, *VFilter;
if (xoffset)
@@ -432,7 +432,7 @@ void vp8_sixtap_predict16x16_ssse3
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
DECLARE_ALIGNED(16, unsigned char, FData2[24*24]);
if (xoffset)
{
@@ -480,7 +480,7 @@ void vp8_sixtap_predict8x8_ssse3
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
if (xoffset)
{
@@ -528,7 +528,7 @@ void vp8_sixtap_predict8x4_ssse3
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
if (xoffset)
{
@@ -576,7 +576,7 @@ void vp8_sixtap_predict4x4_ssse3
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
DECLARE_ALIGNED(16, unsigned char, FData2[4*9]);
if (xoffset)
{

View File

@@ -0,0 +1,353 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%define mmx_filter_shift 7
;void vp8_filter_block2d_bil4x4_var_mmx
;(
; unsigned char *ref_ptr,
; int ref_pixels_per_line,
; unsigned char *src_ptr,
; int src_pixels_per_line,
; unsigned short *HFilter,
; unsigned short *VFilter,
; int *sum,
; unsigned int *sumsquared
;)
global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
sym(vp8_filter_block2d_bil4x4_var_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
pxor mm6, mm6 ;
pxor mm7, mm7 ;
mov rax, arg(4) ;HFilter ;
mov rdx, arg(5) ;VFilter ;
mov rsi, arg(0) ;ref_ptr ;
mov rdi, arg(2) ;src_ptr ;
mov rcx, 4 ;
pxor mm0, mm0 ;
movd mm1, [rsi] ;
movd mm3, [rsi+1] ;
punpcklbw mm1, mm0 ;
pmullw mm1, [rax] ;
punpcklbw mm3, mm0 ;
pmullw mm3, [rax+8] ;
paddw mm1, mm3 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movq mm5, mm1
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
add rsi, r8
%endif
.filter_block2d_bil4x4_var_mmx_loop:
movd mm1, [rsi] ;
movd mm3, [rsi+1] ;
punpcklbw mm1, mm0 ;
pmullw mm1, [rax] ;
punpcklbw mm3, mm0 ;
pmullw mm3, [rax+8] ;
paddw mm1, mm3 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movq mm3, mm5 ;
movq mm5, mm1 ;
pmullw mm3, [rdx] ;
pmullw mm1, [rdx+8] ;
paddw mm1, mm3 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
movd mm3, [rdi] ;
punpcklbw mm3, mm0 ;
psubw mm1, mm3 ;
paddw mm6, mm1 ;
pmaddwd mm1, mm1 ;
paddd mm7, mm1 ;
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
add rsi, r8
add rdi, r9
%endif
sub rcx, 1 ;
jnz .filter_block2d_bil4x4_var_mmx_loop ;
pxor mm3, mm3 ;
pxor mm2, mm2 ;
punpcklwd mm2, mm6 ;
punpckhwd mm3, mm6 ;
paddd mm2, mm3 ;
movq mm6, mm2 ;
psrlq mm6, 32 ;
paddd mm2, mm6 ;
psrad mm2, 16 ;
movq mm4, mm7 ;
psrlq mm4, 32 ;
paddd mm4, mm7 ;
mov rdi, arg(6) ;sum
mov rsi, arg(7) ;sumsquared
movd dword ptr [rdi], mm2 ;
movd dword ptr [rsi], mm4 ;
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
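The 4x4 routine above applies a two-tap bilinear filter horizontally and then vertically before accumulating the difference statistics; mmx_bi_rd supplies the rounding bias of 64 and mmx_filter_shift the shift of 7. Assuming the usual convention that the two filter taps sum to 128, a single tap evaluation reduces to this C sketch:
/* One two-tap bilinear sample: weights w0 + w1 are assumed to total 128. */
static unsigned char bilinear_tap_sketch(unsigned char a, unsigned char b,
                                         int w0, int w1)
{
    return (unsigned char)((a * w0 + b * w1 + 64) >> 7);
}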
;void vp8_filter_block2d_bil_var_mmx
;(
; unsigned char *ref_ptr,
; int ref_pixels_per_line,
; unsigned char *src_ptr,
; int src_pixels_per_line,
; unsigned int Height,
; unsigned short *HFilter,
; unsigned short *VFilter,
; int *sum,
; unsigned int *sumsquared
;)
global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
sym(vp8_filter_block2d_bil_var_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
pxor mm6, mm6 ;
pxor mm7, mm7 ;
mov rax, arg(5) ;HFilter ;
mov rdx, arg(6) ;VFilter ;
mov rsi, arg(0) ;ref_ptr ;
mov rdi, arg(2) ;src_ptr ;
movsxd rcx, dword ptr arg(4) ;Height ;
pxor mm0, mm0 ;
movq mm1, [rsi] ;
movq mm3, [rsi+1] ;
movq mm2, mm1 ;
movq mm4, mm3 ;
punpcklbw mm1, mm0 ;
punpckhbw mm2, mm0 ;
pmullw mm1, [rax] ;
pmullw mm2, [rax] ;
punpcklbw mm3, mm0 ;
punpckhbw mm4, mm0 ;
pmullw mm3, [rax+8] ;
pmullw mm4, [rax+8] ;
paddw mm1, mm3 ;
paddw mm2, mm4 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm2, mmx_filter_shift ;
movq mm5, mm1
packuswb mm5, mm2 ;
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
add rsi, r8
%endif
.filter_block2d_bil_var_mmx_loop:
movq mm1, [rsi] ;
movq mm3, [rsi+1] ;
movq mm2, mm1 ;
movq mm4, mm3 ;
punpcklbw mm1, mm0 ;
punpckhbw mm2, mm0 ;
pmullw mm1, [rax] ;
pmullw mm2, [rax] ;
punpcklbw mm3, mm0 ;
punpckhbw mm4, mm0 ;
pmullw mm3, [rax+8] ;
pmullw mm4, [rax+8] ;
paddw mm1, mm3 ;
paddw mm2, mm4 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm2, mmx_filter_shift ;
movq mm3, mm5 ;
movq mm4, mm5 ;
punpcklbw mm3, mm0 ;
punpckhbw mm4, mm0 ;
movq mm5, mm1 ;
packuswb mm5, mm2 ;
pmullw mm3, [rdx] ;
pmullw mm4, [rdx] ;
pmullw mm1, [rdx+8] ;
pmullw mm2, [rdx+8] ;
paddw mm1, mm3 ;
paddw mm2, mm4 ;
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
psraw mm1, mmx_filter_shift ;
psraw mm2, mmx_filter_shift ;
movq mm3, [rdi] ;
movq mm4, mm3 ;
punpcklbw mm3, mm0 ;
punpckhbw mm4, mm0 ;
psubw mm1, mm3 ;
psubw mm2, mm4 ;
paddw mm6, mm1 ;
pmaddwd mm1, mm1 ;
paddw mm6, mm2 ;
pmaddwd mm2, mm2 ;
paddd mm7, mm1 ;
paddd mm7, mm2 ;
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
add rsi, r8
add rdi, r9
%endif
sub rcx, 1 ;
jnz .filter_block2d_bil_var_mmx_loop ;
pxor mm3, mm3 ;
pxor mm2, mm2 ;
punpcklwd mm2, mm6 ;
punpckhwd mm3, mm6 ;
paddd mm2, mm3 ;
movq mm6, mm2 ;
psrlq mm6, 32 ;
paddd mm2, mm6 ;
psrad mm2, 16 ;
movq mm4, mm7 ;
psrlq mm4, 32 ;
paddd mm4, mm7 ;
mov rdi, arg(7) ;sum
mov rsi, arg(8) ;sumsquared
movd dword ptr [rdi], mm2 ;
movd dword ptr [rsi], mm4 ;
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
;short mmx_bi_rd[4] = { 64, 64, 64, 64};
align 16
mmx_bi_rd:
times 4 dw 64

View File

@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
@@ -34,25 +35,6 @@ extern void filter_block1d_v6_mmx
short *filter
);
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
extern unsigned int vp8_get8x8var_mmx
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
extern unsigned int vp8_get4x4var_mmx
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
const unsigned char *ref_ptr,
@@ -77,127 +59,6 @@ extern void vp8_filter_block2d_bil_var_mmx
unsigned int *sumsquared
);
unsigned int vp8_variance4x4_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 4));
}
unsigned int vp8_variance8x8_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 6));
}
unsigned int vp8_mse16x16_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, sse2, sse3, var;
int sum0, sum1, sum2, sum3;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
var = sse0 + sse1 + sse2 + sse3;
*sse = var;
return var;
}
unsigned int vp8_variance16x16_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, sse2, sse3, var;
int sum0, sum1, sum2, sum3, avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
var = sse0 + sse1 + sse2 + sse3;
avg = sum0 + sum1 + sum2 + sum3;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 8));
}
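vp8_mse16x16_mmx and vp8_variance16x16_mmx above stitch the 16x16 result together from four 8x8 kernels, which is valid because both the SSE and the Sum are additive over disjoint quadrants. A sketch of the combination step with illustrative names:
static unsigned int combine_quadrants_sketch(const unsigned int sse[4], const int sum[4],
                                             unsigned int *sse_out)
{
    const unsigned int var = sse[0] + sse[1] + sse[2] + sse[3];
    const int avg = sum[0] + sum[1] + sum[2] + sum[3];
    *sse_out = var;
    return var - (((unsigned int)avg * avg) >> 8);  /* >> 8 divides by the 256 pixels */
}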
unsigned int vp8_variance16x8_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, var;
int sum0, sum1, avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_variance8x16_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, var;
int sum0, sum1, avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_sub_pixel_variance4x4_mmx
(
const unsigned char *src_ptr,
@@ -286,20 +147,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
}
unsigned int vp8_sub_pixel_mse16x16_mmx(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
{
vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
return *sse;
}
unsigned int vp8_sub_pixel_variance16x8_mmx
(
const unsigned char *src_ptr,

View File

@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
@@ -30,38 +31,6 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
unsigned int *sumsquared
);
extern unsigned int vp8_get4x4var_mmx
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
unsigned int vp8_get_mb_ss_sse2
(
const short *src_ptr
);
unsigned int vp8_get16x16var_sse2
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
unsigned int vp8_get8x8var_sse2
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
);
void vp8_filter_block2d_bil_var_sse2
(
const unsigned char *ref_ptr,
@@ -135,115 +104,6 @@ void vp8_half_vert_variance16x_h_sse2
unsigned int *sumsquared
);
unsigned int vp8_variance4x4_wmt(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 4));
}
unsigned int vp8_variance8x8_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int var;
int avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 6));
}
unsigned int vp8_variance16x16_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0;
int sum0;
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
*sse = sse0;
return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
}
unsigned int vp8_mse16x16_wmt(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0;
int sum0;
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
*sse = sse0;
return sse0;
}
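vp8_mse16x16_wmt differs from its variance siblings only in keeping the raw SSE; the sum is produced by the helper but never folded back in. An illustrative side-by-side (not a libvpx function):
static void mse_and_variance_16x16(unsigned int sse, int sum,
                                   unsigned int *mse, unsigned int *var) {
    *mse = sse;                                     /* MSE path: raw SSE                */
    *var = sse - (((unsigned int)sum * sum) >> 8);  /* variance path: minus sum^2 / 256 */
}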
unsigned int vp8_variance16x8_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, var;
int sum0, sum1, avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_variance8x16_wmt
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse)
{
unsigned int sse0, sse1, var;
int sum0, sum1, avg;
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
var = sse0 + sse1;
avg = sum0 + sum1;
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp8_sub_pixel_variance4x4_wmt
(
const unsigned char *src_ptr,
@@ -378,20 +238,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}
unsigned int vp8_sub_pixel_mse16x16_wmt(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const unsigned char *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse
)
{
vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
return *sse;
}
unsigned int vp8_sub_pixel_variance16x8_wmt
(
const unsigned char *src_ptr,

View File

@@ -259,7 +259,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
return err;
}
int check_fragments_for_errors(VP8D_COMP *pbi)
static int check_fragments_for_errors(VP8D_COMP *pbi)
{
if (!pbi->ec_active &&
pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0)

View File

@@ -1,138 +0,0 @@
;
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_mse16x16_armv6|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr
; r1 int source_stride
; r2 unsigned char *ref_ptr
; r3 int recon_stride
; stack unsigned int *sse
;
;note: Based on vp8_variance16x16_armv6. In this function the sum is never used,
; so that part of the calculation has been removed.
|vp8_mse16x16_armv6| PROC
push {r4-r9, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r12, #16 ; set loop counter to 16 (=block height)
mov r4, #0 ; initialize sse = 0
loop
; 1st 4 pixels
ldr r5, [r0, #0x0] ; load 4 src pixels
ldr r6, [r2, #0x0] ; load 4 ref pixels
mov lr, #0 ; constant zero
usub8 r8, r5, r6 ; calculate difference
pld [r0, r1, lsl #1]
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
pld [r2, r3, lsl #1]
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r5, r7, lr ; calculate sum of positive differences
usad8 r6, r8, lr ; calculate sum of negative differences
orr r8, r8, r7 ; differences of all 4 pixels
ldr r5, [r0, #0x4] ; load 4 src pixels
; calculate sse
uxtb16 r6, r8 ; byte (two pixels) to halfwords
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
; 2nd 4 pixels
ldr r6, [r2, #0x4] ; load 4 ref pixels
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
usub8 r8, r5, r6 ; calculate difference
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r5, r7, lr ; calculate sum of positive differences
usad8 r6, r8, lr ; calculate sum of negative differences
orr r8, r8, r7 ; differences of all 4 pixels
ldr r5, [r0, #0x8] ; load 4 src pixels
; calculate sse
uxtb16 r6, r8 ; byte (two pixels) to halfwords
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
; 3rd 4 pixels
ldr r6, [r2, #0x8] ; load 4 ref pixels
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
usub8 r8, r5, r6 ; calculate difference
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r5, r7, lr ; calculate sum of positive differences
usad8 r6, r8, lr ; calculate sum of negative differences
orr r8, r8, r7 ; differences of all 4 pixels
ldr r5, [r0, #0xc] ; load 4 src pixels
; calculate sse
uxtb16 r6, r8 ; byte (two pixels) to halfwords
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
; 4th 4 pixels
ldr r6, [r2, #0xc] ; load 4 ref pixels
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
usub8 r8, r5, r6 ; calculate difference
add r0, r0, r1 ; set src_ptr to next row
sel r7, r8, lr ; select bytes with positive difference
usub8 r9, r6, r5 ; calculate difference with reversed operands
add r2, r2, r3 ; set ref_ptr to next row
sel r8, r9, lr ; select bytes with negative difference
; calculate partial sums
usad8 r5, r7, lr ; calculate sum of positive differences
usad8 r6, r8, lr ; calculate sum of negative differences
orr r8, r8, r7 ; differences of all 4 pixels
subs r12, r12, #1 ; next row
; calculate sse
uxtb16 r6, r8 ; byte (two pixels) to halfwords
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
bne loop
; return stuff
ldr r1, [sp, #28] ; get address of sse
mov r0, r4 ; return sse
str r4, [r1] ; store sse
pop {r4-r9, pc}
ENDP
END
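As the note at the top of the file says, only the squared differences matter here. A plain C reference of what the routine computes (a sketch for orientation; the assembly walks each row as four 4-pixel groups):
static unsigned int mse16x16_ref(const unsigned char *src, int src_stride,
                                 const unsigned char *ref, int ref_stride,
                                 unsigned int *sse) {
    unsigned int total = 0;
    int r, c;
    for (r = 0; r < 16; ++r) {
        for (c = 0; c < 16; ++c) {
            const int d = src[c] - ref[c];
            total += (unsigned int)(d * d);   /* accumulate squared difference */
        }
        src += src_stride;
        ref += ref_stride;
    }
    *sse = total;
    return total;
}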

View File

@@ -1,131 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
unsigned int vp8_mse16x16_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
int64x1_t d0s64;
uint8x16_t q0u8, q1u8, q2u8, q3u8;
int32x4_t q7s32, q8s32, q9s32, q10s32;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int64x2_t q1s64;
q7s32 = vdupq_n_s32(0);
q8s32 = vdupq_n_s32(0);
q9s32 = vdupq_n_s32(0);
q10s32 = vdupq_n_s32(0);
for (i = 0; i < 8; i++) { // mse16x16_neon_loop
q0u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
q1u8 = vld1q_u8(src_ptr);
src_ptr += source_stride;
q2u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
q3u8 = vld1q_u8(ref_ptr);
ref_ptr += recon_stride;
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
}
q7s32 = vaddq_s32(q7s32, q8s32);
q9s32 = vaddq_s32(q9s32, q10s32);
q10s32 = vaddq_s32(q7s32, q9s32);
q1s64 = vpaddlq_s32(q10s32);
d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
}
unsigned int vp8_get4x4sse_cs_neon(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride) {
int16x4_t d22s16, d24s16, d26s16, d28s16;
int64x1_t d0s64;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
int32x4_t q7s32, q8s32, q9s32, q10s32;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
int64x2_t q1s64;
d0u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d4u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d1u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d5u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d2u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d6u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
d3u8 = vld1_u8(src_ptr);
src_ptr += source_stride;
d7u8 = vld1_u8(ref_ptr);
ref_ptr += recon_stride;
q11u16 = vsubl_u8(d0u8, d4u8);
q12u16 = vsubl_u8(d1u8, d5u8);
q13u16 = vsubl_u8(d2u8, d6u8);
q14u16 = vsubl_u8(d3u8, d7u8);
d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
q7s32 = vmull_s16(d22s16, d22s16);
q8s32 = vmull_s16(d24s16, d24s16);
q9s32 = vmull_s16(d26s16, d26s16);
q10s32 = vmull_s16(d28s16, d28s16);
q7s32 = vaddq_s32(q7s32, q8s32);
q9s32 = vaddq_s32(q9s32, q10s32);
q9s32 = vaddq_s32(q7s32, q9s32);
q1s64 = vpaddlq_s32(q9s32);
d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
}
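Both NEON routines above finish with the same horizontal reduction: the four 32-bit partial sums are pairwise-widened to 64 bits and the two halves added, leaving the total in lane 0. A sketch of just that step:
#include <arm_neon.h>
static unsigned int horizontal_add_s32x4(int32x4_t v) {
    const int64x2_t pair_sums = vpaddlq_s32(v);   /* widen and add adjacent lanes */
    const int64x1_t total = vadd_s64(vget_low_s64(pair_sums),
                                     vget_high_s64(pair_sums));
    return vget_lane_u32(vreinterpret_u32_s64(total), 0);
}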

View File

@@ -11,6 +11,8 @@
#include <math.h>
#include "./vp8_rtcd.h"
void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
{
int i;

View File

@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "encodemb.h"
#include "encodemv.h"
#include "vp8/common/common.h"
@@ -90,7 +91,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
* lambda using a non-linear combination (e.g., the smallest, or second
* smallest, etc.).
*/
act = vp8_variance16x16(x->src.y_buffer,
act = vpx_variance16x16(x->src.y_buffer,
x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
act = act<<4;
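A minimal sketch of the activity measure above, assuming VP8_VAR_OFFS is the constant (all-128) 16-byte reference defined earlier in encodeframe.c and that vpx_variance16x16 is declared via the ./vpx_dsp_rtcd.h include added in this change: activity is the block's variance against a flat predictor, scaled by 16.
static unsigned int mb_activity_sketch(const unsigned char *src, int stride) {
    static const unsigned char flat[16] = {
        128, 128, 128, 128, 128, 128, 128, 128,
        128, 128, 128, 128, 128, 128, 128, 128
    };
    unsigned int sse;
    return vpx_variance16x16(src, stride, flat, 0, &sse) << 4;  /* ref stride 0: same row reused */
}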

View File

@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "quantize.h"
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
@@ -44,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
}
}
intra_pred_var = vp8_get_mb_ss(x->src_diff);
intra_pred_var = vpx_get_mb_ss(x->src_diff);
return intra_pred_var;
}
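vpx_get_mb_ss() returns the plain sum of squares of the residual; a scalar sketch under the usual 256-entry (luma) int16 view of src_diff, with an illustrative helper name:
static unsigned int mb_sum_of_squares(const short *src_diff) {
    unsigned int ss = 0;
    int i;
    for (i = 0; i < 256; ++i)
        ss += (unsigned int)(src_diff[i] * src_diff[i]);   /* squared residual sample */
    return ss;
}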

View File

@@ -19,8 +19,6 @@
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip);
extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
static THREAD_FUNCTION thread_loopfilter(void *p_data)
{
VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);

View File

@@ -12,6 +12,7 @@
#include <limits.h>
#include <stdio.h>
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "block.h"
#include "onyx_int.h"
@@ -34,8 +35,6 @@
/* #define OUTPUT_FPF 1 */
extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi);
extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
extern void vp8_alloc_compressor_data(VP8_COMP *cpi);
#define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q]
extern int vp8_kf_boost_qadjustment[QINDEX_RANGE];
@@ -424,14 +423,14 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
/* Set up pointers for this macro block raw buffer */
raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset
+ d->offset);
vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
(unsigned int *)(raw_motion_err));
vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride,
(unsigned int *)(raw_motion_err));
/* Set up pointers for this macro block recon buffer */
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
(unsigned int *)(best_motion_err));
vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
(unsigned int *)(best_motion_err));
}
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
@@ -455,7 +454,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
int new_mv_mode_penalty = 256;
/* override the default variance function to use MSE */
v_fn_ptr.vf = vp8_mse16x16;
v_fn_ptr.vf = vpx_mse16x16;
/* Set up pointers for this macro block recon buffer */
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
@@ -1329,8 +1328,6 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
return Q;
}
extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
void vp8_init_second_pass(VP8_COMP *cpi)
{
FIRSTPASS_STATS this_frame;

View File

@@ -9,6 +9,8 @@
*/
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
@@ -900,7 +902,7 @@ int vp8_hex_search
this_offset = base_offset + (br * (pre_stride)) + bc;
this_mv.as_mv.row = br;
this_mv.as_mv.col = bc;
bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride)
+ mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
#if CONFIG_MULTI_RES_ENCODING
@@ -927,7 +929,7 @@ int vp8_hex_search
this_mv.as_mv.row = br + hex[i].row;
this_mv.as_mv.col = bc + hex[i].col;
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -938,7 +940,7 @@ int vp8_hex_search
this_mv.as_mv.col = bc + hex[i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -964,7 +966,7 @@ int vp8_hex_search
this_mv.as_mv.row = br + next_chkpts[k][i].row;
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -975,7 +977,7 @@ int vp8_hex_search
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -1006,7 +1008,7 @@ cal_neighbors:
this_mv.as_mv.row = br + neighbors[i].row;
this_mv.as_mv.col = bc + neighbors[i].col;
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}else
@@ -1017,7 +1019,7 @@ cal_neighbors:
this_mv.as_mv.col = bc + neighbors[i].col;
CHECK_POINT
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
CHECK_BETTER
}
}
@@ -1101,7 +1103,7 @@ int vp8_diamond_search_sad_c
best_address = in_what;
/* Check the starting position */
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* search_param determines the length of the initial step and hence
@@ -1126,7 +1128,7 @@ int vp8_diamond_search_sad_c
{
check_here = ss[i].offset + best_address;
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1225,7 +1227,7 @@ int vp8_diamond_search_sadx4
best_address = in_what;
/* Check the starting position */
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* search_param determines the length of the initial step and hence the
@@ -1293,7 +1295,7 @@ int vp8_diamond_search_sadx4
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = ss[i].offset + best_address;
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1376,8 +1378,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us looking using vectors that
@@ -1402,7 +1403,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
for (c = col_min; c < col_max; c++)
{
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
@@ -1474,8 +1475,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us looking using vectors that stretch
@@ -1531,7 +1531,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
while (c < col_max)
{
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad)
{
@@ -1590,7 +1590,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_min = ref_col - distance;
int col_max = ref_col + distance;
DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
// TODO(johannkoenig): check if this alignment is necessary.
DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
unsigned int sad_array[3];
int *mvsadcost[2];
@@ -1609,8 +1610,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
best_mv->as_mv.col = ref_col;
/* Baseline value at the centre */
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
/* Apply further limits to prevent us looking using vectors that stretch
@@ -1696,7 +1696,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
while (c < col_max)
{
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{
@@ -1754,8 +1754,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
bestsad = fn_ptr->sdf(what, what_stride, best_address,
in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
for (i=0; i<search_range; i++)
@@ -1771,7 +1770,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{
@@ -1834,8 +1833,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
bestsad = fn_ptr->sdf(what, what_stride, best_address,
in_what_stride, UINT_MAX)
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
for (i=0; i<search_range; i++)
@@ -1886,7 +1884,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
{
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
if (thissad < bestsad)
{

View File

@@ -10,6 +10,7 @@
#include "vp8/common/blockd.h"
#include "modecosts.h"
#include "onyx_int.h"
#include "treewriter.h"
#include "vp8/common/entropymode.h"

View File

@@ -16,7 +16,9 @@
extern "C" {
#endif
void vp8_init_mode_costs(VP8_COMP *x);
struct VP8_COMP;
void vp8_init_mode_costs(struct VP8_COMP *x);
#ifdef __cplusplus
} // extern "C"
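The header change above trades a dependency on the full VP8_COMP definition for an opaque forward declaration. A minimal sketch of the same pattern with illustrative names:
struct example_encoder;                                    /* forward declaration only     */
void example_init_mode_costs(struct example_encoder *x);   /* prototype compiles without   */
                                                           /* including the encoder header */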

View File

@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "./vpx_scale_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp8_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "vp8/common/blockd.h"
@@ -586,7 +587,8 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
// Turn off under certain conditions (i.e., away from a key frame, and if
// we are at good quality (low Q) and most of the blocks were skipped-encoded
// in the previous frame).
if (Q >= 100) {
int qp_thresh = (cpi->oxcf.screen_content_mode == 2) ? 80 : 100;
if (Q >= qp_thresh) {
cpi->cyclic_refresh_mode_max_mbs_perframe =
(cpi->common.mb_rows * cpi->common.mb_cols) / 10;
} else if (cpi->frames_since_key > 250 &&
@@ -2010,6 +2012,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->source_alt_ref_active = 0;
cpi->common.refresh_alt_ref_frame = 0;
cpi->force_maxqp = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
cpi->b_calculate_ssimg = 0;
@@ -2126,55 +2130,55 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
}
#endif
cpi->fn_ptr[BLOCK_16X16].sdf = vp8_sad16x16;
cpi->fn_ptr[BLOCK_16X16].vf = vp8_variance16x16;
cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16;
cpi->fn_ptr[BLOCK_16X16].vf = vpx_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf = vp8_sub_pixel_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = vp8_variance_halfpixvar16x16_h;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = vp8_variance_halfpixvar16x16_v;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = vp8_variance_halfpixvar16x16_hv;
cpi->fn_ptr[BLOCK_16X16].sdx3f = vp8_sad16x16x3;
cpi->fn_ptr[BLOCK_16X16].sdx8f = vp8_sad16x16x8;
cpi->fn_ptr[BLOCK_16X16].sdx4df = vp8_sad16x16x4d;
cpi->fn_ptr[BLOCK_16X16].sdx3f = vpx_sad16x16x3;
cpi->fn_ptr[BLOCK_16X16].sdx8f = vpx_sad16x16x8;
cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d;
cpi->fn_ptr[BLOCK_16X8].sdf = vp8_sad16x8;
cpi->fn_ptr[BLOCK_16X8].vf = vp8_variance16x8;
cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8;
cpi->fn_ptr[BLOCK_16X8].vf = vpx_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf = vp8_sub_pixel_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_16X8].sdx3f = vp8_sad16x8x3;
cpi->fn_ptr[BLOCK_16X8].sdx8f = vp8_sad16x8x8;
cpi->fn_ptr[BLOCK_16X8].sdx4df = vp8_sad16x8x4d;
cpi->fn_ptr[BLOCK_16X8].sdx3f = vpx_sad16x8x3;
cpi->fn_ptr[BLOCK_16X8].sdx8f = vpx_sad16x8x8;
cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d;
cpi->fn_ptr[BLOCK_8X16].sdf = vp8_sad8x16;
cpi->fn_ptr[BLOCK_8X16].vf = vp8_variance8x16;
cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16;
cpi->fn_ptr[BLOCK_8X16].vf = vpx_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf = vp8_sub_pixel_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X16].sdx3f = vp8_sad8x16x3;
cpi->fn_ptr[BLOCK_8X16].sdx8f = vp8_sad8x16x8;
cpi->fn_ptr[BLOCK_8X16].sdx4df = vp8_sad8x16x4d;
cpi->fn_ptr[BLOCK_8X16].sdx3f = vpx_sad8x16x3;
cpi->fn_ptr[BLOCK_8X16].sdx8f = vpx_sad8x16x8;
cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d;
cpi->fn_ptr[BLOCK_8X8].sdf = vp8_sad8x8;
cpi->fn_ptr[BLOCK_8X8].vf = vp8_variance8x8;
cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8;
cpi->fn_ptr[BLOCK_8X8].vf = vpx_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf = vp8_sub_pixel_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X8].sdx3f = vp8_sad8x8x3;
cpi->fn_ptr[BLOCK_8X8].sdx8f = vp8_sad8x8x8;
cpi->fn_ptr[BLOCK_8X8].sdx4df = vp8_sad8x8x4d;
cpi->fn_ptr[BLOCK_8X8].sdx3f = vpx_sad8x8x3;
cpi->fn_ptr[BLOCK_8X8].sdx8f = vpx_sad8x8x8;
cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d;
cpi->fn_ptr[BLOCK_4X4].sdf = vp8_sad4x4;
cpi->fn_ptr[BLOCK_4X4].vf = vp8_variance4x4;
cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4;
cpi->fn_ptr[BLOCK_4X4].vf = vpx_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf = vp8_sub_pixel_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_4X4].sdx3f = vp8_sad4x4x3;
cpi->fn_ptr[BLOCK_4X4].sdx8f = vp8_sad4x4x8;
cpi->fn_ptr[BLOCK_4X4].sdx4df = vp8_sad4x4x4d;
cpi->fn_ptr[BLOCK_4X4].sdx3f = vpx_sad4x4x3;
cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8;
cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d;
#if ARCH_X86 || ARCH_X86_64
cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn;
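For orientation, the assignments being repointed above fill a per-block-size dispatch table. A simplified sketch of an entry's shape; the real struct (in vp8's variance header) also carries the svf, svf_halfpix_*, sdx3f, sdx8f, sdx4df and copymem slots seen above:
typedef unsigned int (*sad_fn)(const unsigned char *src, int src_stride,
                               const unsigned char *ref, int ref_stride);
typedef unsigned int (*variance_fn)(const unsigned char *src, int src_stride,
                                    const unsigned char *ref, int ref_stride,
                                    unsigned int *sse);
struct block_fn_table_sketch {
    sad_fn sdf;        /* e.g. vpx_sad16x16      */
    variance_fn vf;    /* e.g. vpx_variance16x16 */
};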
@@ -2554,7 +2558,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
{
unsigned int sse;
vp8_mse16x16(orig + col, orig_stride,
vpx_mse16x16(orig + col, orig_stride,
recon + col, recon_stride,
&sse);
total_sse += sse;
@@ -3380,7 +3384,7 @@ static int measure_square_diff_partial(YV12_BUFFER_CONFIG *source,
int index = block_index_row + (j >> 4);
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
unsigned int sse;
Total += vp8_mse16x16(src + j,
Total += vpx_mse16x16(src + j,
source->y_stride,
dst + j, dest->y_stride,
&sse);
@@ -3444,7 +3448,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
int index = block_index_row + (j >> 4);
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
unsigned int sse;
const unsigned int var = vp8_variance16x16(src + j,
const unsigned int var = vpx_variance16x16(src + j,
ystride,
dst + j,
ystride,
@@ -3454,7 +3458,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
// is small (to avoid effects from lighting change).
if ((sse - var) < 128) {
unsigned int sse2;
const unsigned int act = vp8_variance16x16(src + j,
const unsigned int act = vpx_variance16x16(src + j,
ystride,
const_source,
0,
@@ -4183,7 +4187,10 @@ static void encode_frame_to_data_rate
*/
if (cpi->cyclic_refresh_mode_enabled)
{
if (cpi->current_layer==0)
// Special case for screen_content_mode with golden frame updates.
int disable_cr_gf = (cpi->oxcf.screen_content_mode == 2 &&
cm->refresh_golden_frame);
if (cpi->current_layer == 0 && cpi->force_maxqp == 0 && !disable_cr_gf)
cyclic_background_refresh(cpi, Q, 0);
else
disable_segmentation(cpi);
@@ -4405,6 +4412,11 @@ static void encode_frame_to_data_rate
/* transform / motion compensation build reconstruction frame */
vp8_encode_frame(cpi);
if (cpi->oxcf.screen_content_mode == 2) {
if (vp8_drop_encodedframe_overshoot(cpi, Q))
return;
}
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
#endif
@@ -5981,7 +5993,8 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest)
for (j = 0; j < source->y_width; j += 16)
{
unsigned int sse;
Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
Total += vpx_mse16x16(src + j, source->y_stride,
dst + j, dest->y_stride, &sse);
}
src += 16 * source->y_stride;
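Squared-error totals like this one typically feed a PSNR figure; a sketch of that conversion for 8-bit samples (not code from this diff, and the zero-error cap is a convention rather than a requirement):
#include <math.h>
static double psnr_from_sse(double sse, double num_samples) {
    if (sse <= 0.0) return 100.0;   /* cap when the reconstruction is lossless */
    return 10.0 * log10((255.0 * 255.0 * num_samples) / sse);
}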

View File

@@ -526,6 +526,8 @@ typedef struct VP8_COMP
// Measure of average squared difference between source and denoised signal.
int mse_source_denoised;
int force_maxqp;
#if CONFIG_MULTITHREAD
/* multithread data */
int * mt_current_mb_col;
@@ -714,6 +716,11 @@ typedef struct VP8_COMP
} rd_costs;
} VP8_COMP;
void vp8_alloc_compressor_data(VP8_COMP *cpi);
int vp8_reverse_trans(int x);
void vp8_new_framerate(VP8_COMP *cpi, double framerate);
void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
unsigned char *dest_end, unsigned long *size);

View File

@@ -11,6 +11,7 @@
#include <limits.h>
#include "vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "modecosts.h"
#include "encodeintra.h"
@@ -29,8 +30,6 @@
#include "denoising.h"
#endif
extern int VP8_UVSSE(MACROBLOCK *x);
#ifdef SPEEDSTATS
extern unsigned int cnt_pm;
#endif
@@ -38,8 +37,6 @@ extern unsigned int cnt_pm;
extern const int vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
// Fixed point implementation of a skin color classifier. Skin color
// is modeled by a Gaussian distribution in the CbCr color space.
// See ../../test/skin_color_detector_test.cc where the reference
@@ -219,33 +216,6 @@ int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
}
unsigned int vp8_get4x4sse_cs_c
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride
)
{
int distortion = 0;
int r, c;
for (r = 0; r < 4; r++)
{
for (c = 0; c < 4; c++)
{
int diff = src_ptr[c] - ref_ptr[c];
distortion += diff * diff;
}
src_ptr += source_stride;
ref_ptr += recon_stride;
}
return distortion;
}
static int get_prediction_error(BLOCK *be, BLOCKD *b)
{
unsigned char *sptr;
@@ -253,7 +223,7 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b)
sptr = (*(be->base_src) + be->src);
dptr = b->predictor;
return vp8_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
}
@@ -1041,7 +1011,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
else
{
rate2 += rate;
distortion2 = vp8_variance16x16(
distortion2 = vpx_variance16x16(
*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -1070,7 +1040,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
xd->dst.y_stride,
xd->predictor,
16);
distortion2 = vp8_variance16x16
distortion2 = vpx_variance16x16
(*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
@@ -1551,7 +1521,7 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
xd->dst.y_stride,
xd->predictor,
16);
distortion = vp8_variance16x16
distortion = vpx_variance16x16
(*(b->base_src), b->src_stride, xd->predictor, 16, &sse);
rate = x->mbmode_cost[xd->frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

Some files were not shown because too many files have changed in this diff.