vpx/vp8/encoder/arm/variance_arm.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8/encoder/variance.h"
#include "vp8/common/filter.h"
#include "vp8/common/arm/bilinearfilter_arm.h"

#if CONFIG_SIXTEENTH_SUBPEL_UV
#define HALFNDX 8
#else
#define HALFNDX 4
#endif

#if HAVE_ARMV6

unsigned int vp8_sub_pixel_variance8x8_armv6
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    unsigned short first_pass[10*8];
    unsigned char  second_pass[8*8];
    const short *HFilter, *VFilter;

    HFilter = vp8_bilinear_filters[xoffset];
    VFilter = vp8_bilinear_filters[yoffset];

    vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass,
                                            src_pixels_per_line,
                                            9, 8, HFilter);
    vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
                                             8, 8, 8, VFilter);

    return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,
                                   dst_pixels_per_line, sse);
}

unsigned int vp8_sub_pixel_variance16x16_armv6
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    unsigned short first_pass[36*16];
    unsigned char  second_pass[20*16];
    const short *HFilter, *VFilter;
    unsigned int var;

    if (xoffset == HALFNDX && yoffset == 0)
    {
        var = vp8_variance_halfpixvar16x16_h_armv6(src_ptr, src_pixels_per_line,
                                                   dst_ptr, dst_pixels_per_line, sse);
    }
    else if (xoffset == 0 && yoffset == HALFNDX)
    {
        var = vp8_variance_halfpixvar16x16_v_armv6(src_ptr, src_pixels_per_line,
                                                   dst_ptr, dst_pixels_per_line, sse);
    }
    else if (xoffset == HALFNDX && yoffset == HALFNDX)
    {
        var = vp8_variance_halfpixvar16x16_hv_armv6(src_ptr, src_pixels_per_line,
                                                   dst_ptr, dst_pixels_per_line, sse);
    }
    else
    {
        HFilter = vp8_bilinear_filters[xoffset];
        VFilter = vp8_bilinear_filters[yoffset];

        vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass,
                                                src_pixels_per_line,
                                                17, 16, HFilter);
        vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
                                                 16, 16, 16, VFilter);

        var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr,
                                       dst_pixels_per_line, sse);
    }
    return var;
}

#endif /* HAVE_ARMV6 */


#if HAVE_ARMV7

unsigned int vp8_sub_pixel_variance16x16_neon
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
  if (xoffset == HALFNDX && yoffset == 0)
    return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
  else if (xoffset == 0 && yoffset == HALFNDX)
    return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
  else if (xoffset == HALFNDX && yoffset == HALFNDX)
    return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
  else
    return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
}

#endif
move new neon subpixel function previously wasn't guarded with ifdef ARMV7, causing a link error with ARMV6 Change-Id: I0526858be0b5f49b2bf11e9090180b2a6c48926d 2011-01-25 21:11:39 +01:00			`/*`
			`* Copyright (c) 2010 The WebM project authors. All Rights Reserved.`
			`*`
			`* Use of this source code is governed by a BSD-style license`
			`* that can be found in the LICENSE file in the root of the source`
			`* tree. An additional intellectual property rights grant can be found`
			`* in the file PATENTS. All contributing project authors may`
			`* be found in the AUTHORS file in the root of the source tree.`
			`*/`

			`#include "vpx_config.h"`
Fix relative include paths Allow compiling without adding vp8/{common,encoder,decoder} to the include paths. Change-Id: Ifeb5dac351cdfadcd659736f5158b315a0030b6c 2011-02-10 20:41:38 +01:00			`#include "vp8/encoder/variance.h"`
			`#include "vp8/common/filter.h"`
			`#include "vp8/common/arm/bilinearfilter_arm.h"`
Adds armv6 optimized variance calculation Adds vp8_sub_pixel_variance16x16_armv6 function to encoder. Integrates ARMv6 optimized bilinear interpolations from vp8/common/arm/armv6 and adds new assembly file for variance16x16 calculation. - vp8_filter_block2d_bil_first_pass_armv6 (integrated) - vp8_filter_block2d_bil_second_pass_armv6 (integrated) - vp8_variance16x16_armv6 (new) - bilinearfilter_arm.h (new) Change-Id: I18a8331ce7d031ceedd6cd415ecacb0c8f3392db 2011-02-09 15:34:56 +01:00
Supporting high precision 1/8-pel motion vectors This is the initial patch for supporting 1/8th pel motion. Currently if we configure with enable-high-precision-mv, all motion vectors would default to 1/8 pel. Encode and decode syncs fine with the current code. In the next phase the code will be refactored so that we can choose the 1/8 pel mode adaptively at a frame/segment/mb level. Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpoaltion) Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V Patch 4: HD results. http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong). Patch 5: Added mmx/sse for bilateral filtering, as well as enforced use of c-versions of subpel filters with 8-taps and 1/16th pel; Also redesigned the 8-tap filters to reduce the cut-off in order to introduce a denoising effect. There is a new configure option sixteenth-subpel-uv which will use 1/16 th pel interpolation for uv, if the motion vectors have 1/8 pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8-taps alone gives 3% improvement over thei derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high precision mv and on the hd set are to follow. Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c Patch 7: Cleaning up various debug messages. Patch 8: Merge conflict Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04 2012-02-16 18:29:54 +01:00			`#if CONFIG_SIXTEENTH_SUBPEL_UV`
			`#define HALFNDX 8`
			`#else`
			`#define HALFNDX 4`
			`#endif`

Adds armv6 optimized variance calculation Adds vp8_sub_pixel_variance16x16_armv6 function to encoder. Integrates ARMv6 optimized bilinear interpolations from vp8/common/arm/armv6 and adds new assembly file for variance16x16 calculation. - vp8_filter_block2d_bil_first_pass_armv6 (integrated) - vp8_filter_block2d_bil_second_pass_armv6 (integrated) - vp8_variance16x16_armv6 (new) - bilinearfilter_arm.h (new) Change-Id: I18a8331ce7d031ceedd6cd415ecacb0c8f3392db 2011-02-09 15:34:56 +01:00			`#if HAVE_ARMV6`

Add vp8_variance8x8_armv6 and vp8_sub_pixel_variance8x8_armv6 functions Change-Id: I08edaffc62514907fa5e90e1689269e467c857f5 2011-03-09 13:26:24 +01:00			`unsigned int vp8_sub_pixel_variance8x8_armv6`
			`(`
			`const unsigned char *src_ptr,`
			`int src_pixels_per_line,`
			`int xoffset,`
			`int yoffset,`
			`const unsigned char *dst_ptr,`
			`int dst_pixels_per_line,`
			`unsigned int *sse`
			`)`
			`{`
			`unsigned short first_pass[10*8];`
			`unsigned char second_pass[8*8];`
			`const short HFilter, VFilter;`

			`HFilter = vp8_bilinear_filters[xoffset];`
			`VFilter = vp8_bilinear_filters[yoffset];`

			`vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass,`
			`src_pixels_per_line,`
			`9, 8, HFilter);`
			`vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,`
			`8, 8, 8, VFilter);`

			`return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,`
			`dst_pixels_per_line, sse);`
			`}`

Adds armv6 optimized variance calculation Adds vp8_sub_pixel_variance16x16_armv6 function to encoder. Integrates ARMv6 optimized bilinear interpolations from vp8/common/arm/armv6 and adds new assembly file for variance16x16 calculation. - vp8_filter_block2d_bil_first_pass_armv6 (integrated) - vp8_filter_block2d_bil_second_pass_armv6 (integrated) - vp8_variance16x16_armv6 (new) - bilinearfilter_arm.h (new) Change-Id: I18a8331ce7d031ceedd6cd415ecacb0c8f3392db 2011-02-09 15:34:56 +01:00			`unsigned int vp8_sub_pixel_variance16x16_armv6`
			`(`
			`const unsigned char *src_ptr,`
			`int src_pixels_per_line,`
			`int xoffset,`
			`int yoffset,`
			`const unsigned char *dst_ptr,`
			`int dst_pixels_per_line,`
			`unsigned int *sse`
			`)`
			`{`
			`unsigned short first_pass[36*16];`
			`unsigned char second_pass[20*16];`
			`const short HFilter, VFilter;`
Half pixel variance further optimized for ARMv6 Half pixel interpolations optimized in variance calculations. Separate function calls to vp8_filter_block2d_bil_x_pass_armv6 are avoided.On average, performance improvement is 6-7% for VGA@30fps sequences. Change-Id: Idb5f118a9d51548e824719d2cfe5be0fa6996628 2011-03-28 08:51:51 +02:00			`unsigned int var;`

Supporting high precision 1/8-pel motion vectors This is the initial patch for supporting 1/8th pel motion. Currently if we configure with enable-high-precision-mv, all motion vectors would default to 1/8 pel. Encode and decode syncs fine with the current code. In the next phase the code will be refactored so that we can choose the 1/8 pel mode adaptively at a frame/segment/mb level. Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpoaltion) Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V Patch 4: HD results. http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong). Patch 5: Added mmx/sse for bilateral filtering, as well as enforced use of c-versions of subpel filters with 8-taps and 1/16th pel; Also redesigned the 8-tap filters to reduce the cut-off in order to introduce a denoising effect. There is a new configure option sixteenth-subpel-uv which will use 1/16 th pel interpolation for uv, if the motion vectors have 1/8 pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8-taps alone gives 3% improvement over thei derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high precision mv and on the hd set are to follow. Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c Patch 7: Cleaning up various debug messages. Patch 8: Merge conflict Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04 2012-02-16 18:29:54 +01:00			`if (xoffset == HALFNDX && yoffset == 0)`
Half pixel variance further optimized for ARMv6 Half pixel interpolations optimized in variance calculations. Separate function calls to vp8_filter_block2d_bil_x_pass_armv6 are avoided.On average, performance improvement is 6-7% for VGA@30fps sequences. Change-Id: Idb5f118a9d51548e824719d2cfe5be0fa6996628 2011-03-28 08:51:51 +02:00			`{`
			`var = vp8_variance_halfpixvar16x16_h_armv6(src_ptr, src_pixels_per_line,`
			`dst_ptr, dst_pixels_per_line, sse);`
			`}`
Supporting high precision 1/8-pel motion vectors This is the initial patch for supporting 1/8th pel motion. Currently if we configure with enable-high-precision-mv, all motion vectors would default to 1/8 pel. Encode and decode syncs fine with the current code. In the next phase the code will be refactored so that we can choose the 1/8 pel mode adaptively at a frame/segment/mb level. Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpoaltion) Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V Patch 4: HD results. http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong). Patch 5: Added mmx/sse for bilateral filtering, as well as enforced use of c-versions of subpel filters with 8-taps and 1/16th pel; Also redesigned the 8-tap filters to reduce the cut-off in order to introduce a denoising effect. There is a new configure option sixteenth-subpel-uv which will use 1/16 th pel interpolation for uv, if the motion vectors have 1/8 pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8-taps alone gives 3% improvement over thei derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high precision mv and on the hd set are to follow. Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c Patch 7: Cleaning up various debug messages. Patch 8: Merge conflict Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04 2012-02-16 18:29:54 +01:00			`else if (xoffset == 0 && yoffset == HALFNDX)`
Half pixel variance further optimized for ARMv6 Half pixel interpolations optimized in variance calculations. Separate function calls to vp8_filter_block2d_bil_x_pass_armv6 are avoided.On average, performance improvement is 6-7% for VGA@30fps sequences. Change-Id: Idb5f118a9d51548e824719d2cfe5be0fa6996628 2011-03-28 08:51:51 +02:00			`{`
			`var = vp8_variance_halfpixvar16x16_v_armv6(src_ptr, src_pixels_per_line,`
			`dst_ptr, dst_pixels_per_line, sse);`
			`}`
Supporting high precision 1/8-pel motion vectors This is the initial patch for supporting 1/8th pel motion. Currently if we configure with enable-high-precision-mv, all motion vectors would default to 1/8 pel. Encode and decode syncs fine with the current code. In the next phase the code will be refactored so that we can choose the 1/8 pel mode adaptively at a frame/segment/mb level. Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpoaltion) Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V Patch 4: HD results. http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong). Patch 5: Added mmx/sse for bilateral filtering, as well as enforced use of c-versions of subpel filters with 8-taps and 1/16th pel; Also redesigned the 8-tap filters to reduce the cut-off in order to introduce a denoising effect. There is a new configure option sixteenth-subpel-uv which will use 1/16 th pel interpolation for uv, if the motion vectors have 1/8 pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8-taps alone gives 3% improvement over thei derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high precision mv and on the hd set are to follow. Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c Patch 7: Cleaning up various debug messages. Patch 8: Merge conflict Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04 2012-02-16 18:29:54 +01:00			`else if (xoffset == HALFNDX && yoffset == HALFNDX)`
Half pixel variance further optimized for ARMv6 Half pixel interpolations optimized in variance calculations. Separate function calls to vp8_filter_block2d_bil_x_pass_armv6 are avoided.On average, performance improvement is 6-7% for VGA@30fps sequences. Change-Id: Idb5f118a9d51548e824719d2cfe5be0fa6996628 2011-03-28 08:51:51 +02:00			`{`
			`var = vp8_variance_halfpixvar16x16_hv_armv6(src_ptr, src_pixels_per_line,`
			`dst_ptr, dst_pixels_per_line, sse);`
			`}`
			`else`
			`{`
			`HFilter = vp8_bilinear_filters[xoffset];`
			`VFilter = vp8_bilinear_filters[yoffset];`

			`vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass,`
			`src_pixels_per_line,`
			`17, 16, HFilter);`
			`vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,`
			`16, 16, 16, VFilter);`

			`var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr,`
			`dst_pixels_per_line, sse);`
			`}`
			`return var;`
ARMv6 optimized half pixel variance calculations Adds following ARMv6 optimized functions to the encoder: - vp8_variance_halfpixvar16x16_h_armv6 - vp8_variance_halfpixvar16x16_v_armv6 - vp8_variance_halfpixvar16x16_hv_armv6 Change-Id: I1e9c2af7acd2a51b72b3845beecd990db4bebd29 2011-02-23 12:27:27 +01:00			`}`

			`#endif /* HAVE_ARMV6 */`

move new neon subpixel function previously wasn't guarded with ifdef ARMV7, causing a link error with ARMV6 Change-Id: I0526858be0b5f49b2bf11e9090180b2a6c48926d 2011-01-25 21:11:39 +01:00
			`#if HAVE_ARMV7`

			`unsigned int vp8_sub_pixel_variance16x16_neon`
			`(`
			`const unsigned char *src_ptr,`
			`int src_pixels_per_line,`
			`int xoffset,`
			`int yoffset,`
			`const unsigned char *dst_ptr,`
			`int dst_pixels_per_line,`
			`unsigned int *sse`
			`)`
			`{`
Supporting high precision 1/8-pel motion vectors This is the initial patch for supporting 1/8th pel motion. Currently if we configure with enable-high-precision-mv, all motion vectors would default to 1/8 pel. Encode and decode syncs fine with the current code. In the next phase the code will be refactored so that we can choose the 1/8 pel mode adaptively at a frame/segment/mb level. Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpoaltion) Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V Patch 4: HD results. http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong). Patch 5: Added mmx/sse for bilateral filtering, as well as enforced use of c-versions of subpel filters with 8-taps and 1/16th pel; Also redesigned the 8-tap filters to reduce the cut-off in order to introduce a denoising effect. There is a new configure option sixteenth-subpel-uv which will use 1/16 th pel interpolation for uv, if the motion vectors have 1/8 pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8-taps alone gives 3% improvement over thei derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high precision mv and on the hd set are to follow. Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c Patch 7: Cleaning up various debug messages. Patch 8: Merge conflict Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04 2012-02-16 18:29:54 +01:00			`if (xoffset == HALFNDX && yoffset == 0)`
move new neon subpixel function previously wasn't guarded with ifdef ARMV7, causing a link error with ARMV6 Change-Id: I0526858be0b5f49b2bf11e9090180b2a6c48926d 2011-01-25 21:11:39 +01:00			`return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);`
Supporting high precision 1/8-pel motion vectors This is the initial patch for supporting 1/8th pel motion. Currently if we configure with enable-high-precision-mv, all motion vectors would default to 1/8 pel. Encode and decode syncs fine with the current code. In the next phase the code will be refactored so that we can choose the 1/8 pel mode adaptively at a frame/segment/mb level. Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpoaltion) Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V Patch 4: HD results. http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong). Patch 5: Added mmx/sse for bilateral filtering, as well as enforced use of c-versions of subpel filters with 8-taps and 1/16th pel; Also redesigned the 8-tap filters to reduce the cut-off in order to introduce a denoising effect. There is a new configure option sixteenth-subpel-uv which will use 1/16 th pel interpolation for uv, if the motion vectors have 1/8 pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8-taps alone gives 3% improvement over thei derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high precision mv and on the hd set are to follow. Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c Patch 7: Cleaning up various debug messages. Patch 8: Merge conflict Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04 2012-02-16 18:29:54 +01:00			`else if (xoffset == 0 && yoffset == HALFNDX)`
move new neon subpixel function previously wasn't guarded with ifdef ARMV7, causing a link error with ARMV6 Change-Id: I0526858be0b5f49b2bf11e9090180b2a6c48926d 2011-01-25 21:11:39 +01:00			`return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);`
Supporting high precision 1/8-pel motion vectors This is the initial patch for supporting 1/8th pel motion. Currently if we configure with enable-high-precision-mv, all motion vectors would default to 1/8 pel. Encode and decode syncs fine with the current code. In the next phase the code will be refactored so that we can choose the 1/8 pel mode adaptively at a frame/segment/mb level. Derf results: http://www.corp.google.com/~debargha/vp8_results/enhinterp_hpmv.html (about 0.83% better than 8-tap interpoaltion) Patch 3: Rebased. Also adding 1/16th pel interpolation for U and V Patch 4: HD results. http://www.corp.google.com/~debargha/vp8_results/enhinterp_hd_hpmv.html Seems impressive (unless I am doing something wrong). Patch 5: Added mmx/sse for bilateral filtering, as well as enforced use of c-versions of subpel filters with 8-taps and 1/16th pel; Also redesigned the 8-tap filters to reduce the cut-off in order to introduce a denoising effect. There is a new configure option sixteenth-subpel-uv which will use 1/16 th pel interpolation for uv, if the motion vectors have 1/8 pel accuracy. With the fixes the results are promising on the derf set. The enhanced interpolation option with 8-taps alone gives 3% improvement over thei derf set: http://www.corp.google.com/~debargha/vp8_results/enhinterpn.html Results on high precision mv and on the hd set are to follow. Patch 6: Adding a missing condition for CONFIG_SIXTEENTH_SUBPEL_UV in vp8/common/x86/x86_systemdependent.c Patch 7: Cleaning up various debug messages. Patch 8: Merge conflict Change-Id: I5b1d844457aefd7414a9e4e0e06c6ed38fd8cc04 2012-02-16 18:29:54 +01:00			`else if (xoffset == HALFNDX && yoffset == HALFNDX)`
move new neon subpixel function previously wasn't guarded with ifdef ARMV7, causing a link error with ARMV6 Change-Id: I0526858be0b5f49b2bf11e9090180b2a6c48926d 2011-01-25 21:11:39 +01:00			`return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);`
			`else`
			`return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);`
			`}`

			`#endif`