RTCD: add FDCT functions

This commit continues the process of converting to the new RTCD system. Change-Id: I3f9c07db65eb206f6363d21bdb80e871570da767
2012-01-12 16:55:44 -08:00 · 2012-01-12 16:55:44 -08:00 · 510e0ab467
commit 510e0ab467
parent 83a91e789c
17 changed files with 35 additions and 265 deletions
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@ -392,5 +392,20 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
    specialize vp8_ssim_parms_16x16 $sse2_on_x86_64
 fi

+#
+# Forward DCT
+#
+prototype void vp8_short_fdct4x4 "short *input, short *output, int pitch"
+specialize vp8_short_fdct4x4 mmx sse2 media neon
+vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6
+
+prototype void vp8_short_fdct8x4 "short *input, short *output, int pitch"
+specialize vp8_short_fdct8x4 mmx sse2 media neon
+vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6
+
+prototype void vp8_short_walsh4x4 "short *input, short *output, int pitch"
+specialize vp8_short_walsh4x4 sse2 media neon
+vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6
+
 # End of encoder only functions
 fi
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@ -32,12 +32,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
 #if HAVE_MEDIA
    if (flags & HAS_MEDIA)
    {
-        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_armv6;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_armv6;
-        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_armv6;
-        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_armv6;
-        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
-
        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;*/
@ -53,12 +47,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
 #if HAVE_NEON
    if (flags & HAS_NEON)
    {
-        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
-        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_neon;
-        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_neon;
-        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
-
        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;*/
--- a/vp8/encoder/arm/dct_arm.c
+++ b/vp8/encoder/arm/dct_arm.c
@ -9,7 +9,7 @@
 */

 #include "vpx_config.h"
-#include "vp8/encoder/dct.h"
+#include "vpx_rtcd.h"

 #if HAVE_MEDIA

--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@ -1,65 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef DCT_ARM_H
-#define DCT_ARM_H
-
-#if HAVE_MEDIA
-extern prototype_fdct(vp8_short_walsh4x4_armv6);
-extern prototype_fdct(vp8_short_fdct4x4_armv6);
-extern prototype_fdct(vp8_short_fdct8x4_armv6);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef  vp8_fdct_walsh_short4x4
-#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
-
-#undef  vp8_fdct_short4x4
-#define vp8_fdct_short4x4 vp8_short_fdct4x4_armv6
-
-#undef  vp8_fdct_short8x4
-#define vp8_fdct_short8x4 vp8_short_fdct8x4_armv6
-
-#undef  vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4 vp8_short_fdct4x4_armv6
-
-#undef  vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_short_fdct8x4_armv6
-#endif
-
-#endif /* HAVE_MEDIA */
-
-#if HAVE_NEON
-extern prototype_fdct(vp8_short_fdct4x4_neon);
-extern prototype_fdct(vp8_short_fdct8x4_neon);
-extern prototype_fdct(vp8_fast_fdct4x4_neon);
-extern prototype_fdct(vp8_fast_fdct8x4_neon);
-extern prototype_fdct(vp8_short_walsh4x4_neon);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef  vp8_fdct_short4x4
-#define vp8_fdct_short4x4 vp8_short_fdct4x4_neon
-
-#undef  vp8_fdct_short8x4
-#define vp8_fdct_short8x4 vp8_short_fdct8x4_neon
-
-#undef  vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4 vp8_short_fdct4x4_neon
-
-#undef  vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_short_fdct8x4_neon
-
-#undef  vp8_fdct_walsh_short4x4
-#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon
-#endif
-
-#endif /* HAVE_NEON */
-
-#endif
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@ -117,8 +117,8 @@ typedef struct
    int optimize;
    int q_index;

-    void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
-    void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
+    void (*short_fdct4x4)(short *input, short *output, int pitch);
+    void (*short_fdct8x4)(short *input, short *output, int pitch);
    void (*short_walsh4x4)(short *input, short *output, int pitch);
    void (*quantize_b)(BLOCK *b, BLOCKD *d);
    void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@ -1,65 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __INC_DCT_H
-#define __INC_DCT_H
-
-#define prototype_fdct(sym) void (sym)(short *input, short *output, int pitch)
-
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/dct_x86.h"
-#endif
-
-#if ARCH_ARM
-#include "arm/dct_arm.h"
-#endif
-
-#ifndef vp8_fdct_short4x4
-#define vp8_fdct_short4x4  vp8_short_fdct4x4_c
-#endif
-extern prototype_fdct(vp8_fdct_short4x4);
-
-#ifndef vp8_fdct_short8x4
-#define vp8_fdct_short8x4  vp8_short_fdct8x4_c
-#endif
-extern prototype_fdct(vp8_fdct_short8x4);
-
-// There is no fast4x4 (for now)
-#ifndef vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4  vp8_short_fdct4x4_c
-#endif
-
-#ifndef vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4  vp8_short_fdct8x4_c
-#endif
-
-#ifndef vp8_fdct_walsh_short4x4
-#define vp8_fdct_walsh_short4x4  vp8_short_walsh4x4_c
-#endif
-extern prototype_fdct(vp8_fdct_walsh_short4x4);
-
-typedef prototype_fdct(*vp8_fdct_fn_t);
-typedef struct
-{
-    vp8_fdct_fn_t    short4x4;
-    vp8_fdct_fn_t    short8x4;
-    vp8_fdct_fn_t    fast4x4;
-    vp8_fdct_fn_t    fast8x4;
-    vp8_fdct_fn_t    walsh_short4x4;
-} vp8_fdct_rtcd_vtable_t;
-
-#if CONFIG_RUNTIME_CPU_DETECT
-#define FDCT_INVOKE(ctx,fn) (ctx)->fn
-#else
-#define FDCT_INVOKE(ctx,fn) vp8_fdct_##fn
-#endif
-
-#endif
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@ -15,7 +15,6 @@
 #include "vp8/common/reconintra4x4.h"
 #include "encodemb.h"
 #include "vp8/common/invtrans.h"
-#include "dct.h"
 #include "encodeintra.h"


@ -70,7 +69,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,

    ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);

-    x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+    x->short_fdct4x4(be->src_diff, be->coeff, 32);

    x->quantize_b(be, b);

--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@ -16,7 +16,6 @@
 #include "quantize.h"
 #include "tokenize.h"
 #include "vp8/common/invtrans.h"
-#include "dct.h"
 #include "vpx_mem/vpx_mem.h"
 #include "rdopt.h"

@ -127,7 +126,7 @@ void vp8_transform_mbuv(MACROBLOCK *x)

    for (i = 16; i < 24; i += 2)
    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+        x->short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 16);
    }
 }
@ -139,7 +138,7 @@ void vp8_transform_intra_mby(MACROBLOCK *x)

    for (i = 0; i < 16; i += 2)
    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+        x->short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 32);
    }

@ -159,7 +158,7 @@ static void transform_mb(MACROBLOCK *x)

    for (i = 0; i < 16; i += 2)
    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+        x->short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 32);
    }

@ -169,7 +168,7 @@ static void transform_mb(MACROBLOCK *x)

    for (i = 16; i < 24; i += 2)
    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+        x->short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 16);
    }

@ -187,7 +186,7 @@ static void transform_mby(MACROBLOCK *x)

    for (i = 0; i < 16; i += 2)
    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+        x->short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 32);
    }

--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@ -304,8 +304,8 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
    z->mv_row_max    = x->mv_row_max;
    */

-    z->vp8_short_fdct4x4     = x->vp8_short_fdct4x4;
-    z->vp8_short_fdct8x4     = x->vp8_short_fdct8x4;
+    z->short_fdct4x4     = x->short_fdct4x4;
+    z->short_fdct8x4     = x->short_fdct8x4;
    z->short_walsh4x4    = x->short_walsh4x4;
    z->quantize_b        = x->quantize_b;
    z->quantize_b_pair   = x->quantize_b_pair;
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@ -25,12 +25,6 @@ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
 void vp8_cmachine_specific_config(VP8_COMP *cpi)
 {
 #if CONFIG_RUNTIME_CPU_DETECT
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
-
    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@ -141,9 +141,6 @@ extern int inter_uv_modes[4] ;
 extern unsigned int inter_b_modes[15];
 #endif

-extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
-extern void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
-
 extern const int vp8_bits_per_mb[2][QINDEX_RANGE];

 extern const int qrounding_factors[129];
@ -979,16 +976,17 @@ void vp8_set_speed_features(VP8_COMP *cpi)

    if (cpi->sf.improved_dct)
    {
-        cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
-        cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
+        cpi->mb.short_fdct8x4 = vp8_short_fdct8x4;
+        cpi->mb.short_fdct4x4 = vp8_short_fdct4x4;
    }
    else
    {
-        cpi->mb.vp8_short_fdct8x4   = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
-        cpi->mb.vp8_short_fdct4x4   = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
+        /* No fast FDCT defined for any platform at this time. */
+        cpi->mb.short_fdct8x4 = vp8_short_fdct8x4;
+        cpi->mb.short_fdct4x4 = vp8_short_fdct4x4;
    }

-    cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4);
+    cpi->mb.short_walsh4x4 = vp8_short_walsh4x4;

    if (cpi->sf.improved_quant)
    {
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@ -19,7 +19,6 @@
 #include "tokenize.h"
 #include "vp8/common/onyxc_int.h"
 #include "variance.h"
-#include "dct.h"
 #include "encodemb.h"
 #include "quantize.h"
 #include "vp8/common/entropy.h"
@ -224,7 +223,6 @@ typedef struct

 typedef struct VP8_ENCODER_RTCD
 {
-    vp8_fdct_rtcd_vtable_t      fdct;
    vp8_encodemb_rtcd_vtable_t  encodemb;
    vp8_quantize_rtcd_vtable_t  quantize;
    vp8_search_rtcd_vtable_t    search;
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@ -32,7 +32,6 @@
 #include "mcomp.h"
 #include "rdopt.h"
 #include "vpx_mem/vpx_mem.h"
-#include "dct.h"
 #include "vp8/common/systemdependent.h"

 #if CONFIG_RUNTIME_CPU_DETECT
@ -580,7 +579,7 @@ static void macro_block_yrd( MACROBLOCK *mb,
    // Fdct and building the 2nd order block
    for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
    {
-        mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
+        mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
        *Y2DCPtr++ = beptr->coeff[0];
        *Y2DCPtr++ = beptr->coeff[16];
    }
@ -656,7 +655,7 @@ static int rd_pick_intra4x4block(
                     (*(b->base_dst) + b->dst, b->dst_stride,
                      mode, b->predictor, 16);
        ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
-        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+        x->short_fdct4x4(be->src_diff, be->coeff, 32);
        x->quantize_b(be, b);

        tempa = ta;
@ -1028,7 +1027,7 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels

            vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
            ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
-            x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+            x->short_fdct4x4(be->src_diff, be->coeff, 32);

            // set to 0 no way to account for 2nd order DC so discount
            //be->coeff[0] = 0;
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@ -1,73 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef DCT_X86_H
-#define DCT_X86_H
-
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-#if HAVE_MMX
-extern prototype_fdct(vp8_short_fdct4x4_mmx);
-extern prototype_fdct(vp8_short_fdct8x4_mmx);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef  vp8_fdct_short4x4
-#define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
-
-#undef  vp8_fdct_short8x4
-#define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
-
-#undef  vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4 vp8_short_fdct4x4_mmx
-
-#undef  vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_short_fdct8x4_mmx
-
-#endif
-
-#endif
-
-
-#if HAVE_SSE2
-extern prototype_fdct(vp8_short_fdct8x4_sse2);
-extern prototype_fdct(vp8_short_walsh4x4_sse2);
-
-extern prototype_fdct(vp8_short_fdct4x4_sse2);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef  vp8_fdct_short4x4
-#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
-
-#undef  vp8_fdct_short8x4
-#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
-
-#undef  vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
-
-#undef  vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
-
-#undef  vp8_fdct_walsh_short4x4
-#define vp8_fdct_walsh_short4x4  vp8_short_walsh4x4_sse2
-
-#endif
-
-
-#endif
-
-#endif
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@ -127,13 +127,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 #if HAVE_MMX
    if (flags & HAS_MMX)
    {
-        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx;
-        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx;
-        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx;
-
-        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
-
        cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx;
        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_mmx;
        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_mmx;
@ -148,13 +141,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 #if HAVE_SSE2
    if (flags & HAS_SSE2)
    {
-        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_sse2;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_sse2;
-        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2;
-        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2;
-
-        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2 ;
-
        cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;
        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@ -48,7 +48,6 @@ VP8_CX_SRCS-yes += encoder/generic/csystemdependent.c
 VP8_CX_SRCS-yes += encoder/block.h
 VP8_CX_SRCS-yes += encoder/boolhuff.h
 VP8_CX_SRCS-yes += encoder/bitstream.h
-VP8_CX_SRCS-yes += encoder/dct.h
 VP8_CX_SRCS-yes += encoder/encodeintra.h
 VP8_CX_SRCS-yes += encoder/encodemb.h
 VP8_CX_SRCS-yes += encoder/encodemv.h
@ -95,7 +94,6 @@ VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c
 endif

 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h
-VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@ -16,7 +16,6 @@
 VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/arm_csystemdependent.c

 VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/dct_arm.c
-VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/dct_arm.h
 VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/encodemb_arm.h
 VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/quantize_arm.c
 VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/quantize_arm.h