diff --git a/test/consistency_test.cc b/test/consistency_test.cc
index 6f5f452f6..9c2fd5508 100644
--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@@ -23,11 +23,11 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_dsp/ssim.h"
 #include "vpx_mem/vpx_mem.h"
 
 extern "C"
-double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch,
                             uint8_t *img2, int img2_pitch,
                             int width, int height,
                             Ssimv *sv2, Metrics *m,
@@ -144,7 +144,7 @@ class ConsistencyVP9Test
   double CheckConsistency(int frame) {
     EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
     return
-        vp9_get_ssim_metrics(source_data_[frame], source_stride_,
+        vpx_get_ssim_metrics(source_data_[frame], source_stride_,
                              reference_data_[frame], reference_stride_,
                              width_, height_, ssim_array_, &metrics_, 1);
   }
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index f029bbe3c..737fc56dc 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -261,17 +261,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
 }
 
-#
-# Structured Similarity (SSIM)
-#
-if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
-    add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
-    specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64";
-
-    add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
-    specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64";
-}
-
 # fdct functions
 
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
@@ -330,14 +319,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
   specialize qw/vp9_highbd_quantize_fp_32x32/;
 
-  #
-  # Structured Similarity (SSIM)
-  #
-  if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
-    add_proto qw/void vp9_highbd_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
-    specialize qw/vp9_highbd_ssim_parms_8x8/;
-  }
-
   # fdct functions
   add_proto qw/void vp9_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp9_highbd_fht4x4/;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 718480131..b4e07a682 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -18,6 +18,9 @@
 #include "./vpx_scale_rtcd.h"
 #include "vpx/internal/vpx_psnr.h"
 #include "vpx_dsp/vpx_filter.h"
+#if CONFIG_INTERNAL_STATS
+#include "vpx_dsp/ssim.h"
+#endif
 #include "vpx_ports/mem.h"
 #include "vpx_ports/vpx_timer.h"
 #include "vpx_scale/vpx_scale.h"
@@ -51,9 +54,6 @@
 #include "vp9/encoder/vp9_segmentation.h"
 #include "vp9/encoder/vp9_skin_detection.h"
 #include "vp9/encoder/vp9_speed_features.h"
-#if CONFIG_INTERNAL_STATS
-#include "vp9/encoder/vp9_ssim.h"
-#endif
 #include "vp9/encoder/vp9_svc_layercontext.h"
 #include "vp9/encoder/vp9_temporal_filter.h"
 
@@ -4416,13 +4416,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
 #if CONFIG_VP9_HIGHBITDEPTH
           if (cm->use_highbitdepth) {
-            frame_ssim2 = vp9_highbd_calc_ssim(orig, recon, &weight,
+            frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight,
                                                (int)cm->bit_depth);
           } else {
-            frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
+            frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
           }
 #else
-          frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
+          frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
           cpi->worst_ssim= MIN(cpi->worst_ssim, frame_ssim2);
@@ -4431,13 +4431,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
 #if CONFIG_VP9_HIGHBITDEPTH
           if (cm->use_highbitdepth) {
-            frame_ssim2 = vp9_highbd_calc_ssim(
+            frame_ssim2 = vpx_highbd_calc_ssim(
                 orig, &cm->post_proc_buffer, &weight, (int)cm->bit_depth);
           } else {
-            frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);
+            frame_ssim2 = vpx_calc_ssim(orig, &cm->post_proc_buffer, &weight);
           }
 #else
-          frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);
+          frame_ssim2 = vpx_calc_ssim(orig, &cm->post_proc_buffer, &weight);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
           cpi->summedp_quality += frame_ssim2 * weight;
@@ -4472,7 +4472,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
         if (!cm->use_highbitdepth)
 #endif
         {
-          double this_inconsistency = vp9_get_ssim_metrics(
+          double this_inconsistency = vpx_get_ssim_metrics(
               cpi->Source->y_buffer, cpi->Source->y_stride,
               cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
               cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
@@ -4492,14 +4492,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
         double y, u, v, frame_all;
 #if CONFIG_VP9_HIGHBITDEPTH
         if (cm->use_highbitdepth) {
-          frame_all = vp9_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,
+          frame_all = vpx_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,
                                             &u, &v, (int)cm->bit_depth);
         } else {
-          frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,
+          frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,
                                      &v);
         }
 #else
-        frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
+        frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
         adjust_image_stat(y, u, v, frame_all, &cpi->ssimg);
       }
@@ -4508,7 +4508,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 #endif
       {
         double y, u, v, frame_all;
-        frame_all = vp9_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
+        frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
                                       &v);
         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
         /* TODO(JBB): add 10/12 bit support */
@@ -4518,7 +4518,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 #endif
       {
         double y, u, v, frame_all;
-        frame_all = vp9_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);
+        frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);
         adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
       }
     }
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 4d4da9283..78d55e1ea 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -16,6 +16,10 @@
 #include "./vpx_config.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "vpx/vp8cx.h"
+#if CONFIG_INTERNAL_STATS
+#include "vpx_dsp/ssim.h"
+#endif
+#include "vpx_dsp/variance.h"
 #include "vpx_util/vpx_thread.h"
 
 #include "vp9/common/vp9_alloccommon.h"
@@ -34,13 +38,9 @@
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rd.h"
-#if CONFIG_INTERNAL_STATS
-#include "vp9/encoder/vp9_ssim.h"
-#endif
 #include "vp9/encoder/vp9_speed_features.h"
 #include "vp9/encoder/vp9_svc_layercontext.h"
 #include "vp9/encoder/vp9_tokenize.h"
-#include "vpx_dsp/variance.h"
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
 #include "vp9/encoder/vp9_denoiser.h"
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 186ce11f0..84b12d78e 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -33,7 +33,6 @@ VP9_CX_SRCS-yes += encoder/vp9_encodemv.c
 VP9_CX_SRCS-yes += encoder/vp9_ethread.h
 VP9_CX_SRCS-yes += encoder/vp9_ethread.c
 VP9_CX_SRCS-yes += encoder/vp9_extend.c
-VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_fastssim.c
 VP9_CX_SRCS-yes += encoder/vp9_firstpass.c
 VP9_CX_SRCS-yes += encoder/vp9_block.h
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.h
@@ -57,7 +56,6 @@ VP9_CX_SRCS-yes += encoder/vp9_mcomp.c
 VP9_CX_SRCS-yes += encoder/vp9_encoder.c
 VP9_CX_SRCS-yes += encoder/vp9_picklpf.c
 VP9_CX_SRCS-yes += encoder/vp9_picklpf.h
-VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_psnrhvs.c
 VP9_CX_SRCS-yes += encoder/vp9_quantize.c
 VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
 VP9_CX_SRCS-yes += encoder/vp9_rd.c
@@ -72,8 +70,6 @@ VP9_CX_SRCS-yes += encoder/vp9_subexp.h
 VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c
 VP9_CX_SRCS-yes += encoder/vp9_resize.c
 VP9_CX_SRCS-yes += encoder/vp9_resize.h
-VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
-VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.c
 
 VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
@@ -113,7 +109,6 @@ VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
 VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm
 endif
 endif
-VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
 
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
 VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c
diff --git a/vp9/encoder/vp9_fastssim.c b/vpx_dsp/fastssim.c
similarity index 98%
rename from vp9/encoder/vp9_fastssim.c
rename to vpx_dsp/fastssim.c
index f1d408cbe..7024cbf45 100644
--- a/vp9/encoder/vp9_fastssim.c
+++ b/vpx_dsp/fastssim.c
@@ -13,8 +13,8 @@
 #include <math.h>
 #include <string.h>
 #include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/encoder/vp9_ssim.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/ssim.h"
 /* TODO(jbb): High bit depth version of this code needed */
 typedef struct fs_level fs_level;
 typedef struct fs_ctx fs_ctx;
@@ -443,10 +443,10 @@ static double convert_ssim_db(double _ssim, double _weight) {
   return 10 * (log10(_weight) - log10(_weight - _ssim));
 }
 
-double vp9_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                          double *ssim_y, double *ssim_u, double *ssim_v) {
   double ssimv;
-  vp9_clear_system_state();
+  vpx_clear_system_state();
 
   *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
                       dest->y_stride, source->y_crop_width,
diff --git a/vp9/encoder/vp9_psnrhvs.c b/vpx_dsp/psnrhvs.c
similarity index 98%
rename from vp9/encoder/vp9_psnrhvs.c
rename to vpx_dsp/psnrhvs.c
index 5104b9af6..8aa30f2fc 100644
--- a/vp9/encoder/vp9_psnrhvs.c
+++ b/vpx_dsp/psnrhvs.c
@@ -15,9 +15,8 @@
 #include <math.h>
 
 #include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "./vpx_dsp_rtcd.h"
-#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_dsp/ssim.h"
 
 #if !defined(M_PI)
 # define M_PI (3.141592653589793238462643)
@@ -201,12 +200,12 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride,
   ret /= pixels;
   return ret;
 }
-double vp9_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                    double *y_psnrhvs, double *u_psnrhvs, double *v_psnrhvs) {
   double psnrhvs;
   double par = 1.0;
   int step = 7;
-  vp9_clear_system_state();
+  vpx_clear_system_state();
   *y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer,
                             dest->y_stride, par, source->y_crop_width,
                             source->y_crop_height, step, csf_y);
diff --git a/vp9/encoder/vp9_ssim.c b/vpx_dsp/ssim.c
similarity index 91%
rename from vp9/encoder/vp9_ssim.c
rename to vpx_dsp/ssim.c
index 172de5d1d..991906f2a 100644
--- a/vp9/encoder/vp9_ssim.c
+++ b/vpx_dsp/ssim.c
@@ -9,11 +9,11 @@
  */
 
 #include <math.h>
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/ssim.h"
 #include "vpx_ports/mem.h"
-#include "vp9/encoder/vp9_ssim.h"
 
-void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
+void vpx_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
                             int rp, unsigned long *sum_s, unsigned long *sum_r,
                             unsigned long *sum_sq_s, unsigned long *sum_sq_r,
                             unsigned long *sum_sxr) {
@@ -28,7 +28,7 @@ void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
     }
   }
 }
-void vp9_ssim_parms_8x8_c(uint8_t *s, int sp, uint8_t *r, int rp,
+void vpx_ssim_parms_8x8_c(uint8_t *s, int sp, uint8_t *r, int rp,
                           unsigned long *sum_s, unsigned long *sum_r,
                           unsigned long *sum_sq_s, unsigned long *sum_sq_r,
                           unsigned long *sum_sxr) {
@@ -45,7 +45,7 @@ void vp9_ssim_parms_8x8_c(uint8_t *s, int sp, uint8_t *r, int rp,
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_ssim_parms_8x8_c(uint16_t *s, int sp, uint16_t *r, int rp,
+void vpx_highbd_ssim_parms_8x8_c(uint16_t *s, int sp, uint16_t *r, int rp,
                                  uint32_t *sum_s, uint32_t *sum_r,
                                  uint32_t *sum_sq_s, uint32_t *sum_sq_r,
                                  uint32_t *sum_sxr) {
@@ -87,7 +87,7 @@ static double similarity(unsigned long sum_s, unsigned long sum_r,
 
 static double ssim_8x8(uint8_t *s, int sp, uint8_t *r, int rp) {
   unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
-  vp9_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+  vpx_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
                      &sum_sxr);
   return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
 }
@@ -97,7 +97,7 @@ static double highbd_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,
                               unsigned int bd) {
   uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
   const int oshift = bd - 8;
-  vp9_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+  vpx_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
                             &sum_sxr);
   return similarity(sum_s >> oshift,
                     sum_r >> oshift,
@@ -111,7 +111,7 @@ static double highbd_ssim_8x8(uint16_t *s, int sp, uint16_t *r, int rp,
 // We are using a 8x8 moving window with starting location of each 8x8 window
 // on the 4x4 pixel grid. Such arrangement allows the windows to overlap
 // block boundaries to penalize blocking artifacts.
-double vp9_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
+double vpx_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
                  int stride_img2, int width, int height) {
   int i, j;
   int samples = 0;
@@ -131,7 +131,7 @@ double vp9_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-double vp9_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
+double vpx_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
                         int stride_img2, int width, int height,
                         unsigned int bd) {
   int i, j;
@@ -154,20 +154,20 @@ double vp9_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                      double *weight) {
   double a, b, c;
   double ssimv;
 
-  a = vp9_ssim2(source->y_buffer, dest->y_buffer,
+  a = vpx_ssim2(source->y_buffer, dest->y_buffer,
                 source->y_stride, dest->y_stride,
                 source->y_crop_width, source->y_crop_height);
 
-  b = vp9_ssim2(source->u_buffer, dest->u_buffer,
+  b = vpx_ssim2(source->u_buffer, dest->u_buffer,
                 source->uv_stride, dest->uv_stride,
                 source->uv_crop_width, source->uv_crop_height);
 
-  c = vp9_ssim2(source->v_buffer, dest->v_buffer,
+  c = vpx_ssim2(source->v_buffer, dest->v_buffer,
                 source->uv_stride, dest->uv_stride,
                 source->uv_crop_width, source->uv_crop_height);
 
@@ -178,20 +178,20 @@ double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
   return ssimv;
 }
 
-double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                       double *ssim_y, double *ssim_u, double *ssim_v) {
   double ssim_all = 0;
   double a, b, c;
 
-  a = vp9_ssim2(source->y_buffer, dest->y_buffer,
+  a = vpx_ssim2(source->y_buffer, dest->y_buffer,
                 source->y_stride, dest->y_stride,
                 source->y_crop_width, source->y_crop_height);
 
-  b = vp9_ssim2(source->u_buffer, dest->u_buffer,
+  b = vpx_ssim2(source->u_buffer, dest->u_buffer,
                 source->uv_stride, dest->uv_stride,
                 source->uv_crop_width, source->uv_crop_height);
 
-  c = vp9_ssim2(source->v_buffer, dest->v_buffer,
+  c = vpx_ssim2(source->v_buffer, dest->v_buffer,
                 source->uv_stride, dest->uv_stride,
                 source->uv_crop_width, source->uv_crop_height);
   *ssim_y = a;
@@ -280,12 +280,12 @@ double ssimv_similarity2(Ssimv *sv, int64_t n) {
 }
 void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2, int img2_pitch,
                  Ssimv *sv) {
-  vp9_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch,
+  vpx_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch,
                      &sv->sum_s, &sv->sum_r, &sv->sum_sq_s, &sv->sum_sq_r,
                      &sv->sum_sxr);
 }
 
-double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch,
                             uint8_t *img2, int img2_pitch,
                             int width, int height,
                             Ssimv *sv2, Metrics *m,
@@ -298,7 +298,7 @@ double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
   int c = 0;
   double norm;
   double old_ssim_total = 0;
-  vp9_clear_system_state();
+  vpx_clear_system_state();
   // We can sample points as frequently as we like start with 1 per 4x4.
   for (i = 0; i < height; i += 4,
        img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
@@ -448,21 +448,21 @@ double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
 
 
 #if CONFIG_VP9_HIGHBITDEPTH
-double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
+double vpx_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
                             YV12_BUFFER_CONFIG *dest,
                             double *weight, unsigned int bd) {
   double a, b, c;
   double ssimv;
 
-  a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,
+  a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer,
                        source->y_stride, dest->y_stride,
                        source->y_crop_width, source->y_crop_height, bd);
 
-  b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,
+  b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer,
                        source->uv_stride, dest->uv_stride,
                        source->uv_crop_width, source->uv_crop_height, bd);
 
-  c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,
+  c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer,
                        source->uv_stride, dest->uv_stride,
                        source->uv_crop_width, source->uv_crop_height, bd);
 
@@ -473,21 +473,21 @@ double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
   return ssimv;
 }
 
-double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
+double vpx_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
                              YV12_BUFFER_CONFIG *dest, double *ssim_y,
                              double *ssim_u, double *ssim_v, unsigned int bd) {
   double ssim_all = 0;
   double a, b, c;
 
-  a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,
+  a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer,
                        source->y_stride, dest->y_stride,
                        source->y_crop_width, source->y_crop_height, bd);
 
-  b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,
+  b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer,
                        source->uv_stride, dest->uv_stride,
                        source->uv_crop_width, source->uv_crop_height, bd);
 
-  c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,
+  c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer,
                        source->uv_stride, dest->uv_stride,
                        source->uv_crop_width, source->uv_crop_height, bd);
   *ssim_y = a;
diff --git a/vp9/encoder/vp9_ssim.h b/vpx_dsp/ssim.h
similarity index 74%
rename from vp9/encoder/vp9_ssim.h
rename to vpx_dsp/ssim.h
index 10f14c4d2..b1579f778 100644
--- a/vp9/encoder/vp9_ssim.h
+++ b/vpx_dsp/ssim.h
@@ -8,15 +8,24 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef VP9_ENCODER_VP9_SSIM_H_
-#define VP9_ENCODER_VP9_SSIM_H_
+#ifndef VPX_DSP_SSIM_H_
+#define VPX_DSP_SSIM_H_
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#include "./vpx_config.h"
 #include "vpx_scale/yv12config.h"
 
+// TODO(aconverse): Unify vp8/vp9_clear_system_state
+#if ARCH_X86 || ARCH_X86_64
+void vpx_reset_mmx_state(void);
+#define vpx_clear_system_state() vpx_reset_mmx_state()
+#else
+#define vpx_clear_system_state()
+#endif
+
 // metrics used for calculating ssim, ssim2, dssim, and ssimc
 typedef struct {
   // source sum ( over 8x8 region )
@@ -59,29 +68,29 @@ typedef struct {
   double ssimcd;
 } Metrics;
 
-double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
+double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
                       int img2_pitch, int width, int height, Ssimv *sv2,
                       Metrics *m, int do_inconsistency);
 
-double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                      double *weight);
 
-double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                       double *ssim_y, double *ssim_u, double *ssim_v);
 
-double vp9_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                          double *ssim_y, double *ssim_u, double *ssim_v);
 
-double vp9_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                    double *ssim_y, double *ssim_u, double *ssim_v);
 
 #if CONFIG_VP9_HIGHBITDEPTH
-double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
+double vpx_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
                             YV12_BUFFER_CONFIG *dest,
                             double *weight,
                             unsigned int bd);
 
-double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
+double vpx_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
                              YV12_BUFFER_CONFIG *dest,
                              double *ssim_y,
                              double *ssim_u,
@@ -93,4 +102,4 @@ double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
 }  // extern "C"
 #endif
 
-#endif  // VP9_ENCODER_VP9_SSIM_H_
+#endif  // VPX_DSP_SSIM_H_
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 468e4c31b..5d4ec3e85 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -22,6 +22,10 @@ DSP_SRCS-yes += bitwriter.h
 DSP_SRCS-yes += bitwriter.c
 DSP_SRCS-yes += bitwriter_buffer.c
 DSP_SRCS-yes += bitwriter_buffer.h
+DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.c
+DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.h
+DSP_SRCS-$(CONFIG_INTERNAL_STATS) += psnrhvs.c
+DSP_SRCS-$(CONFIG_INTERNAL_STATS) += fastssim.c
 endif
 
 ifeq ($(CONFIG_DECODERS),yes)
@@ -295,6 +299,10 @@ DSP_SRCS-$(HAVE_SSE2)   += x86/variance_sse2.c  # Contains SSE2 and SSSE3
 DSP_SRCS-$(HAVE_AVX2)   += x86/variance_avx2.c
 DSP_SRCS-$(HAVE_AVX2)   += x86/variance_impl_avx2.c
 
+ifeq ($(ARCH_X86_64),yes)
+DSP_SRCS-$(HAVE_SSE2)   += x86/ssim_opt_x86_64.asm
+endif  # ARCH_X86_64
+
 ifeq ($(CONFIG_USE_X86INC),yes)
 DSP_SRCS-$(HAVE_SSE2)   += x86/subpel_variance_sse2.asm  # Contains SSE2 and SSSE3
 endif  # CONFIG_USE_X86INC
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 7326adf06..326022743 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -990,6 +990,17 @@ specialize qw/vpx_sad4x8x4d msa/, "$sse_x86inc";
 add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc";
 
+#
+# Structured Similarity (SSIM)
+#
+if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+    add_proto qw/void vpx_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+    specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";
+
+    add_proto qw/void vpx_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+    specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";
+}
+
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   #
   # Block subtraction
@@ -1176,6 +1187,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad4x4x4d/, "$sse2_x86inc";
 
+  #
+  # Structured Similarity (SSIM)
+  #
+  if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+    add_proto qw/void vpx_highbd_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+    specialize qw/vpx_highbd_ssim_parms_8x8/;
+  }
 }  # CONFIG_VP9_HIGHBITDEPTH
 }  # CONFIG_ENCODERS
 
diff --git a/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm b/vpx_dsp/x86/ssim_opt_x86_64.asm
similarity index 97%
rename from vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
rename to vpx_dsp/x86/ssim_opt_x86_64.asm
index 455d10d2c..5d05d4fc6 100644
--- a/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
+++ b/vpx_dsp/x86/ssim_opt_x86_64.asm
@@ -61,8 +61,8 @@
 ; or pavgb At this point this is just meant to be first pass for calculating
 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
 ; in mode selection code.
-global sym(vp9_ssim_parms_16x16_sse2) PRIVATE
-sym(vp9_ssim_parms_16x16_sse2):
+global sym(vpx_ssim_parms_16x16_sse2) PRIVATE
+sym(vpx_ssim_parms_16x16_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 9
@@ -151,8 +151,8 @@ sym(vp9_ssim_parms_16x16_sse2):
 ; or pavgb At this point this is just meant to be first pass for calculating
 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
 ; in mode selection code.
-global sym(vp9_ssim_parms_8x8_sse2) PRIVATE
-sym(vp9_ssim_parms_8x8_sse2):
+global sym(vpx_ssim_parms_8x8_sse2) PRIVATE
+sym(vpx_ssim_parms_8x8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 9