diff --git a/test/hbd_metrics_test.cc b/test/hbd_metrics_test.cc
index 75b7c9bd6..fa9cfc158 100644
--- a/test/hbd_metrics_test.cc
+++ b/test/hbd_metrics_test.cc
@@ -27,11 +27,40 @@ using libvpx_test::ACMRandom;
 namespace {
 
 typedef double (*LBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
-                                const YV12_BUFFER_CONFIG *dest,
-                                double *weight);
+                                const YV12_BUFFER_CONFIG *dest);
 typedef double (*HBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
                                 const YV12_BUFFER_CONFIG *dest,
-                                double *weight, unsigned int bd);
+                                uint32_t bd);
+
+// Adapts vpx_calc_fastssim() to HBDMetricFunc; per-plane scores are dropped.
+double compute_hbd_fastssim(const YV12_BUFFER_CONFIG *source,
+                            const YV12_BUFFER_CONFIG *dest,
+                            uint32_t bit_depth) {
+  double tempy, tempu, tempv;
+  return vpx_calc_fastssim(source, dest, &tempy, &tempu, &tempv, bit_depth);
+}
+
+// Adapts vpx_calc_fastssim() to LBDMetricFunc by fixing the bit depth at 8.
+double compute_fastssim(const YV12_BUFFER_CONFIG *source,
+                        const YV12_BUFFER_CONFIG *dest) {
+  double tempy, tempu, tempv;
+  return vpx_calc_fastssim(source, dest, &tempy, &tempu, &tempv, 8);
+}
+
+// SSIM adapters: this one and compute_vpxssim return 100 * (ssim / weight)^8.
+double compute_hbd_vpxssim(const YV12_BUFFER_CONFIG *source,
+                           const YV12_BUFFER_CONFIG *dest, uint32_t bit_depth) {
+  double weight;
+  const double ssim = vpx_highbd_calc_ssim(source, dest, &weight, bit_depth);
+  return 100 * pow(ssim / weight, 8.0);
+}
+
+double compute_vpxssim(const YV12_BUFFER_CONFIG *source,
+                       const YV12_BUFFER_CONFIG *dest) {
+  double weight;
+  const double ssim = vpx_calc_ssim(source, dest, &weight);
+  return 100 * pow(ssim / weight, 8.0);
+}
 
 class HBDMetricsTestBase {
  public:
@@ -46,7 +75,7 @@ class HBDMetricsTestBase {
     YV12_BUFFER_CONFIG lbd_src, lbd_dst;
     YV12_BUFFER_CONFIG hbd_src, hbd_dst;
     ACMRandom rnd(ACMRandom::DeterministicSeed());
-    double lbd_score, hbd_score, lbd_db, hbd_db, lbd_w, hbd_w;
+    double lbd_db, hbd_db;
 
     memset(&lbd_src, 0, sizeof(lbd_src));
     memset(&lbd_dst, 0, sizeof(lbd_dst));
@@ -63,18 +92,16 @@ class HBDMetricsTestBase {
       uint16_t spel, dpel;
       spel = lbd_src.buffer_alloc[i];
       // Create some distortion for dst buffer.
-      lbd_dst.buffer_alloc[i] = rnd.Rand8();
-      dpel = lbd_dst.buffer_alloc[i];
+      dpel = rnd.Rand8();
+      lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
       ((uint16_t*)(hbd_src.buffer_alloc))[i] = spel << (bit_depth_ - 8);
       ((uint16_t*)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
       i++;
     }
 
-    lbd_score = lbd_metric_(&lbd_src, &lbd_dst, &lbd_w);
-    hbd_score = hbd_metric_(&hbd_src, &hbd_dst, &hbd_w, bit_depth_);
+    // Each metric callback returns its final, directly comparable score.
+    lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
+    hbd_db = hbd_metric_(&hbd_src, &hbd_dst, bit_depth_);
 
-    lbd_db = 100 * pow(lbd_score / lbd_w, 8.0);
-    hbd_db = 100 * pow(hbd_score / hbd_w, 8.0);
-
     vpx_free_frame_buffer(&lbd_src);
     vpx_free_frame_buffer(&lbd_dst);
@@ -111,13 +138,22 @@ TEST_P(HBDMetricsTest, RunAccuracyCheck) {
 
 // Allow small variation due to floating point operations.
 static const double kSsim_thresh = 0.001;
+// Allow some variation from accumulated errors in floating point operations.
+static const double kFSsim_thresh = 0.01;
 
 INSTANTIATE_TEST_CASE_P(
-    C, HBDMetricsTest,
+    VPXSSIM, HBDMetricsTest,
     ::testing::Values(
-        MetricTestTParam(&vpx_calc_ssim, &vpx_highbd_calc_ssim, 10,
+        MetricTestTParam(&compute_vpxssim, &compute_hbd_vpxssim, 10,
                          kSsim_thresh),
-        MetricTestTParam(&vpx_calc_ssim, &vpx_highbd_calc_ssim, 12,
+        MetricTestTParam(&compute_vpxssim, &compute_hbd_vpxssim, 12,
                          kSsim_thresh)));
+INSTANTIATE_TEST_CASE_P(
+    FASTSSIM, HBDMetricsTest,
+    ::testing::Values(
+        MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, 10,
+                         kFSsim_thresh),
+        MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, 12,
+                         kFSsim_thresh)));
 }  // namespace
 
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 739791ce6..8cd677b9e 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -4372,14 +4372,25 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
     if (cm->show_frame) {
       YV12_BUFFER_CONFIG *orig = cpi->Source;
       YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
+      uint32_t bit_depth = 8;
+#if CONFIG_VP9_HIGHBITDEPTH
+      uint32_t in_bit_depth = 8;
+#endif
       cpi->count++;
 
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (cm->use_highbitdepth) {
+        in_bit_depth = cpi->oxcf.input_bit_depth;
+        bit_depth = cm->bit_depth;
+      }
+#endif
+
       if (cpi->b_calculate_psnr) {
         PSNR_STATS psnr;
         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
 #if CONFIG_VP9_HIGHBITDEPTH
         calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
-                         cpi->oxcf.input_bit_depth);
+                         in_bit_depth);
 #else
         calc_psnr(orig, recon, &psnr);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -4494,14 +4505,10 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
         }
       }
 
-#if CONFIG_VP9_HIGHBITDEPTH
-      if (!cm->use_highbitdepth)
-#endif
       {
         double y, u, v, frame_all;
-        frame_all = vpx_calc_fastssim(orig, recon, &y, &u, &v);
+        frame_all = vpx_calc_fastssim(orig, recon, &y, &u, &v, bit_depth);
         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
-        /* TODO(JBB): add 10/12 bit support */
       }
 #if CONFIG_VP9_HIGHBITDEPTH
       if (!cm->use_highbitdepth)
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 019abfe7e..3067afea7 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -4537,7 +4537,17 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     cpi->bytes += (int)(*size);
 
     if (cm->show_frame) {
+      uint32_t bit_depth = 8;
+#if CONFIG_VP9_HIGHBITDEPTH
+      uint32_t in_bit_depth = 8;
+#endif
       cpi->count++;
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (cm->use_highbitdepth) {
+        in_bit_depth = cpi->oxcf.input_bit_depth;
+        bit_depth = cm->bit_depth;
+      }
+#endif
 
       if (cpi->b_calculate_psnr) {
         YV12_BUFFER_CONFIG *orig = cpi->Source;
@@ -4546,7 +4556,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
         PSNR_STATS psnr;
 #if CONFIG_VP9_HIGHBITDEPTH
         calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
-                         cpi->oxcf.input_bit_depth);
+                         in_bit_depth);
 #else
         calc_psnr(orig, recon, &psnr);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -4665,15 +4675,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
         }
       }
 
-#if CONFIG_VP9_HIGHBITDEPTH
-      if (!cm->use_highbitdepth)
-#endif
       {
         double y, u, v, frame_all;
         frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
-                                      &v);
+                                      &v, bit_depth);
         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
-        /* TODO(JBB): add 10/12 bit support */
       }
 #if CONFIG_VP9_HIGHBITDEPTH
       if (!cm->use_highbitdepth)
diff --git a/vpx_dsp/fastssim.c b/vpx_dsp/fastssim.c
index 569f18b69..590df9510 100644
--- a/vpx_dsp/fastssim.c
+++ b/vpx_dsp/fastssim.c
@@ -19,19 +19,23 @@
 #include "vpx_dsp/ssim.h"
 #include "vpx_ports/system_state.h"
 
-/* TODO(jbb): High bit depth version of this code needed */
 typedef struct fs_level fs_level;
 typedef struct fs_ctx fs_ctx;
 
 #define SSIM_C1 (255 * 255 * 0.01 * 0.01)
 #define SSIM_C2 (255 * 255 * 0.03 * 0.03)
-
+#if CONFIG_VP9_HIGHBITDEPTH
+#define SSIM_C1_10 (1023 * 1023 * 0.01 * 0.01)
+#define SSIM_C1_12 (4095 * 4095 * 0.01 * 0.01)
+#define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03)
+#define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03)
+#endif
 #define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b))
 #define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b))
 
 struct fs_level {
-  uint16_t *im1;
-  uint16_t *im2;
+  uint32_t *im1;
+  uint32_t *im2;
   double *ssim;
   int w;
   int h;
@@ -82,7 +86,7 @@ static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) {
     level_size += sizeof(*_ctx->level[l].ssim) - 1;
     level_size /= sizeof(*_ctx->level[l].ssim);
     level_size *= sizeof(*_ctx->level[l].ssim);
-    _ctx->level[l].im1 = (uint16_t *) data;
+    _ctx->level[l].im1 = (uint32_t *)data;
     _ctx->level[l].im2 = _ctx->level[l].im1 + im_size;
     data += level_size;
     _ctx->level[l].ssim = (double *) data;
@@ -98,10 +102,10 @@ static void fs_ctx_clear(fs_ctx *_ctx) {
 }
 
 static void fs_downsample_level(fs_ctx *_ctx, int _l) {
-  const uint16_t *src1;
-  const uint16_t *src2;
-  uint16_t *dst1;
-  uint16_t *dst2;
+  const uint32_t *src1;
+  const uint32_t *src2;
+  uint32_t *dst1;
+  uint32_t *dst2;
   int w2;
   int h2;
   int w;
@@ -137,8 +141,40 @@ static void fs_downsample_level(fs_ctx *_ctx, int _l) {
 static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1,
                                  int _s1ystride, const unsigned char *_src2,
                                  int _s2ystride, int _w, int _h) {
-  uint16_t *dst1;
-  uint16_t *dst2;
+  uint32_t *dst1;
+  uint32_t *dst2;
+  int w;
+  int h;
+  int i;
+  int j;
+  w = _ctx->level[0].w;
+  h = _ctx->level[0].h;
+  dst1 = _ctx->level[0].im1;
+  dst2 = _ctx->level[0].im2;
+  for (j = 0; j < h; j++) {
+    int j0;
+    int j1;
+    j0 = 2 * j;
+    j1 = FS_MINI(j0 + 1, _h);
+    for (i = 0; i < w; i++) {
+      int i0;
+      int i1;
+      i0 = 2 * i;
+      i1 = FS_MINI(i0 + 1, _w);
+      dst1[j * w + i] = _src1[j0 * _s1ystride + i0]
+          + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0]
+          + _src1[j1 * _s1ystride + i1];
+      dst2[j * w + i] = _src2[j0 * _s2ystride + i0]
+          + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0]
+          + _src2[j1 * _s2ystride + i1];
+    }
+  }
+}
+static void hbd_fs_downsample_level0(fs_ctx *_ctx, const uint16_t *_src1,
+                                     int _s1ystride, const uint16_t *_src2,
+                                     int _s2ystride, int _w, int _h) {
+  uint32_t *dst1;
+  uint32_t *dst2;
   int w;
   int h;
   int i;
@@ -167,11 +203,11 @@ static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1,
   }
 }
 
-static void fs_apply_luminance(fs_ctx *_ctx, int _l) {
+static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) {
   unsigned *col_sums_x;
   unsigned *col_sums_y;
-  uint16_t *im1;
-  uint16_t *im2;
+  uint32_t *im1;
+  uint32_t *im2;
   double *ssim;
   double c1;
   int w;
@@ -180,6 +216,15 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) {
   int j1offs;
   int i;
   int j;
+  double ssim_c1 = SSIM_C1;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (bit_depth == 10)
+    ssim_c1 = SSIM_C1_10;
+  if (bit_depth == 12)
+    ssim_c1 = SSIM_C1_12;
+#else
+  assert(bit_depth == 8);
+#endif
   w = _ctx->level[_l].w;
   h = _ctx->level[_l].h;
   col_sums_x = _ctx->col_buf;
@@ -198,7 +243,7 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) {
       col_sums_y[i] += im2[j1offs + i];
   }
   ssim = _ctx->level[_l].ssim;
-  c1 = (double) (SSIM_C1 * 4096 * (1 << 4 * _l));
+  c1 = (double) (ssim_c1 * 4096 * (1 << 4 * _l));
   for (j = 0; j < h; j++) {
     unsigned mux;
     unsigned muy;
@@ -296,9 +341,9 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) {
   } \
   while (0)
 
-static void fs_calc_structure(fs_ctx *_ctx, int _l) {
-  uint16_t *im1;
-  uint16_t *im2;
+static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) {
+  uint32_t *im1;
+  uint32_t *im2;
   unsigned *gx_buf;
   unsigned *gy_buf;
   double *ssim;
@@ -311,6 +356,16 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l) {
   int h;
   int i;
   int j;
+  double ssim_c2 = SSIM_C2;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (bit_depth == 10)
+    ssim_c2 = SSIM_C2_10;
+  if (bit_depth == 12)
+    ssim_c2 = SSIM_C2_12;
+#else
+  assert(bit_depth == 8);
+#endif
+
   w = _ctx->level[_l].w;
   h = _ctx->level[_l].h;
   im1 = _ctx->level[_l].im1;
@@ -320,7 +375,7 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l) {
   stride = w + 8;
   gy_buf = gx_buf + 8 * stride;
   memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf));
-  c2 = SSIM_C2 * (1 << 4 * _l) * 16 * 104;
+  c2 = ssim_c2 * (1 << 4 * _l) * 16 * 104;
   for (j = 0; j < h + 4; j++) {
     if (j < h - 1) {
       for (i = 0; i < w - 1; i++) {
@@ -423,6 +478,13 @@ static double fs_average(fs_ctx *_ctx, int _l) {
   return pow(ret / (w * h), FS_WEIGHTS[_l]);
 }
 
+static double convert_ssim_db(double _ssim, double _weight) {
+  assert(_weight >= _ssim);
+  if ((_weight - _ssim) < 1e-10)
+    return MAX_SSIM_DB;
+  return 10 * (log10(_weight) - log10(_weight - _ssim));
+}
+
 static double calc_ssim(const unsigned char *_src, int _systride,
                  const unsigned char *_dst, int _dystride, int _w, int _h) {
   fs_ctx ctx;
@@ -432,42 +494,91 @@ static double calc_ssim(const unsigned char *_src, int _systride,
   fs_ctx_init(&ctx, _w, _h, FS_NLEVELS);
   fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h);
   for (l = 0; l < FS_NLEVELS - 1; l++) {
-    fs_calc_structure(&ctx, l);
+    fs_calc_structure(&ctx, l, 8);
     ret *= fs_average(&ctx, l);
     fs_downsample_level(&ctx, l + 1);
   }
-  fs_calc_structure(&ctx, l);
-  fs_apply_luminance(&ctx, l);
+  fs_calc_structure(&ctx, l, 8);
+  fs_apply_luminance(&ctx, l, 8);
   ret *= fs_average(&ctx, l);
   fs_ctx_clear(&ctx);
   return ret;
 }
 
-static double convert_ssim_db(double _ssim, double _weight) {
-  assert(_weight >= _ssim);
-  if ((_weight - _ssim) < 1e-10)
-    return MAX_SSIM_DB;
-  return 10 * (log10(_weight) - log10(_weight - _ssim));
+/* Reinterprets a byte-typed plane pointer as uint16_t samples by shifting the
+ * pointer value left by one bit.
+ * NOTE(review): presumably matches the project's high-bitdepth pointer
+ * convention; confirm no included header already defines this macro. */
+#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)(x)) << 1))
+
+/* High-bitdepth counterpart of calc_ssim(). _src and _dst go through
+ * CONVERT_TO_SHORTPTR before the level-0 downsample, and bit_depth is passed
+ * on to fs_calc_structure() and fs_apply_luminance(), which choose their
+ * SSIM_C1/SSIM_C2 variant from it. Returns the product of fs_average() over
+ * all FS_NLEVELS levels. */
+static double calc_hbd_ssim(const uint8_t *_src, int _systride,
+                            const uint8_t *_dst, int _dystride,
+                            int _w, int _h, uint32_t bit_depth) {
+  fs_ctx ctx;
+  double ret;
+  int l;
+  ret = 1;
+  fs_ctx_init(&ctx, _w, _h, FS_NLEVELS);
+  hbd_fs_downsample_level0(&ctx,
+                           CONVERT_TO_SHORTPTR(_src), _systride,
+                           CONVERT_TO_SHORTPTR(_dst), _dystride,
+                           _w, _h);
+  for (l = 0; l < FS_NLEVELS - 1; l++) {
+    fs_calc_structure(&ctx, l, bit_depth);
+    ret *= fs_average(&ctx, l);
+    fs_downsample_level(&ctx, l + 1);
+  }
+  fs_calc_structure(&ctx, l, bit_depth);
+  fs_apply_luminance(&ctx, l, bit_depth);
+  ret *= fs_average(&ctx, l);
+  fs_ctx_clear(&ctx);
+  return ret;
 }
 
+/* Scores dest against source with FastSSIM. The per-plane results are stored
+ * in *ssim_y, *ssim_u and *ssim_v; the return value is their 0.8/0.1/0.1 blend
+ * passed through convert_ssim_db(). bit_depth 8 reads the planes as bytes
+ * via calc_ssim(); 10 and 12 use calc_hbd_ssim(). Any other value asserts,
+ * and when asserts are compiled out every plane score is reported as 0. */
 double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source,
                          const YV12_BUFFER_CONFIG *dest,
-                         double *ssim_y, double *ssim_u, double *ssim_v) {
+                         double *ssim_y, double *ssim_u, double *ssim_v,
+                         uint32_t bit_depth) {
   double ssimv;
   vpx_clear_system_state();
 
-  *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
-                      dest->y_stride, source->y_crop_width,
-                      source->y_crop_height);
-
-  *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer,
-                      dest->uv_stride, source->uv_crop_width,
-                      source->uv_crop_height);
+  if (bit_depth == 8) {
+    *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
+                        dest->y_stride, source->y_crop_width,
+                        source->y_crop_height);
+    *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer,
+                        dest->uv_stride, source->uv_crop_width,
+                        source->uv_crop_height);
+    *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer,
+                        dest->uv_stride, source->uv_crop_width,
+                        source->uv_crop_height);
+  } else if (bit_depth == 10 || bit_depth == 12) {
+    *ssim_y = calc_hbd_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
+                            dest->y_stride, source->y_crop_width,
+                            source->y_crop_height, bit_depth);
+    *ssim_u = calc_hbd_ssim(source->u_buffer, source->uv_stride, dest->u_buffer,
+                            dest->uv_stride, source->uv_crop_width,
+                            source->uv_crop_height, bit_depth);
+    *ssim_v = calc_hbd_ssim(source->v_buffer, source->uv_stride, dest->v_buffer,
+                            dest->uv_stride, source->uv_crop_width,
+                            source->uv_crop_height, bit_depth);
+  } else {
+    /* Unsupported depth: trap in debug builds, but still give the outputs
+     * defined values so the blend below never reads uninitialized data. */
+    assert(0);
+    *ssim_y = *ssim_u = *ssim_v = 0;
+  }
 
-  *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer,
-                      dest->uv_stride, source->uv_crop_width,
-                      source->uv_crop_height);
   ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v));
-
   return convert_ssim_db(ssimv, 1.0);
 }
diff --git a/vpx_dsp/ssim.h b/vpx_dsp/ssim.h
index 0074c3622..b1b64301a 100644
--- a/vpx_dsp/ssim.h
+++ b/vpx_dsp/ssim.h
@@ -72,7 +72,8 @@ double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source,
 
 double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source,
                          const YV12_BUFFER_CONFIG *dest,
-                         double *ssim_y, double *ssim_u, double *ssim_v);
+                         double *ssim_y, double *ssim_u,
+                         double *ssim_v, uint32_t bit_depth);
 
 double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,
                    const YV12_BUFFER_CONFIG *dest,
@@ -82,7 +83,7 @@ double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,
 double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
                             const YV12_BUFFER_CONFIG *dest,
                             double *weight,
-                            unsigned int bd);
+                            uint32_t bd);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #ifdef __cplusplus