multi-threaded alpha encoding for lossy

New option: 'cwebp -mt ...'. New config flag: config.thread_level (allowed thread_level values are 0 or 1 for now; maybe more later...). If -mt is activated (and WEBP_USE_THREAD is defined at compile time), the alpha compression will be done in parallel with the RGB coding for lossy. This can save quite a bit of latency... Has no effect for lossless encoding. Change-Id: I769d0bf90e7380cf99344ad62cd77277f4df5a46
2013-03-01 01:21:34 +01:00 · 2013-03-01 01:21:34 +01:00 · f817930a55
commit f817930a55
parent 72655350ea
9 changed files with 96 additions and 33 deletions
--- a/1
+++ b/1
@ -158,6 +158,7 @@ options:
  -pass <int> ............ analysis pass number (1..10)
  -crop <x> <y> <w> <h> .. crop picture with the given rectangle
  -resize <w> <h> ........ resize picture (after any cropping)
+  -mt .................... use multi-threading if available
  -map <int> ............. print map of extra info.
  -print_psnr ............ prints averaged PSNR distortion.
  -print_ssim ............ prints averaged SSIM distortion.
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@ -546,6 +546,7 @@ static void HelpLong(void) {
  printf("  -pass <int> ............ analysis pass number (1..10)\n");
  printf("  -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n");
  printf("  -resize <w> <h> ........ resize picture (after any cropping)\n");
+  printf("  -mt .................... use multi-threading if available\n");
 #ifdef WEBP_EXPERIMENTAL_FEATURES
  printf("  -444 / -422 / -gray ..... Change colorspace\n");
 #endif
@ -724,6 +725,8 @@ int main(int argc, const char *argv[]) {
      config.autofilter = 1;
    } else if (!strcmp(argv[c], "-jpeg_like")) {
      config.emulate_jpeg_size = 1;
+    } else if (!strcmp(argv[c], "-mt")) {
+      ++config.thread_level;  // increase thread level
    } else if (!strcmp(argv[c], "-strong")) {
      config.filter_type = 1;
    } else if (!strcmp(argv[c], "-nostrong")) {
--- a/man/cwebp.1
+++ b/man/cwebp.1
@ -1,5 +1,5 @@
 .\"                                      Hey, EMACS: -*- nroff -*-
-.TH CWEBP 1 "February 15, 2013"
+.TH CWEBP 1 "February 28, 2013"
 .SH NAME
 cwebp \- compress an image file to a WebP file
 .SH SYNOPSIS
@ -82,6 +82,10 @@ of JPEG compression. This flag will generally produce an output file of
 similar size to its JPEG equivalent (for the same \fB\-q\fP setting), but
 with less visual distortion.
 .TP
+.B \-mt
+Use multi-threading for encoding, if possible. This option is only effective
+when using lossy compression on a source with a transparency channel.
+.TP
 .B \-af
 Turns auto-filter on. This algorithm will spend additional time optimizing
 the filtering strength to reach a well-balanced quality.
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@ -286,42 +286,80 @@ static int EncodeAlpha(VP8Encoder* const enc,
 //------------------------------------------------------------------------------
 // Main calls

+static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
+  const WebPConfig* config = enc->config_;
+  uint8_t* alpha_data = NULL;
+  size_t alpha_size = 0;
+  const int effort_level = config->method;  // maps to [0..6]
+  const WEBP_FILTER_TYPE filter =
+      (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
+      (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
+                                       WEBP_FILTER_BEST;
+  if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
+                   filter, effort_level, &alpha_data, &alpha_size)) {
+    return 0;
+  }
+  if (alpha_size != (uint32_t)alpha_size) {  // Sanity check.
+    free(alpha_data);
+    return 0;
+  }
+  enc->alpha_data_size_ = (uint32_t)alpha_size;
+  enc->alpha_data_ = alpha_data;
+  (void)dummy;
+  return 1;
+}
+
 void VP8EncInitAlpha(VP8Encoder* const enc) {
  enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
  enc->alpha_data_ = NULL;
  enc->alpha_data_size_ = 0;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    WebPWorkerInit(worker);
+    worker->data1 = enc;
+    worker->data2 = NULL;
+    worker->hook = (WebPWorkerHook)CompressAlphaJob;
+  }
+}
+
+int VP8EncStartAlpha(VP8Encoder* const enc) {
+  if (enc->has_alpha_) {
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      if (!WebPWorkerReset(worker)) {    // Makes sure worker is good to go.
+        return 0;
+      }
+      WebPWorkerLaunch(worker);
+      return 1;
+    } else {
+      return CompressAlphaJob(enc, NULL);   // just do the job right away
+    }
+  }
+  return 1;
 }

 int VP8EncFinishAlpha(VP8Encoder* const enc) {
  if (enc->has_alpha_) {
-    const WebPConfig* config = enc->config_;
-    uint8_t* tmp_data = NULL;
-    size_t tmp_size = 0;
-    const int effort_level = config->method;  // maps to [0..6]
-    const WEBP_FILTER_TYPE filter =
-        (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
-        (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
-                                         WEBP_FILTER_BEST;
-
-    if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
-                     filter, effort_level, &tmp_data, &tmp_size)) {
-      return 0;
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      if (!WebPWorkerSync(worker)) return 0;  // error
    }
-    if (tmp_size != (uint32_t)tmp_size) {  // Sanity check.
-      free(tmp_data);
-      return 0;
-    }
-    enc->alpha_data_size_ = (uint32_t)tmp_size;
-    enc->alpha_data_ = tmp_data;
  }
  return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
 }

-void VP8EncDeleteAlpha(VP8Encoder* const enc) {
+int VP8EncDeleteAlpha(VP8Encoder* const enc) {
+  int ok = 1;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    ok = WebPWorkerSync(worker);  // finish anything left in flight
+    WebPWorkerEnd(worker);  // still need to end the worker, even if !ok
+  }
  free(enc->alpha_data_);
  enc->alpha_data_ = NULL;
  enc->alpha_data_size_ = 0;
  enc->has_alpha_ = 0;
+  return ok;
 }

 #if defined(__cplusplus) || defined(c_plusplus)
--- a/src/enc/config.c
+++ b/src/enc/config.c
@ -47,6 +47,7 @@ int WebPConfigInitInternal(WebPConfig* config,
  config->lossless = 0;
  config->image_hint = WEBP_HINT_DEFAULT;
  config->emulate_jpeg_size = 0;
+  config->thread_level = 0;

  // TODO(skal): tune.
  switch (preset) {
@ -125,6 +126,8 @@ int WebPValidateConfig(const WebPConfig* config) {
    return 0;
  if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
    return 0;
+  if (config->thread_level < 0 || config->thread_level > 1)
+    return 0;
  return 1;
 }

--- a/src/enc/frame.c
+++ b/src/enc/frame.c
@ -625,7 +625,10 @@ static void SetBlock(uint8_t* p, int value, int size) {
 #endif

 static void ResetSSE(VP8Encoder* const enc) {
-  memset(enc->sse_, 0, sizeof(enc->sse_));
+  enc->sse_[0] = 0;
+  enc->sse_[1] = 0;
+  enc->sse_[2] = 0;
+  // Note: enc->sse_[3] is managed by alpha.c
  enc->sse_count_ = 0;
 }

--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@ -16,6 +16,7 @@
 #include "../webp/encode.h"
 #include "../dsp/dsp.h"
 #include "../utils/bit_writer.h"
+#include "../utils/thread.h"

 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@ -386,6 +387,7 @@ struct VP8Encoder {
  int has_alpha_;
  uint8_t* alpha_data_;       // non-NULL if transparency is present
  uint32_t alpha_data_size_;
+  WebPWorker alpha_worker_;

  // enhancement layer
  int use_layer_;
@ -416,6 +418,7 @@ struct VP8Encoder {
  int method_;               // 0=fastest, 6=best/slowest.
  VP8RDLevel rd_opt_level_;  // Deduced from method_.
  int max_i4_header_bits_;   // partition #0 safeness factor
+  int thread_level_;         // derived from config->thread_level

  // Memory
  VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
@ -499,8 +502,9 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,

  // in alpha.c
 void VP8EncInitAlpha(VP8Encoder* const enc);    // initialize alpha compression
+int VP8EncStartAlpha(VP8Encoder* const enc);    // start alpha coding process
 int VP8EncFinishAlpha(VP8Encoder* const enc);   // finalize compressed data
-void VP8EncDeleteAlpha(VP8Encoder* const enc);  // delete compressed data
+int VP8EncDeleteAlpha(VP8Encoder* const enc);   // delete compressed data

  // in layer.c
 void VP8EncInitLayer(VP8Encoder* const enc);     // init everything
--- a/src/enc/webpenc.c
+++ b/src/enc/webpenc.c
@ -125,6 +125,8 @@ static void MapConfigToTools(VP8Encoder* const enc) {
  enc->max_i4_header_bits_ =
      256 * 16 * 16 *                 // upper bound: up to 16bit per 4x4 block
      (limit * limit) / (100 * 100);  // ... modulated with a quadratic curve.
+
+  enc->thread_level_ = enc->config_->thread_level;
 }

 // Memory scaling with dimensions:
@ -266,14 +268,16 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
  return enc;
 }

-static void DeleteVP8Encoder(VP8Encoder* enc) {
+static int DeleteVP8Encoder(VP8Encoder* enc) {
+  int ok = 1;
  if (enc != NULL) {
-    VP8EncDeleteAlpha(enc);
+    ok = VP8EncDeleteAlpha(enc);
 #ifdef WEBP_EXPERIMENTAL_FEATURES
    VP8EncDeleteLayer(enc);
 #endif
    free(enc);
  }
+  return ok;
 }

 //------------------------------------------------------------------------------
@ -336,7 +340,7 @@ int WebPReportProgress(const WebPPicture* const pic,
 //------------------------------------------------------------------------------

 int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
-  int ok;
+  int ok = 0;

  if (pic == NULL)
    return 0;
@ -365,19 +369,21 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
    enc = InitVP8Encoder(config, pic);
    if (enc == NULL) return 0;  // pic->error is already set.
    // Note: each of the tasks below account for 20% in the progress report.
-    ok = VP8EncAnalyze(enc)
-      && VP8StatLoop(enc)
-      && VP8EncLoop(enc)
-      && VP8EncFinishAlpha(enc)
+    ok = VP8EncAnalyze(enc);
+
+    // Analysis is done, proceed to actual coding.
+    ok = ok && VP8EncStartAlpha(enc);   // possibly done in parallel
+    ok = ok && VP8StatLoop(enc) && VP8EncLoop(enc);
+    ok = ok && VP8EncFinishAlpha(enc);
 #ifdef WEBP_EXPERIMENTAL_FEATURES
-      && VP8EncFinishLayer(enc)
+    ok = ok && VP8EncFinishLayer(enc);
 #endif
-      && VP8EncWrite(enc);
+    ok = ok && VP8EncWrite(enc);
    StoreStats(enc);
    if (!ok) {
      VP8EncFreeBitWriters(enc);
    }
-    DeleteVP8Encoder(enc);
+    ok &= DeleteVP8Encoder(enc);  // must always be called, even if !ok
  } else {
    if (pic->argb == NULL)
      return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
--- a/src/webp/encode.h
+++ b/src/webp/encode.h
@ -125,8 +125,9 @@ struct WebPConfig {
                          // to better match the expected output size from
                          // JPEG compression. Generally, the output size will
                          // be similar but the degradation will be lower.
+  int thread_level;       // If non-zero, try and use multi-threaded encoding.

-  uint32_t pad[7];        // padding for later use
+  uint32_t pad[6];        // padding for later use
 };

 // Enumerate some predefined settings for WebPConfig, depending on the type