rescaler: move the 1x1 or 2x1 handling one level up

=> no need to handle this special case in each of the ExportRowShrink sub-functions.

Change-Id: I4b0211ecfafbc9c80a73bf2206809a13c94e7911
2015-09-25 18:49:28 +00:00 · 2015-09-25 18:49:28 +00:00 · 306ce4fde1
commit 306ce4fde1
parent cced974bb2
5 changed files with 127 additions and 143 deletions
--- a/src/dsp/rescaler.c
+++ b/src/dsp/rescaler.c
@ -140,18 +140,13 @@ void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {
      dst[x_out] = v;
      irow[x_out] = frac;   // new fractional start
    }
-  } else if (wrk->fxy_scale) {
+  } else {
    for (x_out = 0; x_out < x_out_max; ++x_out) {
      const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale);
      assert(v >= 0 && v <= 255);
      dst[x_out] = v;
      irow[x_out] = 0;
    }
-  } else {  // very special case for src = dst = 1x1
-    for (x_out = 0; x_out < x_out_max; ++x_out) {
-      dst[x_out] = irow[x_out];
-      irow[x_out] = 0;
-    }
  }
 }

@ -175,8 +170,16 @@ void WebPRescalerExportRow(WebPRescaler* const wrk) {
    assert(!WebPRescalerOutputDone(wrk));
    if (wrk->y_expand) {
      WebPRescalerExportRowExpand(wrk);
-    } else {
+    } else if (wrk->fxy_scale) {
      WebPRescalerExportRowShrink(wrk);
+    } else {  // very special case: src is 1x1 and dst is 1x1 or 2x1
+      int i;
+      assert(wrk->src_width == 1 && wrk->dst_width <= 2);
+      assert(wrk->src_height == 1 && wrk->dst_height == 1);
+      for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) {
+        wrk->dst[i] = wrk->irow[i];
+        wrk->irow[i] = 0;
+      }
    }
    wrk->y_accum += wrk->y_add;
    wrk->dst += wrk->dst_stride;
--- a/src/dsp/rescaler_mips32.c
+++ b/src/dsp/rescaler_mips32.c
@ -141,61 +141,54 @@ static void ExportRowShrink(WebPRescaler* const wrk) {
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
+  const rescaler_t* frow = wrk->frow;
+  const int yscale = wrk->fy_scale * (-wrk->y_accum);
+  int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;
+  const int temp2 = (int)wrk->fxy_scale;
+  const int temp8 = x_out_max << 2;
+
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(!wrk->y_expand);
-  if (wrk->fxy_scale != 0) {
-    const rescaler_t* frow = wrk->frow;
-    const int yscale = wrk->fy_scale * (-wrk->y_accum);
-    int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;
-    const int temp2 = (int)(wrk->fxy_scale);
-    const int temp8 = x_out_max << 2;
+  assert(wrk->fxy_scale != 0);
+  __asm__ volatile(
+    "addiu    %[temp6],    $zero,       -256          \n\t"
+    "addiu    %[temp7],    $zero,       255           \n\t"
+    "li       %[temp3],    0x10000                    \n\t"
+    "li       %[temp4],    0x8000                     \n\t"
+    "addu     %[loop_end], %[frow],     %[temp8]      \n\t"
+  "1:                                                 \n\t"
+    "lw       %[temp0],    0(%[frow])                 \n\t"
+    "mult     %[temp3],    %[temp4]                   \n\t"
+    "addiu    %[frow],     %[frow],     4             \n\t"
+    "sll      %[temp0],    %[temp0],    2             \n\t"
+    "madd     %[temp0],    %[yscale]                  \n\t"
+    "mfhi     %[temp1]                                \n\t"
+    "lw       %[temp0],    0(%[irow])                 \n\t"
+    "addiu    %[dst],      %[dst],      1             \n\t"
+    "addiu    %[irow],     %[irow],     4             \n\t"
+    "subu     %[temp0],    %[temp0],    %[temp1]      \n\t"
+    "mult     %[temp3],    %[temp4]                   \n\t"
+    "sll      %[temp0],    %[temp0],    2             \n\t"
+    "madd     %[temp0],    %[temp2]                   \n\t"
+    "mfhi     %[temp5]                                \n\t"
+    "sw       %[temp1],    -4(%[irow])                \n\t"
+    "and      %[temp0],    %[temp5],    %[temp6]      \n\t"
+    "slti     %[temp1],    %[temp5],    0             \n\t"
+    "beqz     %[temp0],    2f                         \n\t"
+    "xor      %[temp5],    %[temp5],    %[temp5]      \n\t"
+    "movz     %[temp5],    %[temp7],    %[temp1]      \n\t"
+  "2:                                                 \n\t"
+    "sb       %[temp5],    -1(%[dst])                 \n\t"
+    "bne      %[frow],     %[loop_end], 1b            \n\t"

-    __asm__ volatile(
-      "addiu    %[temp6],    $zero,       -256          \n\t"
-      "addiu    %[temp7],    $zero,       255           \n\t"
-      "li       %[temp3],    0x10000                    \n\t"
-      "li       %[temp4],    0x8000                     \n\t"
-      "addu     %[loop_end], %[frow],     %[temp8]      \n\t"
-    "1:                                                 \n\t"
-      "lw       %[temp0],    0(%[frow])                 \n\t"
-      "mult     %[temp3],    %[temp4]                   \n\t"
-      "addiu    %[frow],     %[frow],     4             \n\t"
-      "sll      %[temp0],    %[temp0],    2             \n\t"
-      "madd     %[temp0],    %[yscale]                  \n\t"
-      "mfhi     %[temp1]                                \n\t"
-      "lw       %[temp0],    0(%[irow])                 \n\t"
-      "addiu    %[dst],      %[dst],      1             \n\t"
-      "addiu    %[irow],     %[irow],     4             \n\t"
-      "subu     %[temp0],    %[temp0],    %[temp1]      \n\t"
-      "mult     %[temp3],    %[temp4]                   \n\t"
-      "sll      %[temp0],    %[temp0],    2             \n\t"
-      "madd     %[temp0],    %[temp2]                   \n\t"
-      "mfhi     %[temp5]                                \n\t"
-      "sw       %[temp1],    -4(%[irow])                \n\t"
-      "and      %[temp0],    %[temp5],    %[temp6]      \n\t"
-      "slti     %[temp1],    %[temp5],    0             \n\t"
-      "beqz     %[temp0],    2f                         \n\t"
-      "xor      %[temp5],    %[temp5],    %[temp5]      \n\t"
-      "movz     %[temp5],    %[temp7],    %[temp1]      \n\t"
-    "2:                                                 \n\t"
-      "sb       %[temp5],    -1(%[dst])                 \n\t"
-      "bne      %[frow],     %[loop_end], 1b            \n\t"
-
-      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
-        [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
-        [temp7]"=&r"(temp7), [frow]"+r"(frow), [irow]"+r"(irow),
-        [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
-      : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)
-      : "memory", "hi", "lo"
-    );
-  } else {  // very special case for src = dst = 1x1
-    int x_out;
-    for (x_out = 0; x_out < x_out_max; ++x_out) {
-      dst[x_out] = irow[x_out];
-      irow[x_out] = 0;
-    }
-  }
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
+      [temp7]"=&r"(temp7), [frow]"+r"(frow), [irow]"+r"(irow),
+      [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
+    : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)
+    : "memory", "hi", "lo"
+  );
 }

 // no ExportRowExpand yet.
--- a/src/dsp/rescaler_mips_dsp_r2.c
+++ b/src/dsp/rescaler_mips_dsp_r2.c
@ -136,87 +136,80 @@ static void ExportRowShrink(WebPRescaler* const wrk) {
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  const rescaler_t* frow = wrk->frow;
+  const int yscale = wrk->fy_scale * (-wrk->y_accum);
+
+  int temp0, temp1, temp3, temp4, temp5, temp6, temp7;
+  const int temp2 = (int)wrk->fxy_scale;
+  const int rest = x_out_max & 1;
+  const rescaler_t* const loop_end = frow + x_out_max - rest;
+
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(!wrk->y_expand);
-  if (wrk->fxy_scale) {
-    const rescaler_t* frow = wrk->frow;
-    const int yscale = wrk->fy_scale * (-wrk->y_accum);
-
-    int temp0, temp1, temp3, temp4, temp5, temp6, temp7;
-    const int temp2 = (int)wrk->fxy_scale;
-    const int rest = x_out_max & 1;
-    const rescaler_t* const loop_end = frow + x_out_max - rest;
-
-    __asm__ volatile (
-        ".set             push                                    \n\t"
-        ".set             noreorder                               \n\t"
-        "beq              %[frow],   %[loop_end],   1f            \n\t"
-        " nop                                                     \n\t"
-      "0:                                                         \n\t"
-        "lw               %[temp0],    0(%[frow])                 \n\t"
-        "lw               %[temp1],    0(%[irow])                 \n\t"
-        "lw               %[temp3],    4(%[frow])                 \n\t"
-        "lw               %[temp4],    4(%[irow])                 \n\t"
-        "sll              %[temp0],    %[temp0],      1           \n\t"
-        "sll              %[temp3],    %[temp3],      1           \n\t"
-        "mulq_rs.w        %[temp5],    %[temp0],      %[yscale]   \n\t"
-        "mulq_rs.w        %[temp6],    %[temp3],      %[yscale]   \n\t"
-        "addiu            %[frow],     %[frow],       8           \n\t"
-        "addiu            %[dst],      %[dst],        2           \n\t"
-        "addiu            %[irow],     %[irow],       8           \n\t"
-        "subu             %[temp1],    %[temp1],      %[temp5]    \n\t"
-        "subu             %[temp4],    %[temp4],      %[temp6]    \n\t"
-        "sll              %[temp1],    %[temp1],      1           \n\t"
-        "sll              %[temp4],    %[temp4],      1           \n\t"
-        "mulq_rs.w        %[temp0],    %[temp1],      %[temp2]    \n\t"
-        "mulq_rs.w        %[temp3],    %[temp4],      %[temp2]    \n\t"
-        "sw               %[temp5],    -8(%[irow])                \n\t"
-        "sw               %[temp6],    -4(%[irow])                \n\t"
-        "shll_s.ph        %[temp0],    %[temp0],      7           \n\t"
-        "shll_s.ph        %[temp3],    %[temp3],      7           \n\t"
-        "precrqu_s.qb.ph  %[temp0],    %[temp0],      %[temp3]    \n\t"
-        "sb               %[temp0],    -1(%[dst])                 \n\t"
-        "srl              %[temp0],    %[temp0],      16          \n\t"
-        "bne              %[frow],     %[loop_end],   0b          \n\t"
-        " sb              %[temp0],    -2(%[dst])                 \n\t"
-      "1:                                                         \n\t"
-        "beqz             %[rest],     3f                         \n\t"
-        " nop                                                     \n\t"
-        "addiu            %[temp6],    $zero,         -256        \n\t"
-        "addiu            %[temp7],    $zero,         255         \n\t"
-        "lw               %[temp0],    0(%[frow])                 \n\t"
-        "sll              %[temp0],    %[temp0],      1           \n\t"
-        "mulq_rs.w        %[temp1],    %[temp0],      %[yscale]   \n\t"
-        "lw               %[temp0],    0(%[irow])                 \n\t"
-        "subu             %[temp0],    %[temp0],      %[temp1]    \n\t"
-        "sll              %[temp0],    %[temp0],      1           \n\t"
-        "mulq_rs.w        %[temp5],    %[temp0],      %[temp2]    \n\t"
-        "sw               %[temp1],    0(%[irow])                 \n\t"
-        "and              %[temp0],    %[temp5],      %[temp6]    \n\t"
-        "beqz             %[temp0],    2f                         \n\t"
-        " slti            %[temp1],    %[temp5],      0           \n\t"
-        "xor              %[temp5],    %[temp5],      %[temp5]    \n\t"
-        "movz             %[temp5],    %[temp7],      %[temp1]    \n\t"
-      "2:                                                         \n\t"
-        "sb               %[temp5],    0(%[dst])                  \n\t"
-      "3:                                                         \n\t"
-        ".set             pop                                     \n\t"
-      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
-        [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
-        [temp7]"=&r"(temp7), [frow]"+&r"(frow), [irow]"+&r"(irow),
-        [dst]"+&r"(dst)
-      : [temp2]"r"(temp2), [yscale]"r"(yscale), [loop_end]"r"(loop_end),
-        [rest]"r"(rest)
-      : "memory", "hi", "lo"
-    );
-  } else {  // very special case for src = dst = 1x1
-    int x_out;
-    for (x_out = 0; x_out < x_out_max; ++x_out) {
-      dst[x_out] = irow[x_out];
-      irow[x_out] = 0;
-    }
-  }
+  assert(wrk->fxy_scale);
+  __asm__ volatile (
+      ".set             push                                    \n\t"
+      ".set             noreorder                               \n\t"
+      "beq              %[frow],   %[loop_end],   1f            \n\t"
+      " nop                                                     \n\t"
+    "0:                                                         \n\t"
+      "lw               %[temp0],    0(%[frow])                 \n\t"
+      "lw               %[temp1],    0(%[irow])                 \n\t"
+      "lw               %[temp3],    4(%[frow])                 \n\t"
+      "lw               %[temp4],    4(%[irow])                 \n\t"
+      "sll              %[temp0],    %[temp0],      1           \n\t"
+      "sll              %[temp3],    %[temp3],      1           \n\t"
+      "mulq_rs.w        %[temp5],    %[temp0],      %[yscale]   \n\t"
+      "mulq_rs.w        %[temp6],    %[temp3],      %[yscale]   \n\t"
+      "addiu            %[frow],     %[frow],       8           \n\t"
+      "addiu            %[dst],      %[dst],        2           \n\t"
+      "addiu            %[irow],     %[irow],       8           \n\t"
+      "subu             %[temp1],    %[temp1],      %[temp5]    \n\t"
+      "subu             %[temp4],    %[temp4],      %[temp6]    \n\t"
+      "sll              %[temp1],    %[temp1],      1           \n\t"
+      "sll              %[temp4],    %[temp4],      1           \n\t"
+      "mulq_rs.w        %[temp0],    %[temp1],      %[temp2]    \n\t"
+      "mulq_rs.w        %[temp3],    %[temp4],      %[temp2]    \n\t"
+      "sw               %[temp5],    -8(%[irow])                \n\t"
+      "sw               %[temp6],    -4(%[irow])                \n\t"
+      "shll_s.ph        %[temp0],    %[temp0],      7           \n\t"
+      "shll_s.ph        %[temp3],    %[temp3],      7           \n\t"
+      "precrqu_s.qb.ph  %[temp0],    %[temp0],      %[temp3]    \n\t"
+      "sb               %[temp0],    -1(%[dst])                 \n\t"
+      "srl              %[temp0],    %[temp0],      16          \n\t"
+      "bne              %[frow],     %[loop_end],   0b          \n\t"
+      " sb              %[temp0],    -2(%[dst])                 \n\t"
+    "1:                                                         \n\t"
+      "beqz             %[rest],     3f                         \n\t"
+      " nop                                                     \n\t"
+      "addiu            %[temp6],    $zero,         -256        \n\t"
+      "addiu            %[temp7],    $zero,         255         \n\t"
+      "lw               %[temp0],    0(%[frow])                 \n\t"
+      "sll              %[temp0],    %[temp0],      1           \n\t"
+      "mulq_rs.w        %[temp1],    %[temp0],      %[yscale]   \n\t"
+      "lw               %[temp0],    0(%[irow])                 \n\t"
+      "subu             %[temp0],    %[temp0],      %[temp1]    \n\t"
+      "sll              %[temp0],    %[temp0],      1           \n\t"
+      "mulq_rs.w        %[temp5],    %[temp0],      %[temp2]    \n\t"
+      "sw               %[temp1],    0(%[irow])                 \n\t"
+      "and              %[temp0],    %[temp5],      %[temp6]    \n\t"
+      "beqz             %[temp0],    2f                         \n\t"
+      " slti            %[temp1],    %[temp5],      0           \n\t"
+      "xor              %[temp5],    %[temp5],      %[temp5]    \n\t"
+      "movz             %[temp5],    %[temp7],      %[temp1]    \n\t"
+    "2:                                                         \n\t"
+      "sb               %[temp5],    0(%[dst])                  \n\t"
+    "3:                                                         \n\t"
+      ".set             pop                                     \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
+      [temp7]"=&r"(temp7), [frow]"+&r"(frow), [irow]"+&r"(irow),
+      [dst]"+&r"(dst)
+    : [temp2]"r"(temp2), [yscale]"r"(yscale), [loop_end]"r"(loop_end),
+      [rest]"r"(rest)
+    : "memory", "hi", "lo"
+  );
 }

 // no ExportRowExpand yet.
--- a/src/dsp/rescaler_sse2.c
+++ b/src/dsp/rescaler_sse2.c
@ -187,7 +187,7 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
      dst[x_out] = v;
      irow[x_out] = frac;   // new fractional start
    }
-  } else if (wrk->fxy_scale) {
+  } else {
    const uint32_t scale = wrk->fxy_scale;
    const __m128i mult = _mm_set_epi32(0, scale, 0, scale);
    const __m128i zero = _mm_setzero_si128();
@ -204,11 +204,6 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {
      dst[x_out] = v;
      irow[x_out] = 0;
    }
-  } else {  // very special case for src = 1x1
-    for (x_out = 0; x_out < x_out_max; ++x_out) {
-      dst[x_out] = irow[x_out];
-      irow[x_out] = 0;
-    }
  }
 }

--- a/src/utils/rescaler.c
+++ b/src/utils/rescaler.c
@ -49,7 +49,7 @@ void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
  wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add;
  if (!wrk->y_expand) {
    // note the very special case where x_add = y_add = 1 cannot be represented.
-    // We special-case fxy_scale = 0 in this case, in ExportRowShrink
+    // We special-case fxy_scale = 0 in this case, in WebPRescalerExportRow().
    wrk->fxy_scale = WEBP_RESCALER_FRAC(dst_height, wrk->x_add * wrk->y_add);
    wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub);
  } else {