From eb1b7788d09e772756495a7a99f0f4515c7f3183 Mon Sep 17 00:00:00 2001 From: Roy Oursler Date: Mon, 20 Jun 2016 11:37:15 -0700 Subject: [PATCH] igzip: Move code in igzip_stateless to hide latencies more in ivybridge. Signed-off-by: Roy Oursler Reviewed-by: Greg Tucker --- igzip/igzip_stateless.asm | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/igzip/igzip_stateless.asm b/igzip/igzip_stateless.asm index 16934cc..05f4b09 100644 --- a/igzip/igzip_stateless.asm +++ b/igzip/igzip_stateless.asm @@ -236,34 +236,30 @@ loop2: inc f_i + MOVQ tmp6, xdata + shr tmp5, 16 + mov tmp8, tmp5 + compute_hash tmp6, tmp5 + mov dist2 %+ w, f_i %+ w sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2] mov [stream + _internal_state_head + 2 * hash2], f_i %+ w dec dist2 - MOVQ tmp8, xdata - shr tmp8, 16 - mov tmp6, tmp8 - compute_hash tmp2, tmp8 - ; if ((dist-1) < (D-1)) { cmp dist %+ d, (D-1) cmovae dist, tmp3 add dist, 1 neg dist + shr tmp8, 8 + compute_hash tmp2, tmp8 + cmp dist2 %+ d, (D-1) cmovae dist2, tmp3 add dist2, 1 neg dist2 - shr tmp6, 8 - compute_hash tmp3, tmp6 - - MOVD xhash, tmp2 %+ d - PINSRD xhash, tmp3 %+ d, 1 - PAND xhash, xhash, xmask - MARK __stateless_compare_ %+ ARCH ;; Check for long len/dist match (>7) with first literal MOVQ len, xdata @@ -272,6 +268,10 @@ MARK __stateless_compare_ %+ ARCH xor len, [tmp1 + dist] jz compare_loop + MOVD xhash, tmp6 %+ d + PINSRD xhash, tmp2 %+ d, 1 + PAND xhash, xhash, xmask + ;; Check for len/dist match (>7) with second literal MOVQ len2, xdata xor len2, [tmp1 + dist2 + 1] @@ -605,6 +605,9 @@ end: MARK __stateless_compare_loops_ %+ ARCH compare_loop: + MOVD xhash, tmp6 %+ d + PINSRD xhash, tmp2 %+ d, 1 + PAND xhash, xhash, xmask lea tmp2, [tmp1 + dist] %if (COMPARE_TYPE == 1) compare250 tmp1, tmp2, len, tmp3