[Zlib-devel] [PATCH 05/13] Add preprocessor define to tune Adler32 loop unrolling.

Jim Kukunas james.t.kukunas at linux.intel.com
Mon Nov 25 17:21:41 EST 2013


Excessive loop unrolling is detrimental to performance. This patch
adds a preprocessor define, ADLER32_UNROLL_LESS, that reduces the
unrolling factor of the Adler-32 summation loops from 16 to 8.

It also updates the configure script to set this define by default on x86.
---
 adler32.c |   16 ++++++++++++++++
 configure |    6 ++++++
 2 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/adler32.c b/adler32.c
index a868f07..1007e38 100644
--- a/adler32.c
+++ b/adler32.c
@@ -104,10 +104,19 @@ uLong ZEXPORT adler32(adler, buf, len)
     /* do length NMAX blocks -- requires just one modulo operation */
     while (len >= NMAX) {
         len -= NMAX;
+#ifndef ADLER32_UNROLL_LESS
         n = NMAX / 16;          /* NMAX is divisible by 16 */
+#else
+        n = NMAX / 8;           /* NMAX is divisible by 8 */
+#endif
         do {
+#ifndef ADLER32_UNROLL_LESS
             DO16(buf);          /* 16 sums unrolled */
             buf += 16;
+#else
+            DO8(buf,0);         /* 8 sums unrolled */
+            buf += 8;
+#endif
         } while (--n);
         MOD(adler);
         MOD(sum2);
@@ -115,10 +124,17 @@ uLong ZEXPORT adler32(adler, buf, len)
 
     /* do remaining bytes (less than NMAX, still just one modulo) */
     if (len) {                  /* avoid modulos if none remaining */
+#ifndef ADLER32_UNROLL_LESS
         while (len >= 16) {
             len -= 16;
             DO16(buf);
             buf += 16;
+#else
+        while (len >= 8) {
+            len -= 8;
+            DO8(buf, 0);
+            buf += 8;
+#endif
         }
         while (len--) {
             adler += *buf++;
diff --git a/configure b/configure
index 02b2be4..2766df0 100755
--- a/configure
+++ b/configure
@@ -751,6 +751,9 @@ case "${ARCH}" in
 
         CFLAGS="${CFLAGS} -DUNALIGNED_OK -D_REENTRANT"
         SFLAGS="${SFLAGS} -DUNALIGNED_OK -D_REENTRANT"
+
+        CFLAGS="${CFLAGS} -DADLER32_UNROLL_LESS"
+        SFLAGS="${SFLAGS} -DADLER32_UNROLL_LESS"
     ;;
     i386 | i486 | i586 | i686)
         OBJC="${OBJC} x86.o"
@@ -758,6 +761,9 @@ case "${ARCH}" in
 
         CFLAGS="${CFLAGS} -DUNALIGNED_OK -D_REENTRANT"
         SFLAGS="${SFLAGS} -DUNALIGNED_OK -D_REENTRANT"
+
+        CFLAGS="${CFLAGS} -DADLER32_UNROLL_LESS"
+        SFLAGS="${SFLAGS} -DADLER32_UNROLL_LESS"
     ;;
 esac
 
-- 
1.7.1





More information about the Zlib-devel mailing list