[Zlib-devel] crc32 big/little endian

Joakim Tjernlund joakim.tjernlund at transmode.se
Wed Apr 21 18:26:00 EDT 2010


>
> From: Török Edwin
> > Unless someone beats me to it I'll write a short benchmark code and
> > report results.
>
> Thanks for the program... I was surprised by the choice of 16384 byte buffer
> as input to crc32, so I modified the program (attached) to test buffer size,
> NOBYFOUR and performance on ARM.
>
> The buffer size choice has a major impact on speed on x86 Prescott but
> optimization levels (so long as optimization is done) only have a small effect:
>
> Buffer -O3   -Os   -O2   -O0
> 64   18644   19035   18650   40816
> 128   17060   17250   17080   36057
> 256   16280   16366   16276   34619
> 512   15874   15926   15890   33596
> 1024   15902   15928   15903   33742
> 2048   15722   15710   15699   32548
> 4096   15586   15602   15586   33543
> 8192   15624   15590   15587   34835
> 16384   18162   18146   18149   37775
> text   13473   12481   12293   13746
> data   296   296   296   296
> bss   16420   16396   16420   16420
> total   30189   29173   29709   30462
> error   <1%   <1%   <1%   5-10%

gcc has always had a hard time optimizing crc32. I recently discovered that
-O1 was noticeably faster than -O2 with gcc 4.3.4 in some crc32 tests I was
doing a while back. One must help gcc by laying out the C code so it matches
what you want. Below is a good start I think. The next step would
be to rearrange the code inside the DOLIT4/DOBIG4 macros. I haven't tested
this yet though.


diff --git a/crc32.c b/crc32.c
index 91be372..e7ebca9 100644
--- a/crc32.c
+++ b/crc32.c
@@ -258,7 +258,6 @@ unsigned long ZEXPORT crc32(crc, buf, len)
 #define DOLIT4 c ^= *buf4++; \
         c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
             crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
-#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4

 /* ========================================================================= */
 local unsigned long crc32_little(crc, buf, len)
@@ -266,7 +265,7 @@ local unsigned long crc32_little(crc, buf, len)
     const unsigned char FAR *buf;
     unsigned len;
 {
-    register u4 c;
+    register u4 c, loops;
     register const u4 FAR *buf4;

     c = (u4)crc;
@@ -276,20 +275,19 @@ local unsigned long crc32_little(crc, buf, len)
         len--;
     }

+    loops = len >> 2;
+    len = len & 3;
     buf4 = (const u4 FAR *)(const void FAR *)buf;
-    while (len >= 32) {
-        DOLIT32;
-        len -= 32;
-    }
-    while (len >= 4) {
+    for ( ; loops; --loops) {
         DOLIT4;
-        len -= 4;
     }
-    buf = (const unsigned char FAR *)buf4;

-    if (len) do {
-        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
-    } while (--len);
+    if (len) {
+	buf = (const unsigned char FAR *)buf4;
+	do {
+	    c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+	} while (--len);
+    }
     c = ~c;
     return (unsigned long)c;
 }
@@ -298,7 +296,6 @@ local unsigned long crc32_little(crc, buf, len)
 #define DOBIG4 c ^= *++buf4; \
         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
-#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4

 /* ========================================================================= */
 local unsigned long crc32_big(crc, buf, len)
@@ -306,7 +303,7 @@ local unsigned long crc32_big(crc, buf, len)
     const unsigned char FAR *buf;
     unsigned len;
 {
-    register u4 c;
+    register u4 c, loops;
     register const u4 FAR *buf4;

     c = REV((u4)crc);
@@ -316,22 +313,19 @@ local unsigned long crc32_big(crc, buf, len)
         len--;
     }

+    loops = len >> 2;
+    len = len & 3;
     buf4 = (const u4 FAR *)(const void FAR *)buf;
-    buf4--;
-    while (len >= 32) {
-        DOBIG32;
-        len -= 32;
-    }
-    while (len >= 4) {
+    for (buf4--; loops; --loops) {
         DOBIG4;
-        len -= 4;
     }
-    buf4++;
-    buf = (const unsigned char FAR *)buf4;

-    if (len) do {
-        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
-    } while (--len);
+    if (len) {
+	do {
+	    buf = (const unsigned char FAR *)(buf4 + 1) - 1;
+	    c = crc_table[4][(c >> 24) ^ *++buf] ^ (c << 8);
+	} while (--len);
+    }
     c = ~c;
     return (unsigned long)(REV(c));
 }





More information about the Zlib-devel mailing list