[Zlib-devel] crc32 big/little endian
Joakim Tjernlund
joakim.tjernlund at transmode.se
Wed Apr 21 18:26:00 EDT 2010
>
> From: Török Edwin
> > Unless someone beats me to it I'll write a short benchmark code and
> > report results.
>
> Thanks for the program... I was surprised by the choice of 16384 byte buffer
> as input to crc32, so I modified the program (attached) to test buffer size,
> NOBYFOUR and performance on ARM.
>
> The buffer size choice has a major impact on speed on x86 Prescott but
> optimization levels (so long as optimization is done) only have a small effect:
>
> Buffer -O3 -Os -O2 -O0
> 64 18644 19035 18650 40816
> 128 17060 17250 17080 36057
> 256 16280 16366 16276 34619
> 512 15874 15926 15890 33596
> 1024 15902 15928 15903 33742
> 2048 15722 15710 15699 32548
> 4096 15586 15602 15586 33543
> 8192 15624 15590 15587 34835
> 16384 18162 18146 18149 37775
> text 13473 12481 12293 13746
> data 296 296 296 296
> bss 16420 16396 16420 16420
> total 30189 29173 29709 30462
> error <1% <1% <1% 5-10%
gcc has always had a hard time optimizing crc32. I recently discovered that
-O1 was noticeably faster than -O2 with gcc 4.3.4 in some crc32 tests I was
doing a while back. One must help gcc by laying out the C code so it matches
what you want. Below is a good start I think. The next step would
be to rearrange the code inside the DOLIT4/DOBIG4 macros. I haven't tested
this yet though.
diff --git a/crc32.c b/crc32.c
index 91be372..e7ebca9 100644
--- a/crc32.c
+++ b/crc32.c
@@ -258,7 +258,6 @@ unsigned long ZEXPORT crc32(crc, buf, len)
#define DOLIT4 c ^= *buf4++; \
c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
-#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
/* ========================================================================= */
local unsigned long crc32_little(crc, buf, len)
@@ -266,7 +265,7 @@ local unsigned long crc32_little(crc, buf, len)
const unsigned char FAR *buf;
unsigned len;
{
- register u4 c;
+ register u4 c, loops;
register const u4 FAR *buf4;
c = (u4)crc;
@@ -276,20 +275,19 @@ local unsigned long crc32_little(crc, buf, len)
len--;
}
+ loops = len >> 2;
+ len = len & 3;
buf4 = (const u4 FAR *)(const void FAR *)buf;
- while (len >= 32) {
- DOLIT32;
- len -= 32;
- }
- while (len >= 4) {
+ for ( ; loops; --loops) {
DOLIT4;
- len -= 4;
}
- buf = (const unsigned char FAR *)buf4;
- if (len) do {
- c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
- } while (--len);
+ if (len) {
+ buf = (const unsigned char FAR *)buf4;
+ do {
+ c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+ } while (--len);
+ }
c = ~c;
return (unsigned long)c;
}
@@ -298,7 +296,6 @@ local unsigned long crc32_little(crc, buf, len)
#define DOBIG4 c ^= *++buf4; \
c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
-#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
/* ========================================================================= */
local unsigned long crc32_big(crc, buf, len)
@@ -306,7 +303,7 @@ local unsigned long crc32_big(crc, buf, len)
const unsigned char FAR *buf;
unsigned len;
{
- register u4 c;
+ register u4 c, loops;
register const u4 FAR *buf4;
c = REV((u4)crc);
@@ -316,22 +313,19 @@ local unsigned long crc32_big(crc, buf, len)
len--;
}
+ loops = len >> 2;
+ len = len & 3;
buf4 = (const u4 FAR *)(const void FAR *)buf;
- buf4--;
- while (len >= 32) {
- DOBIG32;
- len -= 32;
- }
- while (len >= 4) {
+ for (buf4--; loops; --loops) {
DOBIG4;
- len -= 4;
}
- buf4++;
- buf = (const unsigned char FAR *)buf4;
- if (len) do {
- c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
- } while (--len);
+ if (len) {
+ do {
+ buf = (const unsigned char FAR *)(buf4 + 1) - 1;
+ c = crc_table[4][(c >> 24) ^ *++buf] ^ (c << 8);
+ } while (--len);
+ }
c = ~c;
return (unsigned long)(REV(c));
}
More information about the Zlib-devel
mailing list