[Zlib-devel] zlib 1.2.4 released!

Sat Mar 20 21:27:21 EDT 2010

On Mar 18, 2010, at 9:52 AM, John Bowler wrote:
>> Recent tests for libpng suggest that reducing the size of the
>>buffers in zlib can even increase inflate performance.
>
>I'm not sure I understand what you're saying here.  Can you provide some
>more information on the tests that were run and the results?

These tests were performed with 60,000,000 bytes of data, deflated (amounting
to 58339 bytes of deflate data) as input.  The test program is attached,
followed by a bash script (it uses $(( for counting) to do some math on the
results.

The output from a single run of ziptest.sh, annotated to explain the
columns, is:

jbowler at marsco:~/src/libpng/t$ ./ziptest.sh ../h2m.dat
bufsize user-time standard-deviation
128     0.873     0.00383749
256     0.6998    0.0047989
512     0.9996    0.0044759
1024    1.1404    0.00401995
2048    1.2056    0.00348712
4096    1.2368    0.00253979
8192    1.2646    0.00306113

You might care to check that I got the awk script right, particularly
the standard deviation (I just wrote the script!)  I also misspoke - the
optimal buffer size (for this, extreme, case) is 256 bytes.  This mirrors
results that were found for libpng in other experiments.  I haven't
experimented with the difference between variations in input and output
buffer size, but it should be simple and obvious how to hack the script.

The test case reads the input data in [bufsize] chunks, passes it to
inflate and receives the decompressed output in [bufsize] chunks.  It does this
once to discover the length of the decompressed data (60,000,000), allocates
a buffer in memory of that size then does it again to fill the buffer.

The 'gtzip' test program supports four options:

Default: Just write the decompressed data to stdout on the first (only) pass.
-0:      Don't even do that, just junk the data.
-m:      Write the data to a memory buffer on the second pass.
-f:      Write the data to stdout on the second pass.

Notice that in all cases the buffer size being controlled is an intermediate
buffer - it appears between the stdio buffering and the internal zlib window.
I believe the reason the small buffer size is advantageous is that it minimizes
the second level cache entries hit by the act of copying the data to/from the
intermediate buffer without imposing undue overhead in the extra function calls
(until the buffer size drops to 128.)

This data isn't directly applicable to GZ because the GZ buffer is used to buffer
a read from the kernel (read(2)), not a copy from another (stdio) buffer.  However
the results do apply to the separate output buffer where that is used as an
intermediate buffer.

John Bowler <jbowler at acm.org>

----------------------------- gtzip.c ---------------------------------
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <zlib.h>

/*
 * Inflate a zlib stream read from stdin.
 *
 * Input is pulled from stdin through the IBC-byte buffer 'ib' and inflated
 * into the OBC-byte buffer 'ob'; IBC and OBC are compile-time macros (the
 * accompanying ziptest.sh passes them with -DIBC=... -DOBC=...).  'ob' is
 * drained whenever it fills up and once more when the stream ends.
 *
 * debug  - non-zero: report every read and every drain of 'ob' on stderr.
 * output - non-zero: write the inflated data to stdout; zero: discard it.
 * buffer - when not null, inflated data is also copied here until 'sz'
 *          bytes have been stored; later data is not copied.
 * sz     - number of bytes that may be stored in 'buffer'.
 *
 * Returns zs.total_out, the number of bytes produced by inflate.  Every
 * failure (inflateInit, inflate, inflateEnd, writing or flushing stdout)
 * prints a diagnostic on stderr and terminates the process with exit(1).
 */
size_t decomp(int debug, int output, Bytef *buffer, size_t sz) {
    Bytef ib[IBC], ob[OBC];
    z_stream zs;
    int rc = Z_OK;

    zs.next_in = ib;
    zs.avail_in = 0;
    zs.next_out = ob;
    zs.avail_out = (uInt)sizeof ob;
    zs.zalloc = Z_NULL;
    zs.zfree = Z_NULL;
    zs.opaque = Z_NULL;

    if (inflateInit(&zs) != Z_OK) {
        fprintf(stderr, "inflateInit failed\n");
        exit(1);
    }

    do {
        /* Refill the input buffer once inflate has consumed all of it. */
        if (zs.avail_in == 0) {
            zs.next_in = ib;
            zs.avail_in = (uInt)fread(ib, 1, sizeof ib, stdin);
            /* avail_in is an unsigned int (uInt): print it with %u. */
            if (debug) fprintf(stderr, "read %u bytes\n", zs.avail_in);
        }

        /* Drain 'ob' when it is full, or partially full at end of stream. */
        if (zs.avail_out == 0 || rc == Z_STREAM_END) {
            size_t avail = (sizeof ob) - zs.avail_out;
            size_t oc = 0;

            /* size_t must be printed with %zu; %d is undefined behaviour
             * wherever size_t is wider than int. */
            if (debug) fprintf(stderr, "write %zu bytes\n", avail);

            /* Copy as much as still fits into the caller's buffer. */
            if (buffer != 0 && sz > 0) {
                size_t n = (sz < avail) ? sz : avail;

                memcpy(buffer, ob, n);
                buffer += n;
                sz -= n;
            }

            /* fwrite may write less than requested: loop until done. */
            while (oc < avail) {
                size_t out;

                if (output)
                    out = fwrite(ob + oc, 1, avail - oc, stdout);
                else
                    out = avail - oc;

                /* 'out' is unsigned, so no progress means exactly zero. */
                if (out == 0) {
                    perror("output");
                    fprintf(stderr, "inflate: failed to write %zu bytes\n",
                            avail - oc);
                    exit(1);
                }
                oc += out;
            }

            if (rc == Z_STREAM_END)
                break;

            zs.next_out = ob;
            zs.avail_out = (uInt)sizeof ob;
        }

        rc = inflate(&zs, Z_NO_FLUSH);
        /* Z_STREAM_END re-enters the loop once so the tail of 'ob' is
         * drained before the break above. */
    } while (rc == Z_OK || rc == Z_STREAM_END);

    if (rc != Z_STREAM_END) {
        fprintf(stderr, "inflate: error %d\n", rc);
        exit(1);
    }

    /* At end, rc was Z_STREAM_END. */
    if (inflateEnd(&zs) != Z_OK) {
        fprintf(stderr, "inflateEnd: error\n");
        exit(1);
    }

    if (fflush(stdout)) {
        perror("output");
        fprintf(stderr, "output failed\n");
        exit(1);
    }

    return zs.total_out;
}

/*
 * gtzip: inflate the zlib stream on stdin.
 *
 * The second character of the first argument selects the mode:
 *   (none)  write the inflated data to stdout in a single pass
 *   d       as above, with debug tracing on stderr
 *   0       inflate and discard the data
 *   m       first pass discards the data to learn its size, second pass
 *           copies it into a heap buffer of that size
 *   f       as 'm', and the second pass also writes the data to stdout
 *
 * The two-pass modes re-read stdin from the start, so stdin must be
 * seekable (for example redirected from a file).
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated or stdin cannot
 * be repositioned; decomp() exits the process itself on inflate or output
 * errors.
 */
int main(int argc, char **argv) {
    int debug = 0, output = 1;
    size_t sz = 0;

    /* Only look at argv[1][1] when argv[1] is not the empty string;
     * otherwise the read would go past the terminating NUL. */
    if (argc > 1 && argv[1][0] != '\0') {
        switch (argv[1][1]) {
        case 'd': debug = 1; break;
        case '0': output = 0; break;
        case 'm': output = 2; break;
        case 'f': output = 3; break;
        default: break;
        }
    }

    sz = decomp(debug, output == 1, 0, 0);
    if (output == 2 || output == 3) {
        Bytef *buffer = malloc(sz);

        /* size_t is printed with %zu, not %d. */
        fprintf(stderr, "allocate %zu bytes\n", sz);
        if (buffer == NULL && sz > 0) {
            perror("malloc");
            return 1;
        }

        /* Like rewind(stdin), but the failure (e.g. stdin is a pipe) is
         * reported instead of being silently ignored. */
        if (fseek(stdin, 0L, SEEK_SET) != 0) {
            perror("rewind stdin");
            free(buffer);
            return 1;
        }

        decomp(debug, output == 3, buffer, sz);
        free(buffer);
    }

    return 0;
}
---------------------------ziptest.sh----------------------------------
#!/bin/bash
# ziptest.sh - time gtzip at several intermediate buffer sizes.
#
# Usage: ziptest.sh INFILE
#
# For each buffer size, gtzip.c is rebuilt with IBC and OBC set to that size
# and "gtzip -m" is run 20 times on INFILE.  One line is printed per size:
#
#     bufsize  mean-user-time  sample-standard-deviation
#
# bash is required: the script relies on the shell's "time" keyword, and the
# awk program parses bash's default "user 0m0.873s" report format.
infile="$1"
if test -z "$infile"
then
    echo "usage: $0 INFILE" >&2
    exit 2
fi

for i in 128 256 512 1024 2048 4096 8192
do
    # Stop rather than benchmark a stale binary when the build fails.
    gcc -Os -DIBC=$i -DOBC=$i -o gtzip gtzip.c -lz || exit 1

    count=0
    while test $count -lt 20
    do
	count=$(($count + 1))
	# "time" reports on the shell's stderr; the braces let 2>&1 capture it.
	{ time ./gtzip -m <"$infile" >tmpfile; } 2>&1
    done | awk -v bufsize="$i" '
	# $2 looks like "0m0.873s": skip the "0m" prefix and take five
	# characters of seconds.
	$1=="user"{ t=substr($2,3,5); ++t0; t1+=t; t2+=t*t}
	END{
	    if (t0 < 2) {
		print bufsize ": fewer than two timing samples" > "/dev/stderr"
		exit 1
	    }
	    av=t1/t0
	    # Sample variance with the n-1 divisor.  Dividing the population
	    # variance (t2/t0 - av*av) by (t0-1) would give the squared
	    # standard error of the mean instead.
	    var=(t2-t0*av*av)/(t0-1)
	    if (var < 0) var=0	# guard against rounding just below zero
	    print bufsize, av, sqrt(var)
	}'
done