[Zlib-devel] Performance patch set

Joakim Tjernlund joakim.tjernlund at transmode.se
Sun May 9 17:23:01 EDT 2010


> Hi devs,
>
> as I wrote some time ago, I have been working on
> Subversion performance issues and came across
> some optimization potential in zlib as well.
>
> Inflate is now about 50% faster and a few minor
> optimizations for deflate were done along the way,
> too.
>
> Although the changes are mostly independent
> from each other, it would have been difficult to
> create truly independent patches. Therefore, it's
> all in one package. The patch was made against
> 1.2.5 release version.

Hi

I just browsed your patch and noticed you are touching inffast where
I already have an optimization pending. I think it would be
useful for you too. See below.

 Jocke


>From 6780b584ffe47e3d9cdc6d30e3515ada9d79fbdc Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund at transmode.se>
Date: Tue, 20 Apr 2010 19:46:06 +0200
Subject: [PATCH] zlib: optimize inffast when copying direct from output

This is a straight port from the work I did in Linux on inflate to
optimize it.

This patch optimizes the direct copy procedure.  Uses
an endian-independent get_unaligned() but only in one place.
The copy loop just above this one can also use this optimization, but I
haven't done so as I have not tested if it is a win there too.

On my MPC8321 this is about 17% faster on my JFFS2 root FS than the
original.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund at transmode.se>
---
 inffast.c |   69 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/inffast.c b/inffast.c
index 2f1d60b..0a18c58 100644
--- a/inffast.c
+++ b/inffast.c
@@ -21,12 +21,31 @@
    - Pentium III (Anderson)
    - M68060 (Nikl)
  */
+union uu {
+    unsigned short us;
+    unsigned char b[2];
+};
+
+/* Endian independed version */
+static inline unsigned short
+get_unaligned16(const unsigned short *p)
+{
+    union uu  mm;
+    unsigned char *b = (unsigned char *)p;
+
+    mm.b[0] = b[0];
+    mm.b[1] = b[1];
+    return mm.us;
+}
+
 #ifdef POSTINC
 #  define OFF 0
 #  define PUP(a) *(a)++
+#  define UP_UNALIGNED(a) get_unaligned16((a)++)
 #else
 #  define OFF 1
 #  define PUP(a) *++(a)
+#  define UP_UNALIGNED(a) get_unaligned16(++(a))
 #endif

 /*
@@ -266,18 +285,50 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
                     }
                 }
                 else {
+                    unsigned short *sout;
+                    unsigned long loops;
+
                     from = out - dist;          /* copy direct from output */
-                    do {                        /* minimum length is three */
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        len -= 3;
-                    } while (len > 2);
-                    if (len) {
+                    /* minimum length is three */
+                    /* Align out addr */
+                    if (!((long)(out - 1 + OFF) & 1)) {
                         PUP(out) = PUP(from);
-                        if (len > 1)
-                            PUP(out) = PUP(from);
+                        len--;
+                    }
+                    sout = (unsigned short *)(out - OFF);
+                    if (dist > 2) {
+                        unsigned short *sfrom;
+
+                        sfrom = (unsigned short *)(from - OFF);
+                        loops = len >> 1;
+                        do {
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+                            PUP(sout) = PUP(sfrom);
+#else
+                            PUP(sout) = UP_UNALIGNED(sfrom);
+#endif
+                        } while (--loops);
+                        out = (unsigned char *)sout + OFF;
+                        from = (unsigned char *)sfrom + OFF;
+                    } else { /* dist == 1 or dist == 2 */
+                        unsigned short pat16;
+
+                        pat16 = *(sout-1+OFF);
+                        if (dist == 1) {
+                            union uu mm;
+                            /* copy one char pattern to both bytes */
+                            mm.us = pat16;
+                            mm.b[0] = mm.b[1];
+                            pat16 = mm.us;
+                        }
+                        loops = len >> 1;
+                        do
+                            PUP(sout) = pat16;
+                        while (--loops);
+                        out = (unsigned char *)sout + OFF;
                     }
+                    if (len & 1)
+                        PUP(out) = PUP(from);
                 }
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */
--
1.6.4.4





More information about the Zlib-devel mailing list