[Zlib-devel] [PATCH] zlib: optimize inffast when copying direct from output

Joakim Tjernlund joakim.tjernlund at transmode.se
Thu Apr 22 14:35:27 EDT 2010


Ping?

Joakim Tjernlund <Joakim.Tjernlund at transmode.se> wrote on 2010/04/20 19:53:02:
>
> This is a straight port from the work I did in Linux on inflate to
> optimize it.
>
> This patch optimizes the direct copy procedure.  It uses
> an endian-independent get_unaligned(), but only in one place.
> The copy loop just above this one can also use this optimization, but I
> haven't done so as I have not tested whether it is a win there too.
>
> On my MPC8321 this is about 17% faster on my JFFS2 root FS than the
> original.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund at transmode.se>
> ---
>  inffast.c |   69 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------
>  1 files changed, 60 insertions(+), 9 deletions(-)
>
> diff --git a/inffast.c b/inffast.c
> index 2f1d60b..0a18c58 100644
> --- a/inffast.c
> +++ b/inffast.c
> @@ -21,12 +21,31 @@
>     - Pentium III (Anderson)
>     - M68060 (Nikl)
>   */
> +union uu {
> +    unsigned short us;
> +    unsigned char b[2];
> +};
> +
> +/* Endian independed version */
> +static inline unsigned short
> +get_unaligned16(const unsigned short *p)
> +{
> +    union uu  mm;
> +    unsigned char *b = (unsigned char *)p;
> +
> +    mm.b[0] = b[0];
> +    mm.b[1] = b[1];
> +    return mm.us;
> +}
> +
>  #ifdef POSTINC
>  #  define OFF 0
>  #  define PUP(a) *(a)++
> +#  define UP_UNALIGNED(a) get_unaligned16((a)++)
>  #else
>  #  define OFF 1
>  #  define PUP(a) *++(a)
> +#  define UP_UNALIGNED(a) get_unaligned16(++(a))
>  #endif
>
>  /*
> @@ -266,18 +285,50 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
>                      }
>                  }
>                  else {
> +                    unsigned short *sout;
> +                    unsigned long loops;
> +
>                      from = out - dist;          /* copy direct from output */
> -                    do {                        /* minimum length is three */
> -                        PUP(out) = PUP(from);
> -                        PUP(out) = PUP(from);
> -                        PUP(out) = PUP(from);
> -                        len -= 3;
> -                    } while (len > 2);
> -                    if (len) {
> +                    /* minimum length is three */
> +                    /* Align out addr */
> +                    if (!((long)(out - 1 + OFF) & 1)) {
>                          PUP(out) = PUP(from);
> -                        if (len > 1)
> -                            PUP(out) = PUP(from);
> +                        len--;
> +                    }
> +                    sout = (unsigned short *)(out - OFF);
> +                    if (dist > 2) {
> +                        unsigned short *sfrom;
> +
> +                        sfrom = (unsigned short *)(from - OFF);
> +                        loops = len >> 1;
> +                        do {
> +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> +                            PUP(sout) = PUP(sfrom);
> +#else
> +                            PUP(sout) = UP_UNALIGNED(sfrom);
> +#endif
> +                        } while (--loops);
> +                        out = (unsigned char *)sout + OFF;
> +                        from = (unsigned char *)sfrom + OFF;
> +                    } else { /* dist == 1 or dist == 2 */
> +                        unsigned short pat16;
> +
> +                        pat16 = *(sout-1+OFF);
> +                        if (dist == 1) {
> +                            union uu mm;
> +                            /* copy one char pattern to both bytes */
> +                            mm.us = pat16;
> +                            mm.b[0] = mm.b[1];
> +                            pat16 = mm.us;
> +                        }
> +                        loops = len >> 1;
> +                        do
> +                            PUP(sout) = pat16;
> +                        while (--loops);
> +                        out = (unsigned char *)sout + OFF;
>                      }
> +                    if (len & 1)
> +                        PUP(out) = PUP(from);
>                  }
>              }
>              else if ((op & 64) == 0) {          /* 2nd level distance code */
> --
> 1.6.4.4
>





More information about the Zlib-devel mailing list