[Zlib-devel] [PATCH] zlib: optimize inffast when copying direct from output
Joakim Tjernlund
joakim.tjernlund at transmode.se
Thu Apr 22 14:35:27 EDT 2010
Ping?
Joakim Tjernlund <Joakim.Tjernlund at transmode.se> wrote on 2010/04/20 19:53:02:
>
> This is a straight port of the work I did in Linux on inflate to
> optimize it.
>
> This patch optimizes the direct copy procedure. It uses
> an endian-independent get_unaligned(), but only in one place.
> The copy loop just above this one could also use this optimization, but I
> haven't done so as I have not tested whether it is a win there too.
>
> On my MPC8321 this is about 17% faster on my JFFS2 root FS than the
> original.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund at transmode.se>
> ---
> inffast.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------
> 1 files changed, 60 insertions(+), 9 deletions(-)
>
> diff --git a/inffast.c b/inffast.c
> index 2f1d60b..0a18c58 100644
> --- a/inffast.c
> +++ b/inffast.c
> @@ -21,12 +21,31 @@
> - Pentium III (Anderson)
> - M68060 (Nikl)
> */
> +union uu {
> + unsigned short us;
> + unsigned char b[2];
> +};
> +
> +/* Endian-independent version */
> +static inline unsigned short
> +get_unaligned16(const unsigned short *p)
> +{
> + union uu mm;
> + unsigned char *b = (unsigned char *)p;
> +
> + mm.b[0] = b[0];
> + mm.b[1] = b[1];
> + return mm.us;
> +}
> +
> #ifdef POSTINC
> # define OFF 0
> # define PUP(a) *(a)++
> +# define UP_UNALIGNED(a) get_unaligned16((a)++)
> #else
> # define OFF 1
> # define PUP(a) *++(a)
> +# define UP_UNALIGNED(a) get_unaligned16(++(a))
> #endif
>
> /*
> @@ -266,18 +285,50 @@ unsigned start; /* inflate()'s starting value
> for strm->avail_out */
> }
> }
> else {
> + unsigned short *sout;
> + unsigned long loops;
> +
> from = out - dist; /* copy direct from output */
> - do { /* minimum length is three */
> - PUP(out) = PUP(from);
> - PUP(out) = PUP(from);
> - PUP(out) = PUP(from);
> - len -= 3;
> - } while (len > 2);
> - if (len) {
> + /* minimum length is three */
> + /* Align out addr */
> + if (!((long)(out - 1 + OFF) & 1)) {
> PUP(out) = PUP(from);
> - if (len > 1)
> - PUP(out) = PUP(from);
> + len--;
> + }
> + sout = (unsigned short *)(out - OFF);
> + if (dist > 2) {
> + unsigned short *sfrom;
> +
> + sfrom = (unsigned short *)(from - OFF);
> + loops = len >> 1;
> + do {
> +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> + PUP(sout) = PUP(sfrom);
> +#else
> + PUP(sout) = UP_UNALIGNED(sfrom);
> +#endif
> + } while (--loops);
> + out = (unsigned char *)sout + OFF;
> + from = (unsigned char *)sfrom + OFF;
> + } else { /* dist == 1 or dist == 2 */
> + unsigned short pat16;
> +
> + pat16 = *(sout-1+OFF);
> + if (dist == 1) {
> + union uu mm;
> + /* copy one char pattern to both bytes */
> + mm.us = pat16;
> + mm.b[0] = mm.b[1];
> + pat16 = mm.us;
> + }
> + loops = len >> 1;
> + do
> + PUP(sout) = pat16;
> + while (--loops);
> + out = (unsigned char *)sout + OFF;
> }
> + if (len & 1)
> + PUP(out) = PUP(from);
> }
> }
> else if ((op & 64) == 0) { /* 2nd level distance code */
> --
> 1.6.4.4
>
More information about the Zlib-devel
mailing list