[Zlib-devel] Some inflate_fast() statistics

Steve Snyder swsnyder at insightbb.com
Wed Nov 1 15:20:50 CST 2006


[This is a continuation of my earlier post "Proposed code change"
in which I described multi-byte copying of data if the data length
was at least 16 bytes.]

I've been thinking about that 16-byte threshhold value.  There's 
nothing magical about that number.  I started at 32 bytes but was 
unhappy with the 4% multi-byte-copy hit rate I got so I dropped it to 
16 bytes, saw a 14% hit rate and called it good.  

Probably the optimum value (the threshhold below which the multi-byte 
copy is so small that it doesn't justify the additional handling) 
varies with the platform being run on and with the data being fed into
inflate_fast().  

I can't do much about platform characteristics, but knowing more about 
the data being handled might be informative.  

Below are some stats on the size and frequency of data copied within 
inflate_fast().  The stats are in the form "x == y" where x is the 
length of data being copied and y is the number of times that size was 
copied.

Sorry if these stats are too web-browser-specific, but I think it's 
possible other nerds^H^H^H^H^Hpeople will find these numbers
interesting.

(A mystery: according to the comments in the original code, 3 bytes
is the minimum data size to copy.  Yet there appears to be copies
of 1- and 2-byte lengths.  Hmm.....)

---------------------------------------------------------------------

Bring up Firefox v2.0 to http://www.yahoo.com.

Total count:   39838
--------------------
   1 ==      9,     2 ==      9,     3 ==   6001,     4 ==   6181,
   5 ==   3730,     6 ==   2766,     7 ==   2527,     8 ==   2400,
   9 ==   1698,    10 ==   1502,    11 ==   1385,    12 ==   1328,
  13 ==    954,    14 ==    785,    15 ==    749,    16 ==    739,
  17 ==    609,    18 ==    466,    19 ==    466,    20 ==    421,
  21 ==    350,    22 ==    293,    23 ==    259,    24 ==    288,
  25 ==    224,    26 ==    171,    27 ==    143,    28 ==    173,
  29 ==    147,    30 ==    129,    31 ==    138,    32 ==    139,
  33 ==    103,    34 ==     84,    35 ==     65,    36 ==    116,
  37 ==     72,    38 ==     57,    39 ==     69,    40 ==     65,
  41 ==     55,    42 ==     42,    43 ==     33,    44 ==     56,
  45 ==     25,    46 ==     21,    47 ==     38,    48 ==     51,
  49 ==     23,    50 ==     19,    51 ==     16,    52 ==     28,
  53 ==     13,    54 ==     20,    55 ==     11,    56 ==     20,
  57 ==     17,    58 ==     10,    59 ==     12,    60 ==     23,
  61 ==     18,    62 ==     14,    63 ==     14,    64 ==     19,
  65 ==     13,    66 ==      9,    67 ==     14,    68 ==     25,
  69 ==      9,    70 ==     10,    71 ==      6,    72 ==     20,
  73 ==      8,    74 ==      4,    75 ==      5,    76 ==      8,
  77 ==      5,    78 ==      2,    79 ==      8,    80 ==      5,
  81 ==      3,    82 ==      4,    83 ==      4,    84 ==     10,
  85 ==      3,    86 ==      4,    87 ==     14,    88 ==      4,
  89 ==      2,    90 ==      6,    91 ==      2,    92 ==      8,
  93 ==     12,    94 ==      4,    95 ==      6,    96 ==      6,
  97 ==      1,    98 ==      1,   100 ==      2,   101 ==      2,
 102 ==      3,   103 ==      2,   104 ==      2,   105 ==      2,
 106 ==      2,   107 ==      2,   108 ==      1,   109 ==      2,
 110 ==      4,   111 ==      4,   112 ==      5,   115 ==      3,
 116 ==      2,   117 ==      2,   118 ==      1,   121 ==      1,
 122 ==      1,   124 ==      1,   127 ==      4,   128 ==      7,
 129 ==      6,   130 ==      2,   132 ==      4,   134 ==      3,
 135 ==      5,   136 ==      4,   137 ==      1,   139 ==      2,
 140 ==      4,   141 ==      2,   142 ==      1,   143 ==      4,
 144 ==      1,   145 ==      2,   146 ==      3,   148 ==      1,
 149 ==      3,   153 ==      1,   157 ==      1,   168 ==      1,
 169 ==      1,   174 ==      1,   175 ==      1,   176 ==      1,
 177 ==      1,   180 ==      1,   186 ==      1,   190 ==      1,
 192 ==      1,   193 ==      1,   199 ==      3,   205 ==      1,
 206 ==      1,   209 ==      1,   212 ==      1,   215 ==      1,
 218 ==      1,   221 ==      1,   223 ==      1,   225 ==      1,
 226 ==      3,   230 ==      1,   232 ==      1,   234 ==      2,
 235 ==      3,   237 ==      1,   238 ==      2,   241 ==      2,
 242 ==      1,   243 ==      1,   248 ==      1,   250 ==      1,
 252 ==      1,   253 ==      1,   254 ==      2,   257 ==      1,
 258 ==   1033


Bring up Firefox v2.0 to blank page, browse Alexa Top 10 (10 most
popular US websites).  Unknown how many of them are compressed.

See: http://www.alexa.com/site/ds/top_sites?cc=US&ts_mode=country&lang=none

Total count:   89817
--------------------
   1 ==     12,     2 ==     14,     3 ==  17164,     4 ==  13851,
   5 ==   8592,     6 ==   6613,     7 ==   5512,     8 ==   4875,
   9 ==   3656,    10 ==   3137,    11 ==   2744,    12 ==   2456,
  13 ==   1862,    14 ==   1601,    15 ==   1460,    16 ==   1299,
  17 ==   1121,    18 ==    905,    19 ==    890,    20 ==    813,
  21 ==    730,    22 ==    640,    23 ==    609,    24 ==    536,
  25 ==    428,    26 ==    393,    27 ==    331,    28 ==    349,
  29 ==    325,    30 ==    301,    31 ==    277,    32 ==    265,
  33 ==    186,    34 ==    192,    35 ==    191,    36 ==    214,
  37 ==    169,    38 ==    134,    39 ==    186,    40 ==    120,
  41 ==    130,    42 ==    112,    43 ==    104,    44 ==    120,
  45 ==     83,    46 ==     66,    47 ==     91,    48 ==    116,
  49 ==    112,    50 ==     72,    51 ==     69,    52 ==    106,
  53 ==    137,    54 ==     68,    55 ==     67,    56 ==     80,
  57 ==     70,    58 ==     70,    59 ==     65,    60 ==     70,
  61 ==     69,    62 ==     50,    63 ==     48,    64 ==     47,
  65 ==     50,    66 ==     37,    67 ==     40,    68 ==     47,
  69 ==     46,    70 ==     45,    71 ==     23,    72 ==     40,
  73 ==     29,    74 ==     14,    75 ==     16,    76 ==     24,
  77 ==     22,    78 ==     13,    79 ==     21,    80 ==     27,
  81 ==     16,    82 ==     18,    83 ==     21,    84 ==     17,
  85 ==      8,    86 ==     21,    87 ==     28,    88 ==     10,
  89 ==      7,    90 ==     17,    91 ==      8,    92 ==     16,
  93 ==     17,    94 ==     13,    95 ==     17,    96 ==     16,
  97 ==      8,    98 ==     30,    99 ==     21,   100 ==     14,
 101 ==      8,   102 ==      9,   103 ==     12,   104 ==     10,
 105 ==      9,   106 ==     11,   107 ==     10,   108 ==     13,
 109 ==     13,   110 ==      8,   111 ==     10,   112 ==      5,
 113 ==      5,   114 ==      6,   115 ==     12,   116 ==      9,
 117 ==     16,   118 ==     27,   119 ==     20,   120 ==     11,
 121 ==     21,   122 ==      7,   123 ==      6,   124 ==      3,
 125 ==      5,   126 ==      4,   127 ==      6,   128 ==      7,
 129 ==     11,   130 ==      5,   132 ==     10,   133 ==     19,
 134 ==      8,   135 ==     17,   136 ==      9,   137 ==      7,
 138 ==      5,   139 ==      6,   140 ==      6,   141 ==      2,
 142 ==      4,   143 ==      8,   144 ==      5,   145 ==      8,
 146 ==      7,   147 ==      8,   148 ==      2,   149 ==      3,
 150 ==      1,   151 ==      4,   152 ==      4,   153 ==      3,
 154 ==      3,   155 ==      5,   156 ==      4,   157 ==      3,
 159 ==      3,   160 ==      7,   161 ==      1,   162 ==      1,
 164 ==      3,   165 ==     21,   166 ==      8,   167 ==      7,
 168 ==      7,   169 ==      4,   170 ==      2,   171 ==      1,
 172 ==      3,   173 ==      2,   174 ==      4,   175 ==      3,
 176 ==      5,   177 ==      2,   179 ==      4,   180 ==      3,
 182 ==      1,   183 ==      7,   184 ==      2,   185 ==      1,
 186 ==      2,   187 ==      3,   189 ==      9,   190 ==      9,
 191 ==      1,   192 ==      7,   193 ==      7,   194 ==      3,
 195 ==      2,   196 ==      3,   197 ==      2,   198 ==      1,
 199 ==      7,   201 ==      2,   202 ==     10,   203 ==     14,
 204 ==      1,   205 ==      3,   206 ==      3,   207 ==      1,
 208 ==      2,   209 ==      2,   210 ==      2,   211 ==      2,
 212 ==      1,   215 ==      1,   218 ==      3,   221 ==      2,
 223 ==      1,   224 ==      3,   225 ==      1,   226 ==      4,
 227 ==      3,   228 ==      2,   230 ==      1,   232 ==      2,
 234 ==      2,   235 ==      4,   237 ==      2,   238 ==      3,
 241 ==      2,   242 ==      1,   243 ==      2,   244 ==      2,
 245 ==      1,   248 ==      1,   249 ==      1,   250 ==      3,
 252 ==      1,   253 ==      1,   254 ==      3,   256 ==      2,
 257 ==      3,   258 ==   1107

Bring up Firefox v2.0 to blank page, display these graphics:

  http://ftp.arl.mil/ftp/historic-computers/png/cyber173.png
  ftp://ftp.simplesystems.org/pub/libpng/png/images/png/Alpha/OwlAlpha.png
  http://www.w3.org/Graphics/PNG/nurbcup2si.png

Total count: 3476884
--------------------
   1 ==    282,     2 ==    281,     3 == 318375,     4 == 877283,
   5 == 661055,     6 == 427466,     7 == 274888,     8 == 225198,
   9 == 152294,    10 ==  92185,    11 ==  78701,    12 ==  84492,
  13 ==  44178,    14 ==  40615,    15 ==  35215,    16 ==  29417,
  17 ==  22377,    18 ==  19533,    19 ==   8106,    20 ==  16750,
  21 ==  13289,    22 ==   4340,    23 ==   6915,    24 ==  10676,
  25 ==   2518,    26 ==   4377,    27 ==   5752,    28 ==   1950,
  29 ==   3429,    30 ==   4455,    31 ==    529,    32 ==   2615,
  33 ==   3315,    34 ==    286,    35 ==   1694,    36 ==   2738,
  37 ==    188,    38 ==   1252,    39 ==   1907,    40 ==    137,
  41 ==    992,    42 ==   1451,    43 ==     65,    44 ==    754,
  45 ==   1064,    46 ==     57,    47 ==    541,    48 ==    790,
  49 ==     35,    50 ==    348,    51 ==    652,    52 ==     34,
  53 ==    314,    54 ==    435,    55 ==     18,    56 ==    240,
  57 ==    353,    58 ==     17,    59 ==    150,    60 ==    316,
  61 ==     21,    62 ==    151,    63 ==    238,    64 ==     22,
  65 ==    112,    66 ==    192,    67 ==     13,    68 ==    112,
  69 ==    162,    70 ==     13,    71 ==     69,    72 ==    153,
  73 ==      8,    74 ==     59,    75 ==    123,    76 ==     10,
  77 ==     57,    78 ==     98,    79 ==     10,    80 ==     39,
  81 ==    106,    82 ==      5,    83 ==     42,    84 ==     91,
  85 ==      4,    86 ==     41,    87 ==    108,    88 ==      8,
  89 ==     44,    90 ==     71,    91 ==      5,    92 ==     32,
  93 ==     73,    94 ==      8,    95 ==     32,    96 ==     61,
  97 ==      3,    98 ==     23,    99 ==     59,   100 ==      8,
 101 ==     20,   102 ==     46,   103 ==      3,   104 ==     23,
 105 ==     56,   106 ==      6,   107 ==     41,   108 ==     62,
 109 ==     11,   110 ==     39,   111 ==     67,   112 ==     17,
 113 ==     26,   114 ==     63,   115 ==     11,   116 ==     24,
 117 ==     48,   118 ==      4,   119 ==     27,   120 ==     44,
 121 ==      6,   122 ==     20,   123 ==     29,   124 ==      9,
 125 ==     22,   126 ==     37,   127 ==     11,   128 ==     17,
 129 ==     37,   130 ==      3,   131 ==     12,   132 ==     37,
 133 ==      4,   134 ==     11,   135 ==     39,   136 ==      5,
 137 ==     10,   138 ==     33,   139 ==      3,   140 ==     18,
 141 ==     30,   142 ==      4,   143 ==     10,   144 ==     30,
 145 ==      3,   146 ==      8,   147 ==     27,   148 ==      1,
 149 ==     11,   150 ==     36,   151 ==      1,   152 ==      9,
 153 ==     29,   154 ==      1,   155 ==     13,   156 ==     28,
 157 ==      2,   158 ==      1,   159 ==     34,   160 ==      1,
 161 ==      5,   162 ==     31,   164 ==      7,   165 ==     23,
 166 ==      1,   167 ==      6,   168 ==     27,   169 ==      2,
 170 ==      7,   171 ==     27,   173 ==     10,   174 ==     21,
 175 ==      2,   176 ==      7,   177 ==     16,   179 ==      8,
 180 ==     22,   182 ==      3,   183 ==     19,   184 ==      2,
 185 ==      9,   186 ==     21,   188 ==      6,   189 ==     28,
 191 ==      5,   192 ==     21,   193 ==      1,   194 ==      5,
 195 ==     19,   197 ==      6,   198 ==     10,   199 ==      4,
 200 ==      3,   201 ==     18,   203 ==      7,   204 ==     21,
 205 ==      1,   206 ==      6,   207 ==     19,   209 ==      4,
 210 ==      9,   211 ==      1,   212 ==      6,   213 ==     16,
 214 ==      2,   215 ==      7,   216 ==     10,   218 ==      9,
 219 ==     14,   221 ==      5,   222 ==     12,   224 ==      1,
 225 ==     14,   226 ==      2,   227 ==      1,   228 ==     11,
 229 ==      1,   230 ==      6,   231 ==     16,   232 ==      1,
 233 ==      3,   234 ==     17,   235 ==      4,   236 ==      5,
 237 ==     15,   238 ==      3,   239 ==      3,   240 ==     11,
 241 ==      1,   242 ==      3,   243 ==     15,   245 ==      4,
 246 ==     13,   247 ==      1,   248 ==      5,   249 ==     12,
 250 ==      2,   251 ==      4,   252 ==     13,   253 ==      1,
 254 ==     10,   255 ==      8,   256 ==      1,   257 ==      4,
 258 ==   5793

---------------------------------------------------------------------

--- mozilla.ff20.orig/modules/zlib/src/inffast.c	2005-08-04 14:14:14.000000000 -0500
+++ mozilla/modules/zlib/src/inffast.c	2006-10-31 07:10:23.000000000 -0500
@@ -21,6 +21,12 @@
    - Pentium III (Anderson)
    - M68060 (Nikl)
  */
+
+#if defined(_X86_)
+/* for PUPCPY() */
+#define POSTINC
+#endif /* _X86_ */
+
 #ifdef POSTINC
 #  define OFF 0
 #  define PUP(a) *(a)++
@@ -29,6 +35,83 @@
 #  define PUP(a) *++(a)
 #endif
 
+#include <stdio.h>
+#define ZLIB_OUTPUT_WIDTH_CNT       4
+#define ZLIB_LEN_ARRAY_SIZE     MAX_MATCH+1
+#define ZLIB_LEN_ARRAY_TRIGGER  25000
+int len_cnt_i, len_cnt_j;
+int zlib_len_cnt_total = 0, shownow = 0;
+unsigned int zlib_len_cnt_array[ZLIB_LEN_ARRAY_SIZE];
+
+#if defined(_X86_)
+#define PUPCPY(dst, src, len)    \
+   if (zlib_len_cnt_total == 0)                                               \
+      { memset(&zlib_len_cnt_array[0], 0, ZLIB_LEN_ARRAY_SIZE*4); }           \
+   else if (shownow || ((zlib_len_cnt_total % ZLIB_LEN_ARRAY_TRIGGER) == 0)) {\
+      len_cnt_j = 0;                                                          \
+      printf("Total count: % 7u\n---------------------\n",                    \
+         zlib_len_cnt_total);                                                 \
+      for (len_cnt_i=0; len_cnt_i<ZLIB_LEN_ARRAY_SIZE; len_cnt_i++) {         \
+         if ( zlib_len_cnt_array[len_cnt_i] ) {                               \
+            printf("% 4u == % 6u,  ",                                         \
+               len_cnt_i, zlib_len_cnt_array[len_cnt_i]);                     \
+            if ( (++len_cnt_j % ZLIB_OUTPUT_WIDTH_CNT) == 0 ) printf("\n");   \
+         }                                                                    \
+      }                                                                       \
+      printf("\n"); shownow = 0;                                              \
+   }                                                                          \
+   zlib_len_cnt_array[len]++;                                                 \
+   zlib_len_cnt_total++;                                                      \
+   /* 32-bit copy justified by length and addresses? */ \
+   if ( (len > 16) && ((dst<=src) || (dst>(src+len))) ) \
+   {                             \
+      /* align to 32-bit dest */ \
+      while ((unsigned)dst & 3) {\
+         *dst++ = *src++; len--; \
+      }                          \
+      /* copy bulk of data */    \
+      while (len > 7) {          \
+         *((unsigned *)dst) =    \
+            *((unsigned *)src);  \
+         *((unsigned *)dst+1) =  \
+            *((unsigned *)src+1);\
+         dst+=8; src+=8; len-=8; \
+      }                          \
+      /* copy most odd bytes */  \
+      while (len > 1) {          \
+         *((short *)dst) =       \
+            *((short *)src);     \
+         dst+=2; src+=2; len-=2; \
+      }                          \
+   }                             \
+   else                          \
+   {                             \
+      /* copy bulk of data */    \
+      while (len > 3) {          \
+         dst[0] = src[0];        \
+         dst[1] = src[1];        \
+         dst[2] = src[2];        \
+         dst[3] = src[3];        \
+         dst+=4; src+=4; len-=4; \
+      }                          \
+   }                             \
+   /* singly copy odd byte(s) */ \
+   while (len--) *dst++ = *src++;
+#else /* _X86_ */
+#define PUPCPY(dst, src, len)    \
+   while (len > 2) {             \
+      PUP(dst) = PUP(src);       \
+      PUP(dst) = PUP(src);       \
+      PUP(dst) = PUP(src);       \
+      len -= 3;                  \
+   }                             \
+   if (len) {                    \
+      PUP(dst) = PUP(src);       \
+      if (len > 1)               \
+         PUP(dst) = PUP(src);    \
+   }
+#endif /* _X86_ */
+
 /*
    Decode literal, length, and distance codes and write out the resulting
    literal and match bytes until either not enough input or output is
@@ -231,31 +314,11 @@
                             from = out - dist;  /* rest from output */
                         }
                     }
-                    while (len > 2) {
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        len -= 3;
-                    }
-                    if (len) {
-                        PUP(out) = PUP(from);
-                        if (len > 1)
-                            PUP(out) = PUP(from);
-                    }
+                    PUPCPY(out, from, len)
                 }
                 else {
                     from = out - dist;          /* copy direct from output */
-                    do {                        /* minimum length is three */
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        len -= 3;
-                    } while (len > 2);
-                    if (len) {
-                        PUP(out) = PUP(from);
-                        if (len > 1)
-                            PUP(out) = PUP(from);
-                    }
+                    PUPCPY(out, from, len)      /* minimum length is three */
                 }
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */



More information about the Zlib-devel mailing list