Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <flate.h>
5 typedef struct Chain Chain;
6 typedef struct Chains Chains;
7 typedef struct Dyncode Dyncode;
8 typedef struct Huff Huff;
9 typedef struct LZblock LZblock;
10 typedef struct LZstate LZstate;
12 enum
13 {
14 /*
15 * deflate format paramaters
16 */
17 DeflateUnc = 0, /* uncompressed block */
18 DeflateFix = 1, /* fixed huffman codes */
19 DeflateDyn = 2, /* dynamic huffman codes */
21 DeflateEob = 256, /* end of block code in lit/len book */
22 DeflateMaxBlock = 64*1024-1, /* maximum size of uncompressed block */
24 DeflateMaxExp = 10, /* maximum expansion for a block */
26 LenStart = 257, /* start of length codes in litlen */
27 Nlitlen = 288, /* number of litlen codes */
28 Noff = 30, /* number of offset codes */
29 Nclen = 19, /* number of codelen codes */
31 MaxOff = 32*1024,
32 MinMatch = 3, /* shortest match possible */
33 MaxMatch = 258, /* longest match possible */
35 /*
36 * huffman code paramaters
37 */
38 MaxLeaf = Nlitlen,
39 MaxHuffBits = 16, /* max bits in a huffman code */
40 ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits,
42 /*
43 * coding of the lz parse
44 */
45 LenFlag = 1 << 3,
46 LenShift = 4, /* leaves enough space for MinMatchMaxOff */
47 MaxLitRun = LenFlag - 1,
49 /*
50 * internal lz paramaters
51 */
52 DeflateOut = 4096, /* output buffer size */
53 BlockSize = 8192, /* attempted input read quanta */
54 DeflateBlock = DeflateMaxBlock & ~(BlockSize - 1),
55 MinMatchMaxOff = 4096, /* max profitable offset for small match;
56 * assumes 8 bits for len, 5+10 for offset
57 * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS
58 */
59 HistSlop = 512, /* must be at lead MaxMatch */
60 HistBlock = 64*1024,
61 HistSize = HistBlock + HistSlop,
63 HashLog = 13,
64 HashSize = 1<<HashLog,
66 MaxOffCode = 256, /* biggest offset looked up in direct table */
68 EstLitBits = 8,
69 EstLenBits = 4,
70 EstOffBits = 5
71 };
73 /*
74 * knuth vol. 3 multiplicative hashing
75 * each byte x chosen according to rules
76 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
77 * with reasonable spread between the bytes & their complements
78 *
79 * the 3 byte value appears to be as almost good as the 4 byte value,
80 * and might be faster on some machines
81 */
82 /*
83 #define hashit(c) ((u32int)((c) * 0x6b43a9) >> (24 - HashLog))
84 */
85 #define hashit(c) ((u32int)(((c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
87 /*
88 * lempel-ziv style compression state
89 */
90 struct LZstate
91 {
92 uchar hist[HistSize];
93 ulong pos; /* current location in history buffer */
94 ulong avail; /* data available after pos */
95 int eof;
96 ushort hash[HashSize]; /* hash chains */
97 ushort nexts[MaxOff];
98 int now; /* pos in hash chains */
99 int dot; /* dawn of time in history */
100 int prevlen; /* lazy matching state */
101 int prevoff;
102 int maxcheck; /* compressor tuning */
104 uchar obuf[DeflateOut];
105 uchar *out; /* current position in the output buffer */
106 uchar *eout;
107 ulong bits; /* bit shift register */
108 int nbits;
109 int rbad; /* got an error reading the buffer */
110 int wbad; /* got an error writing the buffer */
111 int (*w)(void*, void*, int);
112 void *wr;
114 ulong totr; /* total input size */
115 ulong totw; /* total output size */
116 int debug;
117 };
119 struct LZblock
121 ushort parse[DeflateMaxBlock / 2 + 1];
122 int lastv; /* value being constucted for parse */
123 ulong litlencount[Nlitlen];
124 ulong offcount[Noff];
125 ushort *eparse; /* limit for parse table */
126 int bytes; /* consumed from the input */
127 int excost; /* cost of encoding extra len & off bits */
128 };
130 /*
131 * huffman code table
132 */
133 struct Huff
135 short bits; /* length of the code */
136 ushort encode; /* the code */
137 };
139 /*
140 * encoding of dynamic huffman trees
141 */
142 struct Dyncode
144 int nlit;
145 int noff;
146 int nclen;
147 int ncode;
148 Huff codetab[Nclen];
149 uchar codes[Nlitlen+Noff];
150 uchar codeaux[Nlitlen+Noff];
151 };
153 static int deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int));
154 static int lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish);
155 static void wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*);
156 static int bitcost(Huff*, ulong*, int);
157 static int huffcodes(Dyncode*, Huff*, Huff*);
158 static void wrdyncode(LZstate*, Dyncode*);
159 static void lzput(LZstate*, ulong bits, int nbits);
160 static void lzflushbits(LZstate*);
161 static void lzflush(LZstate *lz);
162 static void lzwrite(LZstate *lz, void *buf, int n);
164 static int hufftabinit(Huff*, int, ulong*, int);
165 static int mkgzprecode(Huff*, ulong *, int, int);
167 static int mkprecode(Huff*, ulong *, int, int, ulong*);
168 static void nextchain(Chains*, int);
169 static void leafsort(ulong*, ushort*, int, int);
171 /* conversion from len to code word */
172 static int lencode[MaxMatch];
174 /*
175 * conversion from off to code word
176 * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]
177 */
178 static int offcode[MaxOffCode];
179 static int bigoffcode[256];
181 /* litlen code words LenStart-285 extra bits */
182 static int litlenbase[Nlitlen-LenStart];
183 static int litlenextra[Nlitlen-LenStart] =
185 /* 257 */ 0, 0, 0,
186 /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
187 /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
188 /* 280 */ 4, 5, 5, 5, 5, 0, 0, 0
189 };
191 /* offset code word extra bits */
192 static int offbase[Noff];
193 static int offextra[] =
195 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
196 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
197 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
198 0, 0,
199 };
201 /* order code lengths */
202 static int clenorder[Nclen] =
204 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
205 };
207 /* static huffman tables */
208 static Huff litlentab[Nlitlen];
209 static Huff offtab[Noff];
210 static Huff hofftab[Noff];
212 /* bit reversal for brain dead endian swap in huffman codes */
213 static uchar revtab[256];
214 static ulong nlits;
215 static ulong nmatches;
217 int
218 deflateinit(void)
220 ulong bitcount[MaxHuffBits];
221 int i, j, ci, n;
223 /* byte reverse table */
224 for(i=0; i<256; i++)
225 for(j=0; j<8; j++)
226 if(i & (1<<j))
227 revtab[i] |= 0x80 >> j;
229 /* static Litlen bit lengths */
230 for(i=0; i<144; i++)
231 litlentab[i].bits = 8;
232 for(i=144; i<256; i++)
233 litlentab[i].bits = 9;
234 for(i=256; i<280; i++)
235 litlentab[i].bits = 7;
236 for(i=280; i<Nlitlen; i++)
237 litlentab[i].bits = 8;
239 memset(bitcount, 0, sizeof(bitcount));
240 bitcount[8] += 144 - 0;
241 bitcount[9] += 256 - 144;
242 bitcount[7] += 280 - 256;
243 bitcount[8] += Nlitlen - 280;
245 if(!hufftabinit(litlentab, Nlitlen, bitcount, 9))
246 return FlateInternal;
248 /* static offset bit lengths */
249 for(i = 0; i < Noff; i++)
250 offtab[i].bits = 5;
252 memset(bitcount, 0, sizeof(bitcount));
253 bitcount[5] = Noff;
255 if(!hufftabinit(offtab, Noff, bitcount, 5))
256 return FlateInternal;
258 bitcount[0] = 0;
259 bitcount[1] = 0;
260 if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits))
261 return FlateInternal;
263 /* conversion tables for lens & offs to codes */
264 ci = 0;
265 for(i = LenStart; i < 286; i++){
266 n = ci + (1 << litlenextra[i - LenStart]);
267 litlenbase[i - LenStart] = ci;
268 for(; ci < n; ci++)
269 lencode[ci] = i;
271 /* patch up special case for len MaxMatch */
272 lencode[MaxMatch-MinMatch] = 285;
273 litlenbase[285-LenStart] = MaxMatch-MinMatch;
275 ci = 0;
276 for(i = 0; i < 16; i++){
277 n = ci + (1 << offextra[i]);
278 offbase[i] = ci;
279 for(; ci < n; ci++)
280 offcode[ci] = i;
283 ci = ci >> 7;
284 for(; i < 30; i++){
285 n = ci + (1 << (offextra[i] - 7));
286 offbase[i] = ci << 7;
287 for(; ci < n; ci++)
288 bigoffcode[ci] = i;
290 return FlateOk;
293 static void
294 deflatereset(LZstate *lz, int level, int debug)
296 memset(lz->nexts, 0, sizeof lz->nexts);
297 memset(lz->hash, 0, sizeof lz->hash);
298 lz->totr = 0;
299 lz->totw = 0;
300 lz->pos = 0;
301 lz->avail = 0;
302 lz->out = lz->obuf;
303 lz->eout = &lz->obuf[DeflateOut];
304 lz->prevlen = MinMatch - 1;
305 lz->prevoff = 0;
306 lz->now = MaxOff + 1;
307 lz->dot = lz->now;
308 lz->bits = 0;
309 lz->nbits = 0;
310 lz->maxcheck = (1 << level);
311 lz->maxcheck -= lz->maxcheck >> 2;
312 if(lz->maxcheck < 2)
313 lz->maxcheck = 2;
314 else if(lz->maxcheck > 1024)
315 lz->maxcheck = 1024;
317 lz->debug = debug;
320 int
321 deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug)
323 LZstate *lz;
324 LZblock *lzb;
325 int ok;
327 lz = malloc(sizeof *lz + sizeof *lzb);
328 if(lz == nil)
329 return FlateNoMem;
330 lzb = (LZblock*)&lz[1];
332 deflatereset(lz, level, debug);
333 lz->w = w;
334 lz->wr = wr;
335 lz->wbad = 0;
336 lz->rbad = 0;
337 lz->eof = 0;
338 ok = FlateOk;
339 while(!lz->eof || lz->avail){
340 ok = deflateb(lz, lzb, rr, r);
341 if(ok != FlateOk)
342 break;
344 if(ok == FlateOk && lz->rbad)
345 ok = FlateInputFail;
346 if(ok == FlateOk && lz->wbad)
347 ok = FlateOutputFail;
348 free(lz);
349 return ok;
352 static int
353 deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int))
355 Dyncode dyncode, hdyncode;
356 Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen];
357 ulong litcount[Nlitlen];
358 long nunc, ndyn, nfix, nhuff;
359 uchar *slop, *hslop;
360 ulong ep;
361 int i, n, m, mm, nslop;
363 memset(lzb->litlencount, 0, sizeof lzb->litlencount);
364 memset(lzb->offcount, 0, sizeof lzb->offcount);
365 lzb->litlencount[DeflateEob]++;
367 lzb->bytes = 0;
368 lzb->eparse = lzb->parse;
369 lzb->lastv = 0;
370 lzb->excost = 0;
372 slop = &lz->hist[lz->pos];
373 n = lz->avail;
374 while(n < DeflateBlock && (!lz->eof || lz->avail)){
375 /*
376 * fill the buffer as much as possible,
377 * while leaving room for MaxOff history behind lz->pos,
378 * and not reading more than we can handle.
380 * make sure we read at least HistSlop bytes.
381 */
382 if(!lz->eof){
383 ep = lz->pos + lz->avail;
384 if(ep >= HistBlock)
385 ep -= HistBlock;
386 m = HistBlock - MaxOff - lz->avail;
387 if(m > HistBlock - n)
388 m = HistBlock - n;
389 if(m > (HistBlock + HistSlop) - ep)
390 m = (HistBlock + HistSlop) - ep;
391 if(m & ~(BlockSize - 1))
392 m &= ~(BlockSize - 1);
394 /*
395 * be nice to the caller: stop reads that are too small.
396 * can only get here when we've already filled the buffer some
397 */
398 if(m < HistSlop){
399 if(!m || !lzb->bytes)
400 return FlateInternal;
401 break;
404 mm = (*r)(rr, &lz->hist[ep], m);
405 if(mm > 0){
406 /*
407 * wrap data to end if we're read it from the beginning
408 * this way, we don't have to wrap searches.
410 * wrap reads past the end to the beginning.
411 * this way, we can guarantee minimum size reads.
412 */
413 if(ep < HistSlop)
414 memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep);
415 else if(ep + mm > HistBlock)
416 memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock);
418 lz->totr += mm;
419 n += mm;
420 lz->avail += mm;
421 }else{
422 if(mm < 0)
423 lz->rbad = 1;
424 lz->eof = 1;
427 ep = lz->pos + lz->avail;
428 if(ep > HistSize)
429 ep = HistSize;
430 if(lzb->bytes + ep - lz->pos > DeflateMaxBlock)
431 ep = lz->pos + DeflateMaxBlock - lzb->bytes;
432 m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof);
433 lzb->bytes += m;
434 lz->pos = (lz->pos + m) & (HistBlock - 1);
435 lz->avail -= m;
437 if(lzb->lastv)
438 *lzb->eparse++ = lzb->lastv;
439 if(lzb->eparse > lzb->parse + nelem(lzb->parse))
440 return FlateInternal;
441 nunc = lzb->bytes;
443 if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits)
444 || !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits))
445 return FlateInternal;
447 ndyn = huffcodes(&dyncode, dlitlentab, dofftab);
448 if(ndyn < 0)
449 return FlateInternal;
450 ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen)
451 + bitcost(dofftab, lzb->offcount, Noff)
452 + lzb->excost;
454 memset(litcount, 0, sizeof litcount);
456 nslop = nunc;
457 if(nslop > &lz->hist[HistSize] - slop)
458 nslop = &lz->hist[HistSize] - slop;
460 for(i = 0; i < nslop; i++)
461 litcount[slop[i]]++;
462 hslop = &lz->hist[HistSlop - nslop];
463 for(; i < nunc; i++)
464 litcount[hslop[i]]++;
465 litcount[DeflateEob]++;
467 if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits))
468 return FlateInternal;
469 nhuff = huffcodes(&hdyncode, hlitlentab, hofftab);
470 if(nhuff < 0)
471 return FlateInternal;
472 nhuff += bitcost(hlitlentab, litcount, Nlitlen);
474 nfix = bitcost(litlentab, lzb->litlencount, Nlitlen)
475 + bitcost(offtab, lzb->offcount, Noff)
476 + lzb->excost;
478 lzput(lz, lz->eof && !lz->avail, 1);
480 if(lz->debug){
481 fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n",
482 nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff);
483 fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail);
486 if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){
487 lzput(lz, DeflateUnc, 2);
488 lzflushbits(lz);
490 lzput(lz, nunc & 0xff, 8);
491 lzput(lz, (nunc >> 8) & 0xff, 8);
492 lzput(lz, ~nunc & 0xff, 8);
493 lzput(lz, (~nunc >> 8) & 0xff, 8);
494 lzflush(lz);
496 lzwrite(lz, slop, nslop);
497 lzwrite(lz, &lz->hist[HistSlop], nunc - nslop);
498 }else if(ndyn < nfix && ndyn < nhuff){
499 lzput(lz, DeflateDyn, 2);
501 wrdyncode(lz, &dyncode);
502 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab);
503 lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits);
504 }else if(nhuff < nfix){
505 lzput(lz, DeflateDyn, 2);
507 wrdyncode(lz, &hdyncode);
509 m = 0;
510 for(i = nunc; i > MaxLitRun; i -= MaxLitRun)
511 lzb->parse[m++] = MaxLitRun;
512 lzb->parse[m++] = i;
514 wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab);
515 lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits);
516 }else{
517 lzput(lz, DeflateFix, 2);
519 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab);
520 lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);
523 if(lz->eof && !lz->avail){
524 lzflushbits(lz);
525 lzflush(lz);
527 return FlateOk;
530 static void
531 lzwrite(LZstate *lz, void *buf, int n)
533 int nw;
535 if(n && lz->w){
536 nw = (*lz->w)(lz->wr, buf, n);
537 if(nw != n){
538 lz->w = 0;
539 lz->wbad = 1;
540 }else
541 lz->totw += n;
545 static void
546 lzflush(LZstate *lz)
548 lzwrite(lz, lz->obuf, lz->out - lz->obuf);
549 lz->out = lz->obuf;
552 static void
553 lzput(LZstate *lz, ulong bits, int nbits)
555 bits = (bits << lz->nbits) | lz->bits;
556 for(nbits += lz->nbits; nbits >= 8; nbits -= 8){
557 *lz->out++ = bits;
558 if(lz->out == lz->eout)
559 lzflush(lz);
560 bits >>= 8;
562 lz->bits = bits;
563 lz->nbits = nbits;
566 static void
567 lzflushbits(LZstate *lz)
569 if(lz->nbits)
570 lzput(lz, 0, 8 - (lz->nbits & 7));
573 /*
574 * write out a block of n samples,
575 * given lz encoding and counts for huffman tables
576 */
577 static void
578 wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab)
580 ushort *off;
581 int i, run, offset, lit, len, c;
583 if(out->debug > 2){
584 for(off = soff; off < eoff; ){
585 offset = *off++;
586 run = offset & MaxLitRun;
587 if(run){
588 for(i = 0; i < run; i++){
589 lit = out->hist[litoff & (HistBlock - 1)];
590 litoff++;
591 fprint(2, "\tlit %.2ux %c\n", lit, lit);
593 if(!(offset & LenFlag))
594 continue;
595 len = offset >> LenShift;
596 offset = *off++;
597 }else if(offset & LenFlag){
598 len = offset >> LenShift;
599 offset = *off++;
600 }else{
601 len = 0;
602 offset >>= LenShift;
604 litoff += len + MinMatch;
605 fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch);
609 for(off = soff; off < eoff; ){
610 offset = *off++;
611 run = offset & MaxLitRun;
612 if(run){
613 for(i = 0; i < run; i++){
614 lit = out->hist[litoff & (HistBlock - 1)];
615 litoff++;
616 lzput(out, litlentab[lit].encode, litlentab[lit].bits);
618 if(!(offset & LenFlag))
619 continue;
620 len = offset >> LenShift;
621 offset = *off++;
622 }else if(offset & LenFlag){
623 len = offset >> LenShift;
624 offset = *off++;
625 }else{
626 len = 0;
627 offset >>= LenShift;
629 litoff += len + MinMatch;
630 c = lencode[len];
631 lzput(out, litlentab[c].encode, litlentab[c].bits);
632 c -= LenStart;
633 if(litlenextra[c])
634 lzput(out, len - litlenbase[c], litlenextra[c]);
636 if(offset < MaxOffCode)
637 c = offcode[offset];
638 else
639 c = bigoffcode[offset >> 7];
640 lzput(out, offtab[c].encode, offtab[c].bits);
641 if(offextra[c])
642 lzput(out, offset - offbase[c], offextra[c]);
646 /*
647 * look for the longest, closest string which matches
648 * the next prefix. the clever part here is looking for
649 * a string 1 longer than the previous best match.
651 * follows the recommendation of limiting number of chains
652 * which are checked. this appears to be the best heuristic.
653 */
654 static int
655 lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m)
657 uchar *s, *t;
658 int ml, off, last;
660 ml = check;
661 if(runlen >= 8)
662 check >>= 2;
663 *m = 0;
664 if(p + runlen >= es)
665 return runlen;
666 last = 0;
667 for(; check-- > 0; then = nexts[then & (MaxOff-1)]){
668 off = (ushort)(now - then);
669 if(off <= last || off > MaxOff)
670 break;
671 s = p + runlen;
672 t = hist + (((p - hist) - off) & (HistBlock-1));
673 t += runlen;
674 for(; s >= p; s--){
675 if(*s != *t)
676 goto matchloop;
677 t--;
680 /*
681 * we have a new best match.
682 * extend it to it's maximum length
683 */
684 t += runlen + 2;
685 s += runlen + 2;
686 for(; s < es; s++){
687 if(*s != *t)
688 break;
689 t++;
691 runlen = s - p;
692 *m = off - 1;
693 if(s == es || runlen > ml)
694 break;
695 matchloop:;
696 last = off;
698 return runlen;
701 static int
702 lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish)
704 ulong cont, excost, *litlencount, *offcount;
705 uchar *p, *q, *s, *es;
706 ushort *nexts, *hash;
707 int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer;
709 litlencount = lzb->litlencount;
710 offcount = lzb->offcount;
711 nexts = lz->nexts;
712 hash = lz->hash;
713 now = lz->now;
715 p = &lz->hist[lz->pos];
716 if(lz->prevlen != MinMatch - 1)
717 p++;
719 /*
720 * hash in the links for any hanging link positions,
721 * and calculate the hash for the current position.
722 */
723 n = MinMatch;
724 if(n > ep - p)
725 n = ep - p;
726 cont = 0;
727 for(i = 0; i < n - 1; i++){
728 m = now - ((MinMatch-1) - i);
729 if(m < lz->dot)
730 continue;
731 s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1));
733 cont = (s[0] << 16) | (s[1] << 8) | s[2];
734 h = hashit(cont);
735 prevoff = 0;
736 for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){
737 v = (ushort)(now - then);
738 if(v <= prevoff || v >= (MinMatch-1) - i)
739 break;
740 prevoff = v;
742 if(then == (ushort)m)
743 continue;
744 nexts[m & (MaxOff-1)] = hash[h];
745 hash[h] = m;
747 for(i = 0; i < n; i++)
748 cont = (cont << 8) | p[i];
750 /*
751 * now must point to the index in the nexts array
752 * corresponding to p's position in the history
753 */
754 prevlen = lz->prevlen;
755 prevoff = lz->prevoff;
756 maxdefer = lz->maxcheck >> 2;
757 excost = 0;
758 v = lzb->lastv;
759 for(;;){
760 es = p + MaxMatch;
761 if(es > ep){
762 if(!finish || p >= ep)
763 break;
764 es = ep;
767 h = hashit(cont);
768 runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m);
770 /*
771 * back out of small matches too far in the past
772 */
773 if(runlen == MinMatch && m >= MinMatchMaxOff){
774 runlen = MinMatch - 1;
775 m = 0;
778 /*
779 * record the encoding and increment counts for huffman trees
780 * if we get a match, defer selecting it until we check for
781 * a longer match at the next position.
782 */
783 if(prevlen >= runlen && prevlen != MinMatch - 1){
784 /*
785 * old match at least as good; use that one
786 */
787 n = prevlen - MinMatch;
788 if(v || n){
789 *parse++ = v | LenFlag | (n << LenShift);
790 *parse++ = prevoff;
791 }else
792 *parse++ = prevoff << LenShift;
793 v = 0;
795 n = lencode[n];
796 litlencount[n]++;
797 excost += litlenextra[n - LenStart];
799 if(prevoff < MaxOffCode)
800 n = offcode[prevoff];
801 else
802 n = bigoffcode[prevoff >> 7];
803 offcount[n]++;
804 excost += offextra[n];
806 runlen = prevlen - 1;
807 prevlen = MinMatch - 1;
808 nmatches++;
809 }else if(runlen == MinMatch - 1){
810 /*
811 * no match; just put out the literal
812 */
813 if(++v == MaxLitRun){
814 *parse++ = v;
815 v = 0;
817 litlencount[*p]++;
818 nlits++;
819 runlen = 1;
820 }else{
821 if(prevlen != MinMatch - 1){
822 /*
823 * longer match now. output previous literal,
824 * update current match, and try again
825 */
826 if(++v == MaxLitRun){
827 *parse++ = v;
828 v = 0;
830 litlencount[p[-1]]++;
831 nlits++;
834 prevoff = m;
836 if(runlen < maxdefer){
837 prevlen = runlen;
838 runlen = 1;
839 }else{
840 n = runlen - MinMatch;
841 if(v || n){
842 *parse++ = v | LenFlag | (n << LenShift);
843 *parse++ = prevoff;
844 }else
845 *parse++ = prevoff << LenShift;
846 v = 0;
848 n = lencode[n];
849 litlencount[n]++;
850 excost += litlenextra[n - LenStart];
852 if(prevoff < MaxOffCode)
853 n = offcode[prevoff];
854 else
855 n = bigoffcode[prevoff >> 7];
856 offcount[n]++;
857 excost += offextra[n];
859 prevlen = MinMatch - 1;
860 nmatches++;
864 /*
865 * update the hash for the newly matched data
866 * this is constructed so the link for the old
867 * match in this position must be at the end of a chain,
868 * and will expire when this match is added, ie it will
869 * never be examined by the match loop.
870 * add to the hash chain only if we have the real hash data.
871 */
872 for(q = p + runlen; p != q; p++){
873 if(p + MinMatch <= ep){
874 h = hashit(cont);
875 nexts[now & (MaxOff-1)] = hash[h];
876 hash[h] = now;
877 if(p + MinMatch < ep)
878 cont = (cont << 8) | p[MinMatch];
880 now++;
884 /*
885 * we can just store away the lazy state and
886 * pick it up next time. the last block will have finish set
887 * so we won't have any pending matches
888 * however, we need to correct for how much we've encoded
889 */
890 if(prevlen != MinMatch - 1)
891 p--;
893 lzb->excost += excost;
894 lzb->eparse = parse;
895 lzb->lastv = v;
897 lz->now = now;
898 lz->prevlen = prevlen;
899 lz->prevoff = prevoff;
901 return p - &lz->hist[lz->pos];
904 /*
905 * make up the dynamic code tables, and return the number of bits
906 * needed to transmit them.
907 */
908 static int
909 huffcodes(Dyncode *dc, Huff *littab, Huff *offtab)
911 Huff *codetab;
912 uchar *codes, *codeaux;
913 ulong codecount[Nclen], excost;
914 int i, n, m, v, c, nlit, noff, ncode, nclen;
916 codetab = dc->codetab;
917 codes = dc->codes;
918 codeaux = dc->codeaux;
920 /*
921 * trim the sizes of the tables
922 */
923 for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--)
925 for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--)
928 /*
929 * make the code-length code
930 */
931 for(i = 0; i < nlit; i++)
932 codes[i] = littab[i].bits;
933 for(i = 0; i < noff; i++)
934 codes[i + nlit] = offtab[i].bits;
936 /*
937 * run-length compress the code-length code
938 */
939 excost = 0;
940 c = 0;
941 ncode = nlit+noff;
942 for(i = 0; i < ncode; ){
943 n = i + 1;
944 v = codes[i];
945 while(n < ncode && v == codes[n])
946 n++;
947 n -= i;
948 i += n;
949 if(v == 0){
950 while(n >= 11){
951 m = n;
952 if(m > 138)
953 m = 138;
954 codes[c] = 18;
955 codeaux[c++] = m - 11;
956 n -= m;
957 excost += 7;
959 if(n >= 3){
960 codes[c] = 17;
961 codeaux[c++] = n - 3;
962 n = 0;
963 excost += 3;
966 while(n--){
967 codes[c++] = v;
968 while(n >= 3){
969 m = n;
970 if(m > 6)
971 m = 6;
972 codes[c] = 16;
973 codeaux[c++] = m - 3;
974 n -= m;
975 excost += 3;
980 memset(codecount, 0, sizeof codecount);
981 for(i = 0; i < c; i++)
982 codecount[codes[i]]++;
983 if(!mkgzprecode(codetab, codecount, Nclen, 8))
984 return -1;
986 for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--)
989 dc->nlit = nlit;
990 dc->noff = noff;
991 dc->nclen = nclen;
992 dc->ncode = c;
994 return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost;
997 static void
998 wrdyncode(LZstate *out, Dyncode *dc)
1000 Huff *codetab;
1001 uchar *codes, *codeaux;
1002 int i, v, c;
1005 * write out header, then code length code lengths,
1006 * and code lengths
1008 lzput(out, dc->nlit-257, 5);
1009 lzput(out, dc->noff-1, 5);
1010 lzput(out, dc->nclen-4, 4);
1012 codetab = dc->codetab;
1013 for(i = 0; i < dc->nclen; i++)
1014 lzput(out, codetab[clenorder[i]].bits, 3);
1016 codes = dc->codes;
1017 codeaux = dc->codeaux;
1018 c = dc->ncode;
1019 for(i = 0; i < c; i++){
1020 v = codes[i];
1021 lzput(out, codetab[v].encode, codetab[v].bits);
1022 if(v >= 16){
1023 if(v == 16)
1024 lzput(out, codeaux[i], 2);
1025 else if(v == 17)
1026 lzput(out, codeaux[i], 3);
1027 else /* v == 18 */
1028 lzput(out, codeaux[i], 7);
1033 static int
1034 bitcost(Huff *tab, ulong *count, int n)
1036 ulong tot;
1037 int i;
1039 tot = 0;
1040 for(i = 0; i < n; i++)
1041 tot += count[i] * tab[i].bits;
1042 return tot;
1045 static int
1046 mkgzprecode(Huff *tab, ulong *count, int n, int maxbits)
1048 ulong bitcount[MaxHuffBits];
1049 int i, nbits;
1051 nbits = mkprecode(tab, count, n, maxbits, bitcount);
1052 for(i = 0; i < n; i++){
1053 if(tab[i].bits == -1)
1054 tab[i].bits = 0;
1055 else if(tab[i].bits == 0){
1056 if(nbits != 0 || bitcount[0] != 1)
1057 return 0;
1058 bitcount[1] = 1;
1059 bitcount[0] = 0;
1060 nbits = 1;
1061 tab[i].bits = 1;
1064 if(bitcount[0] != 0)
1065 return 0;
1066 return hufftabinit(tab, n, bitcount, nbits);
1069 static int
1070 hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits)
1072 ulong code, nc[MaxHuffBits];
1073 int i, bits;
1075 code = 0;
1076 for(bits = 1; bits <= nbits; bits++){
1077 code = (code + bitcount[bits-1]) << 1;
1078 nc[bits] = code;
1081 for(i = 0; i < n; i++){
1082 bits = tab[i].bits;
1083 if(bits){
1084 code = nc[bits]++ << (16 - bits);
1085 if(code & ~0xffff)
1086 return 0;
1087 tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8);
1090 return 1;
1095 * this should be in a library
1097 struct Chain
1099 ulong count; /* occurances of everything in the chain */
1100 ushort leaf; /* leaves to the left of chain, or leaf value */
1101 char col; /* ref count for collecting unused chains */
1102 char gen; /* need to generate chains for next lower level */
1103 Chain *up; /* Chain up in the lists */
1106 struct Chains
1108 Chain *lists[(MaxHuffBits - 1) * 2];
1109 ulong leafcount[MaxLeaf]; /* sorted list of leaf counts */
1110 ushort leafmap[MaxLeaf]; /* map to actual leaf number */
1111 int nleaf; /* number of leaves */
1112 Chain chains[ChainMem];
1113 Chain *echains;
1114 Chain *free;
1115 char col;
1116 int nlists;
1120 * fast, low space overhead algorithm for max depth huffman type codes
1122 * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
1123 * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
1124 * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
1125 * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
1126 * pp 12-21, Springer Verlag, New York, 1995.
1128 static int
1129 mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount)
1131 Chains cs;
1132 Chain *c;
1133 int i, m, em, bits;
1135 memset(&cs, 0, sizeof cs);
1138 * set up the sorted list of leaves
1140 m = 0;
1141 for(i = 0; i < n; i++){
1142 tab[i].bits = -1;
1143 tab[i].encode = 0;
1144 if(count[i] != 0){
1145 cs.leafcount[m] = count[i];
1146 cs.leafmap[m] = i;
1147 m++;
1150 if(m < 2){
1151 if(m != 0){
1152 tab[cs.leafmap[0]].bits = 0;
1153 bitcount[0] = 1;
1154 }else
1155 bitcount[0] = 0;
1156 return 0;
1158 cs.nleaf = m;
1159 leafsort(cs.leafcount, cs.leafmap, 0, m);
1161 for(i = 0; i < m; i++)
1162 cs.leafcount[i] = count[cs.leafmap[i]];
1165 * set up free list
1167 cs.free = &cs.chains[2];
1168 cs.echains = &cs.chains[ChainMem];
1169 cs.col = 1;
1172 * initialize chains for each list
1174 c = &cs.chains[0];
1175 c->count = cs.leafcount[0];
1176 c->leaf = 1;
1177 c->col = cs.col;
1178 c->up = nil;
1179 c->gen = 0;
1180 cs.chains[1] = cs.chains[0];
1181 cs.chains[1].leaf = 2;
1182 cs.chains[1].count = cs.leafcount[1];
1183 for(i = 0; i < maxbits-1; i++){
1184 cs.lists[i * 2] = &cs.chains[0];
1185 cs.lists[i * 2 + 1] = &cs.chains[1];
1188 cs.nlists = 2 * (maxbits - 1);
1189 m = 2 * m - 2;
1190 for(i = 2; i < m; i++)
1191 nextchain(&cs, cs.nlists - 2);
1193 bits = 0;
1194 bitcount[0] = cs.nleaf;
1195 for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){
1196 m = c->leaf;
1197 bitcount[bits++] -= m;
1198 bitcount[bits] = m;
1200 m = 0;
1201 for(i = bits; i >= 0; i--)
1202 for(em = m + bitcount[i]; m < em; m++)
1203 tab[cs.leafmap[m]].bits = i;
1205 return bits;
1209 * calculate the next chain on the list
1210 * we can always toss out the old chain
1212 static void
1213 nextchain(Chains *cs, int list)
1215 Chain *c, *oc;
1216 int i, nleaf, sumc;
1218 oc = cs->lists[list + 1];
1219 cs->lists[list] = oc;
1220 if(oc == nil)
1221 return;
1224 * make sure we have all chains needed to make sumc
1225 * note it is possible to generate only one of these,
1226 * use twice that value for sumc, and then generate
1227 * the second if that preliminary sumc would be chosen.
1228 * however, this appears to be slower on current tests
1230 if(oc->gen){
1231 nextchain(cs, list - 2);
1232 nextchain(cs, list - 2);
1233 oc->gen = 0;
1237 * pick up the chain we're going to add;
1238 * collect unused chains no free ones are left
1240 for(c = cs->free; ; c++){
1241 if(c >= cs->echains){
1242 cs->col++;
1243 for(i = 0; i < cs->nlists; i++)
1244 for(c = cs->lists[i]; c != nil; c = c->up)
1245 c->col = cs->col;
1246 c = cs->chains;
1248 if(c->col != cs->col)
1249 break;
1253 * pick the cheapest of
1254 * 1) the next package from the previous list
1255 * 2) the next leaf
1257 nleaf = oc->leaf;
1258 sumc = 0;
1259 if(list > 0 && cs->lists[list-1] != nil)
1260 sumc = cs->lists[list-2]->count + cs->lists[list-1]->count;
1261 if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){
1262 c->count = sumc;
1263 c->leaf = oc->leaf;
1264 c->up = cs->lists[list-1];
1265 c->gen = 1;
1266 }else if(nleaf >= cs->nleaf){
1267 cs->lists[list + 1] = nil;
1268 return;
1269 }else{
1270 c->leaf = nleaf + 1;
1271 c->count = cs->leafcount[nleaf];
1272 c->up = oc->up;
1273 c->gen = 0;
1275 cs->free = c + 1;
1277 cs->lists[list + 1] = c;
1278 c->col = cs->col;
1281 static int
1282 pivot(ulong *c, int a, int n)
1284 int j, pi, pj, pk;
1286 j = n/6;
1287 pi = a + j; /* 1/6 */
1288 j += j;
1289 pj = pi + j; /* 1/2 */
1290 pk = pj + j; /* 5/6 */
1291 if(c[pi] < c[pj]){
1292 if(c[pi] < c[pk]){
1293 if(c[pj] < c[pk])
1294 return pj;
1295 return pk;
1297 return pi;
1299 if(c[pj] < c[pk]){
1300 if(c[pi] < c[pk])
1301 return pi;
1302 return pk;
1304 return pj;
1307 static void
1308 leafsort(ulong *leafcount, ushort *leafmap, int a, int n)
1310 ulong t;
1311 int j, pi, pj, pn;
1313 while(n > 1){
1314 if(n > 10){
1315 pi = pivot(leafcount, a, n);
1316 }else
1317 pi = a + (n>>1);
1319 t = leafcount[pi];
1320 leafcount[pi] = leafcount[a];
1321 leafcount[a] = t;
1322 t = leafmap[pi];
1323 leafmap[pi] = leafmap[a];
1324 leafmap[a] = t;
1325 pi = a;
1326 pn = a + n;
1327 pj = pn;
1328 for(;;){
1330 pi++;
1331 while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a]));
1333 pj--;
1334 while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a]));
1335 if(pj < pi)
1336 break;
1337 t = leafcount[pi];
1338 leafcount[pi] = leafcount[pj];
1339 leafcount[pj] = t;
1340 t = leafmap[pi];
1341 leafmap[pi] = leafmap[pj];
1342 leafmap[pj] = t;
1344 t = leafcount[a];
1345 leafcount[a] = leafcount[pj];
1346 leafcount[pj] = t;
1347 t = leafmap[a];
1348 leafmap[a] = leafmap[pj];
1349 leafmap[pj] = t;
1350 j = pj - a;
1352 n = n-j-1;
1353 if(j >= n){
1354 leafsort(leafcount, leafmap, a, j);
1355 a += j+1;
1356 }else{
1357 leafsort(leafcount, leafmap, a + (j+1), n);
1358 n = j;