op public repos

Blob

Date:: Thu Apr 30 14:24:53 2009 UTC
Message:: libflate: shut up valgrind
Actions:: History | Blame | Raw File
1 #include <u.h>
2 #include <libc.h>
3 #include <flate.h>
4 
5 typedef struct Chain	Chain;
6 typedef struct Chains	Chains;
7 typedef struct Dyncode	Dyncode;
8 typedef struct Huff	Huff;
9 typedef struct LZblock	LZblock;
10 typedef struct LZstate	LZstate;
11 
12 enum
13 {
14 	/*
15 	 * deflate format paramaters
16 	 */
17 	DeflateUnc	= 0,			/* uncompressed block */
18 	DeflateFix	= 1,			/* fixed huffman codes */
19 	DeflateDyn	= 2,			/* dynamic huffman codes */
20 
21 	DeflateEob	= 256,			/* end of block code in lit/len book */
22 	DeflateMaxBlock	= 64*1024-1,		/* maximum size of uncompressed block */
23 
24 	DeflateMaxExp	= 10,			/* maximum expansion for a block */
25 
26 	LenStart	= 257,			/* start of length codes in litlen */
27 	Nlitlen		= 288,			/* number of litlen codes */
28 	Noff		= 30,			/* number of offset codes */
29 	Nclen		= 19,			/* number of codelen codes */
30 
31 	MaxOff		= 32*1024,
32 	MinMatch	= 3,			/* shortest match possible */
33 	MaxMatch	= 258,			/* longest match possible */
34 
35 	/*
36 	 * huffman code paramaters
37 	 */
38 	MaxLeaf		= Nlitlen,
39 	MaxHuffBits	= 16,			/* max bits in a huffman code */
40 	ChainMem	= 2 * (MaxHuffBits - 1) * MaxHuffBits,
41 
42 	/*
43 	 * coding of the lz parse
44 	 */
45 	LenFlag		= 1 << 3,
46 	LenShift	= 4,			/* leaves enough space for MinMatchMaxOff */
47 	MaxLitRun	= LenFlag - 1,
48 
49 	/*
50 	 * internal lz paramaters
51 	 */
52 	DeflateOut	= 4096,			/* output buffer size */
53 	BlockSize	= 8192,			/* attempted input read quanta */
54 	DeflateBlock	= DeflateMaxBlock & ~(BlockSize - 1),
55 	MinMatchMaxOff	= 4096,			/* max profitable offset for small match;
56 						 * assumes 8 bits for len, 5+10 for offset
57 						 * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS
58 						 */
59 	HistSlop	= 512,			/* must be at lead MaxMatch */
60 	HistBlock	= 64*1024,
61 	HistSize	= HistBlock + HistSlop,
62 
63 	HashLog		= 13,
64 	HashSize	= 1<<HashLog,
65 
66 	MaxOffCode	= 256,			/* biggest offset looked up in direct table */
67 
68 	EstLitBits	= 8,
69 	EstLenBits	= 4,
70 	EstOffBits	= 5
71 };
72 
73 /*
74  * knuth vol. 3 multiplicative hashing
75  * each byte x chosen according to rules
76  * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
77  * with reasonable spread between the bytes & their complements
78  *
79  * the 3 byte value appears to be as almost good as the 4 byte value,
80  * and might be faster on some machines
81  */
82 /*
83 #define hashit(c)	((u32int)((c) * 0x6b43a9) >> (24 - HashLog))
84 */
85 #define hashit(c)	((u32int)(((c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
86 
87 /*
88  * lempel-ziv style compression state
89  */
90 struct LZstate
91 {
92 	uchar	hist[HistSize];
93 	ulong	pos;				/* current location in history buffer */
94 	ulong	avail;				/* data available after pos */
95 	int	eof;
96 	ushort	hash[HashSize];			/* hash chains */
97 	ushort	nexts[MaxOff];
98 	int	now;				/* pos in hash chains */
99 	int	dot;				/* dawn of time in history */
100 	int	prevlen;			/* lazy matching state */
101 	int	prevoff;
102 	int	maxcheck;			/* compressor tuning */
103 
104 	uchar	obuf[DeflateOut];
105 	uchar	*out;				/* current position in the output buffer */
106 	uchar	*eout;
107 	ulong	bits;				/* bit shift register */
108 	int	nbits;
109 	int	rbad;				/* got an error reading the buffer */
110 	int	wbad;				/* got an error writing the buffer */
111 	int	(*w)(void*, void*, int);
112 	void	*wr;
113 
114 	ulong	totr;				/* total input size */
115 	ulong	totw;				/* total output size */
116 	int	debug;
117 };
118 
119 struct LZblock
120 {
121 	ushort	parse[DeflateMaxBlock / 2 + 1];
122 	int	lastv;				/* value being constucted for parse */
123 	ulong	litlencount[Nlitlen];
124 	ulong	offcount[Noff];
125 	ushort	*eparse;			/* limit for parse table */
126 	int	bytes;				/* consumed from the input */
127 	int	excost;				/* cost of encoding extra len & off bits */
128 };
129 
130 /*
131  * huffman code table
132  */
133 struct Huff
134 {
135 	short	bits;				/* length of the code */
136 	ushort	encode;				/* the code */
137 };
138 
139 /*
140  * encoding of dynamic huffman trees
141  */
142 struct Dyncode
143 {
144 	int	nlit;
145 	int	noff;
146 	int	nclen;
147 	int	ncode;
148 	Huff	codetab[Nclen];
149 	uchar	codes[Nlitlen+Noff];
150 	uchar	codeaux[Nlitlen+Noff];
151 };
152 
153 static	int	deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int));
154 static	int	lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish);
155 static	void	wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*);
156 static	int	bitcost(Huff*, ulong*, int);
157 static	int	huffcodes(Dyncode*, Huff*, Huff*);
158 static	void	wrdyncode(LZstate*, Dyncode*);
159 static	void	lzput(LZstate*, ulong bits, int nbits);
160 static	void	lzflushbits(LZstate*);
161 static	void	lzflush(LZstate *lz);
162 static	void	lzwrite(LZstate *lz, void *buf, int n);
163 
164 static	int	hufftabinit(Huff*, int, ulong*, int);
165 static	int	mkgzprecode(Huff*, ulong *, int, int);
166 
167 static	int	mkprecode(Huff*, ulong *, int, int, ulong*);
168 static	void	nextchain(Chains*, int);
169 static	void	leafsort(ulong*, ushort*, int, int);
170 
171 /* conversion from len to code word */
172 static int lencode[MaxMatch];
173 
174 /*
175  * conversion from off to code word
176  * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]
177 */
178 static int offcode[MaxOffCode];
179 static int bigoffcode[256];
180 
181 /* litlen code words LenStart-285 extra bits */
182 static int litlenbase[Nlitlen-LenStart];
183 static int litlenextra[Nlitlen-LenStart] =
184 {
185 /* 257 */	0, 0, 0,
186 /* 260 */	0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
187 /* 270 */	2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
188 /* 280 */	4, 5, 5, 5, 5, 0, 0, 0
189 };
190 
191 /* offset code word extra bits */
192 static int offbase[Noff];
193 static int offextra[] =
194 {
195 	0,  0,  0,  0,  1,  1,  2,  2,  3,  3,
196 	4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
197 	9,  9,  10, 10, 11, 11, 12, 12, 13, 13,
198 	0,  0,
199 };
200 
201 /* order code lengths */
202 static int clenorder[Nclen] =
203 {
204         16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
205 };
206 
207 /* static huffman tables */
208 static	Huff	litlentab[Nlitlen];
209 static	Huff	offtab[Noff];
210 static	Huff	hofftab[Noff];
211 
212 /* bit reversal for brain dead endian swap in huffman codes */
213 static	uchar	revtab[256];
214 static	ulong	nlits;
215 static	ulong	nmatches;
216 
217 int
218 deflateinit(void)
219 {
220 	ulong bitcount[MaxHuffBits];
221 	int i, j, ci, n;
222 
223 	/* byte reverse table */
224 	for(i=0; i<256; i++)
225 		for(j=0; j<8; j++)
226 			if(i & (1<<j))
227 				revtab[i] |= 0x80 >> j;
228 
229 	/* static Litlen bit lengths */
230 	for(i=0; i<144; i++)
231 		litlentab[i].bits = 8;
232 	for(i=144; i<256; i++)
233 		litlentab[i].bits = 9;
234 	for(i=256; i<280; i++)
235 		litlentab[i].bits = 7;
236 	for(i=280; i<Nlitlen; i++)
237 		litlentab[i].bits = 8;
238 
239 	memset(bitcount, 0, sizeof(bitcount));
240 	bitcount[8] += 144 - 0;
241 	bitcount[9] += 256 - 144;
242 	bitcount[7] += 280 - 256;
243 	bitcount[8] += Nlitlen - 280;
244 
245 	if(!hufftabinit(litlentab, Nlitlen, bitcount, 9))
246 		return FlateInternal;
247 
248 	/* static offset bit lengths */
249 	for(i = 0; i < Noff; i++)
250 		offtab[i].bits = 5;
251 
252 	memset(bitcount, 0, sizeof(bitcount));
253 	bitcount[5] = Noff;
254 
255 	if(!hufftabinit(offtab, Noff, bitcount, 5))
256 		return FlateInternal;
257 
258 	bitcount[0] = 0;
259 	bitcount[1] = 0;
260 	if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits))
261 		return FlateInternal;
262 
263 	/* conversion tables for lens & offs to codes */
264 	ci = 0;
265 	for(i = LenStart; i < 286; i++){
266 		n = ci + (1 << litlenextra[i - LenStart]);
267 		litlenbase[i - LenStart] = ci;
268 		for(; ci < n; ci++)
269 			lencode[ci] = i;
270 	}
271 	/* patch up special case for len MaxMatch */
272 	lencode[MaxMatch-MinMatch] = 285;
273 	litlenbase[285-LenStart] = MaxMatch-MinMatch;
274 
275 	ci = 0;
276 	for(i = 0; i < 16; i++){
277 		n = ci + (1 << offextra[i]);
278 		offbase[i] = ci;
279 		for(; ci < n; ci++)
280 			offcode[ci] = i;
281 	}
282 
283 	ci = ci >> 7;
284 	for(; i < 30; i++){
285 		n = ci + (1 << (offextra[i] - 7));
286 		offbase[i] = ci << 7;
287 		for(; ci < n; ci++)
288 			bigoffcode[ci] = i;
289 	}
290 	return FlateOk;
291 }
292 
293 static void
294 deflatereset(LZstate *lz, int level, int debug)
295 {
296 	memset(lz->nexts, 0, sizeof lz->nexts);
297 	memset(lz->hash, 0, sizeof lz->hash);
298 	lz->totr = 0;
299 	lz->totw = 0;
300 	lz->pos = 0;
301 	lz->avail = 0;
302 	lz->out = lz->obuf;
303 	lz->eout = &lz->obuf[DeflateOut];
304 	lz->prevlen = MinMatch - 1;
305 	lz->prevoff = 0;
306 	lz->now = MaxOff + 1;
307 	lz->dot = lz->now;
308 	lz->bits = 0;
309 	lz->nbits = 0;
310 	lz->maxcheck = (1 << level);
311 	lz->maxcheck -= lz->maxcheck >> 2;
312 	if(lz->maxcheck < 2)
313 		lz->maxcheck = 2;
314 	else if(lz->maxcheck > 1024)
315 		lz->maxcheck = 1024;
316 
317 	lz->debug = debug;
318 }
319 
320 int
321 deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug)
322 {
323 	LZstate *lz;
324 	LZblock *lzb;
325 	int ok;
326 
327 	lz = malloc(sizeof *lz + sizeof *lzb);
328 	if(lz == nil)
329 		return FlateNoMem;
330 	lzb = (LZblock*)&lz[1];
331 
332 	deflatereset(lz, level, debug);
333 	lz->w = w;
334 	lz->wr = wr;
335 	lz->wbad = 0;
336 	lz->rbad = 0;
337 	lz->eof = 0;
338 	ok = FlateOk;
339 	while(!lz->eof || lz->avail){
340 		ok = deflateb(lz, lzb, rr, r);
341 		if(ok != FlateOk)
342 			break;
343 	}
344 	if(ok == FlateOk && lz->rbad)
345 		ok = FlateInputFail;
346 	if(ok == FlateOk && lz->wbad)
347 		ok = FlateOutputFail;
348 	free(lz);
349 	return ok;
350 }
351 
352 static int
353 deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int))
354 {
355 	Dyncode dyncode, hdyncode;
356 	Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen];
357 	ulong litcount[Nlitlen];
358 	long nunc, ndyn, nfix, nhuff;
359 	uchar *slop, *hslop;
360 	ulong ep;
361 	int i, n, m, mm, nslop;
362 
363 	memset(lzb->litlencount, 0, sizeof lzb->litlencount);
364 	memset(lzb->offcount, 0, sizeof lzb->offcount);
365 	lzb->litlencount[DeflateEob]++;
366 
367 	lzb->bytes = 0;
368 	lzb->eparse = lzb->parse;
369 	lzb->lastv = 0;
370 	lzb->excost = 0;
371 
372 	slop = &lz->hist[lz->pos];
373 	n = lz->avail;
374 	while(n < DeflateBlock && (!lz->eof || lz->avail)){
375 		/*
376 		 * fill the buffer as much as possible,
377 		 * while leaving room for MaxOff history behind lz->pos,
378 		 * and not reading more than we can handle.
379 		 *
380 		 * make sure we read at least HistSlop bytes.
381 		 */
382 		if(!lz->eof){
383 			ep = lz->pos + lz->avail;
384 			if(ep >= HistBlock)
385 				ep -= HistBlock;
386 			m = HistBlock - MaxOff - lz->avail;
387 			if(m > HistBlock - n)
388 				m = HistBlock - n;
389 			if(m > (HistBlock + HistSlop) - ep)
390 				m = (HistBlock + HistSlop) - ep;
391 			if(m & ~(BlockSize - 1))
392 				m &= ~(BlockSize - 1);
393 
394 			/*
395 			 * be nice to the caller: stop reads that are too small.
396 			 * can only get here when we've already filled the buffer some
397 			 */
398 			if(m < HistSlop){
399 				if(!m || !lzb->bytes)
400 					return FlateInternal;
401 				break;
402 			}
403 
404 			mm = (*r)(rr, &lz->hist[ep], m);
405 			if(mm > 0){
406 				/*
407 				 * wrap data to end if we're read it from the beginning
408 				 * this way, we don't have to wrap searches.
409 				 *
410 				 * wrap reads past the end to the beginning.
411 				 * this way, we can guarantee minimum size reads.
412 				 */
413 				if(ep < HistSlop)
414 					memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep);
415 				else if(ep + mm > HistBlock)
416 					memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock);
417 
418 				lz->totr += mm;
419 				n += mm;
420 				lz->avail += mm;
421 			}else{
422 				if(mm < 0)
423 					lz->rbad = 1;
424 				lz->eof = 1;
425 			}
426 		}
427 		ep = lz->pos + lz->avail;
428 		if(ep > HistSize)
429 			ep = HistSize;
430 		if(lzb->bytes + ep - lz->pos > DeflateMaxBlock)
431 			ep = lz->pos + DeflateMaxBlock - lzb->bytes;
432 		m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof);
433 		lzb->bytes += m;
434 		lz->pos = (lz->pos + m) & (HistBlock - 1);
435 		lz->avail -= m;
436 	}
437 	if(lzb->lastv)
438 		*lzb->eparse++ = lzb->lastv;
439 	if(lzb->eparse > lzb->parse + nelem(lzb->parse))
440 		return FlateInternal;
441 	nunc = lzb->bytes;
442 
443 	if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits)
444 	|| !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits))
445 		return FlateInternal;
446 
447 	ndyn = huffcodes(&dyncode, dlitlentab, dofftab);
448 	if(ndyn < 0)
449 		return FlateInternal;
450 	ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen)
451 		+ bitcost(dofftab, lzb->offcount, Noff)
452 		+ lzb->excost;
453 
454 	memset(litcount, 0, sizeof litcount);
455 
456 	nslop = nunc;
457 	if(nslop > &lz->hist[HistSize] - slop)
458 		nslop = &lz->hist[HistSize] - slop;
459 
460 	for(i = 0; i < nslop; i++)
461 		litcount[slop[i]]++;
462 	hslop = &lz->hist[HistSlop - nslop];
463 	for(; i < nunc; i++)
464 		litcount[hslop[i]]++;
465 	litcount[DeflateEob]++;
466 
467 	if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits))
468 		return FlateInternal;
469 	nhuff = huffcodes(&hdyncode, hlitlentab, hofftab);
470 	if(nhuff < 0)
471 		return FlateInternal;
472 	nhuff += bitcost(hlitlentab, litcount, Nlitlen);
473 
474 	nfix = bitcost(litlentab, lzb->litlencount, Nlitlen)
475 		+ bitcost(offtab, lzb->offcount, Noff)
476 		+ lzb->excost;
477 
478 	lzput(lz, lz->eof && !lz->avail, 1);
479 
480 	if(lz->debug){
481 		fprint(2, "block: bytes=%lud entries=%ld extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n",
482 			nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff);
483 		fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail);
484 	}
485 
486 	if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){
487 		lzput(lz, DeflateUnc, 2);
488 		lzflushbits(lz);
489 
490 		lzput(lz, nunc & 0xff, 8);
491 		lzput(lz, (nunc >> 8) & 0xff, 8);
492 		lzput(lz, ~nunc & 0xff, 8);
493 		lzput(lz, (~nunc >> 8) & 0xff, 8);
494 		lzflush(lz);
495 
496 		lzwrite(lz, slop, nslop);
497 		lzwrite(lz, &lz->hist[HistSlop], nunc - nslop);
498 	}else if(ndyn < nfix && ndyn < nhuff){
499 		lzput(lz, DeflateDyn, 2);
500 
501 		wrdyncode(lz, &dyncode);
502 		wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab);
503 		lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits);
504 	}else if(nhuff < nfix){
505 		lzput(lz, DeflateDyn, 2);
506 
507 		wrdyncode(lz, &hdyncode);
508 
509 		m = 0;
510 		for(i = nunc; i > MaxLitRun; i -= MaxLitRun)
511 			lzb->parse[m++] = MaxLitRun;
512 		lzb->parse[m++] = i;
513 
514 		wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab);
515 		lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits);
516 	}else{
517 		lzput(lz, DeflateFix, 2);
518 
519 		wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab);
520 		lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);
521 	}
522 
523 	if(lz->eof && !lz->avail){
524 		lzflushbits(lz);
525 		lzflush(lz);
526 	}
527 	return FlateOk;
528 }
529 
530 static void
531 lzwrite(LZstate *lz, void *buf, int n)
532 {
533 	int nw;
534 
535 	if(n && lz->w){
536 		nw = (*lz->w)(lz->wr, buf, n);
537 		if(nw != n){
538 			lz->w = 0;
539 			lz->wbad = 1;
540 		}else
541 			lz->totw += n;
542 	}
543 }
544 
545 static void
546 lzflush(LZstate *lz)
547 {
548 	lzwrite(lz, lz->obuf, lz->out - lz->obuf);
549 	lz->out = lz->obuf;
550 }
551 
552 static void
553 lzput(LZstate *lz, ulong bits, int nbits)
554 {
555 	bits = (bits << lz->nbits) | lz->bits;
556 	for(nbits += lz->nbits; nbits >= 8; nbits -= 8){
557 		*lz->out++ = bits;
558 		if(lz->out == lz->eout)
559 			lzflush(lz);
560 		bits >>= 8;
561 	}
562 	lz->bits = bits;
563 	lz->nbits = nbits;
564 }
565 
566 static void
567 lzflushbits(LZstate *lz)
568 {
569 	if(lz->nbits)
570 		lzput(lz, 0, 8 - (lz->nbits & 7));
571 }
572 
573 /*
574  * write out a block of n samples,
575  * given lz encoding and counts for huffman tables
576  */
577 static void
578 wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab)
579 {
580 	ushort *off;
581 	int i, run, offset, lit, len, c;
582 
583 	if(out->debug > 2){
584 		for(off = soff; off < eoff; ){
585 			offset = *off++;
586 			run = offset & MaxLitRun;
587 			if(run){
588 				for(i = 0; i < run; i++){
589 					lit = out->hist[litoff & (HistBlock - 1)];
590 					litoff++;
591 					fprint(2, "\tlit %.2ux %c\n", lit, lit);
592 				}
593 				if(!(offset & LenFlag))
594 					continue;
595 				len = offset >> LenShift;
596 				offset = *off++;
597 			}else if(offset & LenFlag){
598 				len = offset >> LenShift;
599 				offset = *off++;
600 			}else{
601 				len = 0;
602 				offset >>= LenShift;
603 			}
604 			litoff += len + MinMatch;
605 			fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch);
606 		}
607 	}
608 
609 	for(off = soff; off < eoff; ){
610 		offset = *off++;
611 		run = offset & MaxLitRun;
612 		if(run){
613 			for(i = 0; i < run; i++){
614 				lit = out->hist[litoff & (HistBlock - 1)];
615 				litoff++;
616 				lzput(out, litlentab[lit].encode, litlentab[lit].bits);
617 			}
618 			if(!(offset & LenFlag))
619 				continue;
620 			len = offset >> LenShift;
621 			offset = *off++;
622 		}else if(offset & LenFlag){
623 			len = offset >> LenShift;
624 			offset = *off++;
625 		}else{
626 			len = 0;
627 			offset >>= LenShift;
628 		}
629 		litoff += len + MinMatch;
630 		c = lencode[len];
631 		lzput(out, litlentab[c].encode, litlentab[c].bits);
632 		c -= LenStart;
633 		if(litlenextra[c])
634 			lzput(out, len - litlenbase[c], litlenextra[c]);
635 
636 		if(offset < MaxOffCode)
637 			c = offcode[offset];
638 		else
639 			c = bigoffcode[offset >> 7];
640 		lzput(out, offtab[c].encode, offtab[c].bits);
641 		if(offextra[c])
642 			lzput(out, offset - offbase[c], offextra[c]);
643 	}
644 }
645 
646 /*
647  * look for the longest, closest string which matches
648  * the next prefix.  the clever part here is looking for
649  * a string 1 longer than the previous best match.
650  *
651  * follows the recommendation of limiting number of chains
652  * which are checked.  this appears to be the best heuristic.
653  */
654 static int
655 lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m)
656 {
657 	uchar *s, *t;
658 	int ml, off, last;
659 
660 	ml = check;
661 	if(runlen >= 8)
662 		check >>= 2;
663 	*m = 0;
664 	if(p + runlen >= es)
665 		return runlen;
666 	last = 0;
667 	for(; check-- > 0; then = nexts[then & (MaxOff-1)]){
668 		off = (ushort)(now - then);
669 		if(off <= last || off > MaxOff)
670 			break;
671 		s = p + runlen;
672 		t = hist + (((p - hist) - off) & (HistBlock-1));
673 		t += runlen;
674 		for(; s >= p; s--){
675 			if(*s != *t)
676 				goto matchloop;
677 			t--;
678 		}
679 
680 		/*
681 		 * we have a new best match.
682 		 * extend it to it's maximum length
683 		 */
684 		t += runlen + 2;
685 		s += runlen + 2;
686 		for(; s < es; s++){
687 			if(*s != *t)
688 				break;
689 			t++;
690 		}
691 		runlen = s - p;
692 		*m = off - 1;
693 		if(s == es || runlen > ml)
694 			break;
695 matchloop:;
696 		last = off;
697 	}
698 	return runlen;
699 }
700 
701 static int
702 lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish)
703 {
704 	ulong cont, excost, *litlencount, *offcount;
705 	uchar *p, *q, *s, *es;
706 	ushort *nexts, *hash;
707 	int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer;
708 
709 	litlencount = lzb->litlencount;
710 	offcount = lzb->offcount;
711 	nexts = lz->nexts;
712 	hash = lz->hash;
713 	now = lz->now;
714 
715 	p = &lz->hist[lz->pos];
716 	if(lz->prevlen != MinMatch - 1)
717 		p++;
718 
719 	/*
720 	 * hash in the links for any hanging link positions,
721 	 * and calculate the hash for the current position.
722 	 */
723 	n = MinMatch;
724 	if(n > ep - p)
725 		n = ep - p;
726 	cont = 0;
727 	for(i = 0; i < n - 1; i++){
728 		m = now - ((MinMatch-1) - i);
729 		if(m < lz->dot)
730 			continue;
731 		s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1));
732 
733 		cont = (s[0] << 16) | (s[1] << 8) | s[2];
734 		h = hashit(cont);
735 		prevoff = 0;
736 		for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){
737 			v = (ushort)(now - then);
738 			if(v <= prevoff || v >= (MinMatch-1) - i)
739 				break;
740 			prevoff = v;
741 		}
742 		if(then == (ushort)m)
743 			continue;
744 		nexts[m & (MaxOff-1)] = hash[h];
745 		hash[h] = m;
746 	}
747 	for(i = 0; i < n; i++)
748 		cont = (cont << 8) | p[i];
749 
750 	/*
751 	 * now must point to the index in the nexts array
752 	 * corresponding to p's position in the history
753 	 */
754 	prevlen = lz->prevlen;
755 	prevoff = lz->prevoff;
756 	maxdefer = lz->maxcheck >> 2;
757 	excost = 0;
758 	v = lzb->lastv;
759 	for(;;){
760 		es = p + MaxMatch;
761 		if(es > ep){
762 			if(!finish || p >= ep)
763 				break;
764 			es = ep;
765 		}
766 
767 		h = hashit(cont);
768 		runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m);
769 
770 		/*
771 		 * back out of small matches too far in the past
772 		 */
773 		if(runlen == MinMatch && m >= MinMatchMaxOff){
774 			runlen = MinMatch - 1;
775 			m = 0;
776 		}
777 
778 		/*
779 		 * record the encoding and increment counts for huffman trees
780 		 * if we get a match, defer selecting it until we check for
781 		 * a longer match at the next position.
782 		 */
783 		if(prevlen >= runlen && prevlen != MinMatch - 1){
784 			/*
785 			 * old match at least as good; use that one
786 			 */
787 			n = prevlen - MinMatch;
788 			if(v || n){
789 				*parse++ = v | LenFlag | (n << LenShift);
790 				*parse++ = prevoff;
791 			}else
792 				*parse++ = prevoff << LenShift;
793 			v = 0;
794 
795 			n = lencode[n];
796 			litlencount[n]++;
797 			excost += litlenextra[n - LenStart];
798 
799 			if(prevoff < MaxOffCode)
800 				n = offcode[prevoff];
801 			else
802 				n = bigoffcode[prevoff >> 7];
803 			offcount[n]++;
804 			excost += offextra[n];
805 
806 			runlen = prevlen - 1;
807 			prevlen = MinMatch - 1;
808 			nmatches++;
809 		}else if(runlen == MinMatch - 1){
810 			/*
811 			 * no match; just put out the literal
812 			 */
813 			if(++v == MaxLitRun){
814 				*parse++ = v;
815 				v = 0;
816 			}
817 			litlencount[*p]++;
818 			nlits++;
819 			runlen = 1;
820 		}else{
821 			if(prevlen != MinMatch - 1){
822 				/*
823 				 * longer match now. output previous literal,
824 				 * update current match, and try again
825 				 */
826 				if(++v == MaxLitRun){
827 					*parse++ = v;
828 					v = 0;
829 				}
830 				litlencount[p[-1]]++;
831 				nlits++;
832 			}
833 
834 			prevoff = m;
835 
836 			if(runlen < maxdefer){
837 				prevlen = runlen;
838 				runlen = 1;
839 			}else{
840 				n = runlen - MinMatch;
841 				if(v || n){
842 					*parse++ = v | LenFlag | (n << LenShift);
843 					*parse++ = prevoff;
844 				}else
845 					*parse++ = prevoff << LenShift;
846 				v = 0;
847 
848 				n = lencode[n];
849 				litlencount[n]++;
850 				excost += litlenextra[n - LenStart];
851 
852 				if(prevoff < MaxOffCode)
853 					n = offcode[prevoff];
854 				else
855 					n = bigoffcode[prevoff >> 7];
856 				offcount[n]++;
857 				excost += offextra[n];
858 
859 				prevlen = MinMatch - 1;
860 				nmatches++;
861 			}
862 		}
863 
864 		/*
865 		 * update the hash for the newly matched data
866 		 * this is constructed so the link for the old
867 		 * match in this position must be at the end of a chain,
868 		 * and will expire when this match is added, ie it will
869 		 * never be examined by the match loop.
870 		 * add to the hash chain only if we have the real hash data.
871 		 */
872 		for(q = p + runlen; p != q; p++){
873 			if(p + MinMatch <= ep){
874 				h = hashit(cont);
875 				nexts[now & (MaxOff-1)] = hash[h];
876 				hash[h] = now;
877 				if(p + MinMatch < ep)
878 					cont = (cont << 8) | p[MinMatch];
879 			}
880 			now++;
881 		}
882 	}
883 
884 	/*
885 	 * we can just store away the lazy state and
886 	 * pick it up next time.  the last block will have finish set
887 	 * so we won't have any pending matches
888 	 * however, we need to correct for how much we've encoded
889 	 */
890 	if(prevlen != MinMatch - 1)
891 		p--;
892 
893 	lzb->excost += excost;
894 	lzb->eparse = parse;
895 	lzb->lastv = v;
896 
897 	lz->now = now;
898 	lz->prevlen = prevlen;
899 	lz->prevoff = prevoff;
900 
901 	return p - &lz->hist[lz->pos];
902 }
903 
904 /*
905  * make up the dynamic code tables, and return the number of bits
906  * needed to transmit them.
907  */
908 static int
909 huffcodes(Dyncode *dc, Huff *littab, Huff *offtab)
910 {
911 	Huff *codetab;
912 	uchar *codes, *codeaux;
913 	ulong codecount[Nclen], excost;
914 	int i, n, m, v, c, nlit, noff, ncode, nclen;
915 
916 	codetab = dc->codetab;
917 	codes = dc->codes;
918 	codeaux = dc->codeaux;
919 
920 	/*
921 	 * trim the sizes of the tables
922 	 */
923 	for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--)
924 		;
925 	for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--)
926 		;
927 
928 	/*
929 	 * make the code-length code
930 	 */
931 	for(i = 0; i < nlit; i++)
932 		codes[i] = littab[i].bits;
933 	for(i = 0; i < noff; i++)
934 		codes[i + nlit] = offtab[i].bits;
935 
936 	/*
937 	 * run-length compress the code-length code
938 	 */
939 	excost = 0;
940 	c = 0;
941 	ncode = nlit+noff;
942 	for(i = 0; i < ncode; ){
943 		n = i + 1;
944 		v = codes[i];
945 		while(n < ncode && v == codes[n])
946 			n++;
947 		n -= i;
948 		i += n;
949 		if(v == 0){
950 			while(n >= 11){
951 				m = n;
952 				if(m > 138)
953 					m = 138;
954 				codes[c] = 18;
955 				codeaux[c++] = m - 11;
956 				n -= m;
957 				excost += 7;
958 			}
959 			if(n >= 3){
960 				codes[c] = 17;
961 				codeaux[c++] = n - 3;
962 				n = 0;
963 				excost += 3;
964 			}
965 		}
966 		while(n--){
967 			codes[c++] = v;
968 			while(n >= 3){
969 				m = n;
970 				if(m > 6)
971 					m = 6;
972 				codes[c] = 16;
973 				codeaux[c++] = m - 3;
974 				n -= m;
975 				excost += 3;
976 			}
977 		}
978 	}
979 
980 	memset(codecount, 0, sizeof codecount);
981 	for(i = 0; i < c; i++)
982 		codecount[codes[i]]++;
983 	if(!mkgzprecode(codetab, codecount, Nclen, 8))
984 		return -1;
985 
986 	for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--)
987 		;
988 
989 	dc->nlit = nlit;
990 	dc->noff = noff;
991 	dc->nclen = nclen;
992 	dc->ncode = c;
993 
994 	return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost;
995 }
996 
997 static void
998 wrdyncode(LZstate *out, Dyncode *dc)
999 {
1000 	Huff *codetab;
1001 	uchar *codes, *codeaux;
1002 	int i, v, c;
1003 
1004 	/*
1005 	 * write out header, then code length code lengths,
1006 	 * and code lengths
1007 	 */
1008 	lzput(out, dc->nlit-257, 5);
1009 	lzput(out, dc->noff-1, 5);
1010 	lzput(out, dc->nclen-4, 4);
1011 
1012 	codetab = dc->codetab;
1013 	for(i = 0; i < dc->nclen; i++)
1014 		lzput(out, codetab[clenorder[i]].bits, 3);
1015 
1016 	codes = dc->codes;
1017 	codeaux = dc->codeaux;
1018 	c = dc->ncode;
1019 	for(i = 0; i < c; i++){
1020 		v = codes[i];
1021 		lzput(out, codetab[v].encode, codetab[v].bits);
1022 		if(v >= 16){
1023 			if(v == 16)
1024 				lzput(out, codeaux[i], 2);
1025 			else if(v == 17)
1026 				lzput(out, codeaux[i], 3);
1027 			else /* v == 18 */
1028 				lzput(out, codeaux[i], 7);
1029 		}
1030 	}
1031 }
1032 
1033 static int
1034 bitcost(Huff *tab, ulong *count, int n)
1035 {
1036 	ulong tot;
1037 	int i;
1038 
1039 	tot = 0;
1040 	for(i = 0; i < n; i++)
1041 		tot += count[i] * tab[i].bits;
1042 	return tot;
1043 }
1044 
1045 static int
1046 mkgzprecode(Huff *tab, ulong *count, int n, int maxbits)
1047 {
1048 	ulong bitcount[MaxHuffBits];
1049 	int i, nbits;
1050 
1051 	nbits = mkprecode(tab, count, n, maxbits, bitcount);
1052 	for(i = 0; i < n; i++){
1053 		if(tab[i].bits == -1)
1054 			tab[i].bits = 0;
1055 		else if(tab[i].bits == 0){
1056 			if(nbits != 0 || bitcount[0] != 1)
1057 				return 0;
1058 			bitcount[1] = 1;
1059 			bitcount[0] = 0;
1060 			nbits = 1;
1061 			tab[i].bits = 1;
1062 		}
1063 	}
1064 	if(bitcount[0] != 0)
1065 		return 0;
1066 	return hufftabinit(tab, n, bitcount, nbits);
1067 }
1068 
1069 static int
1070 hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits)
1071 {
1072 	ulong code, nc[MaxHuffBits];
1073 	int i, bits;
1074 
1075 	code = 0;
1076 	for(bits = 1; bits <= nbits; bits++){
1077 		code = (code + bitcount[bits-1]) << 1;
1078 		nc[bits] = code;
1079 	}
1080 
1081 	for(i = 0; i < n; i++){
1082 		bits = tab[i].bits;
1083 		if(bits){
1084 			code = nc[bits]++ << (16 - bits);
1085 			if(code & ~0xffff)
1086 				return 0;
1087 			tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8);
1088 		}
1089 	}
1090 	return 1;
1091 }
1092 
1093 
1094 /*
1095  * this should be in a library
1096  */
1097 struct Chain
1098 {
1099 	ulong	count;				/* occurances of everything in the chain */
1100 	ushort	leaf;				/* leaves to the left of chain, or leaf value */
1101 	char	col;				/* ref count for collecting unused chains */
1102 	char	gen;				/* need to generate chains for next lower level */
1103 	Chain	*up;				/* Chain up in the lists */
1104 };
1105 
1106 struct Chains
1107 {
1108 	Chain	*lists[(MaxHuffBits - 1) * 2];
1109 	ulong	leafcount[MaxLeaf];		/* sorted list of leaf counts */
1110 	ushort	leafmap[MaxLeaf];		/* map to actual leaf number */
1111 	int	nleaf;				/* number of leaves */
1112 	Chain	chains[ChainMem];
1113 	Chain	*echains;
1114 	Chain	*free;
1115 	char	col;
1116 	int	nlists;
1117 };
1118 
1119 /*
1120  * fast, low space overhead algorithm for max depth huffman type codes
1121  *
1122  * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
1123  * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
1124  * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
1125  * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
1126  * pp 12-21, Springer Verlag, New York, 1995.
1127  */
1128 static int
1129 mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount)
1130 {
1131 	Chains cs;
1132 	Chain *c;
1133 	int i, m, em, bits;
1134 
1135 	memset(&cs, 0, sizeof cs);
1136 
1137 	/*
1138 	 * set up the sorted list of leaves
1139 	 */
1140 	m = 0;
1141 	for(i = 0; i < n; i++){
1142 		tab[i].bits = -1;
1143 		tab[i].encode = 0;
1144 		if(count[i] != 0){
1145 			cs.leafcount[m] = count[i];
1146 			cs.leafmap[m] = i;
1147 			m++;
1148 		}
1149 	}
1150 	if(m < 2){
1151 		if(m != 0){
1152 			tab[cs.leafmap[0]].bits = 0;
1153 			bitcount[0] = 1;
1154 		}else
1155 			bitcount[0] = 0;
1156 		return 0;
1157 	}
1158 	cs.nleaf = m;
1159 	leafsort(cs.leafcount, cs.leafmap, 0, m);
1160 
1161 	for(i = 0; i < m; i++)
1162 		cs.leafcount[i] = count[cs.leafmap[i]];
1163 
1164 	/*
1165 	 * set up free list
1166 	 */
1167 	cs.free = &cs.chains[2];
1168 	cs.echains = &cs.chains[ChainMem];
1169 	cs.col = 1;
1170 
1171 	/*
1172 	 * initialize chains for each list
1173 	 */
1174 	c = &cs.chains[0];
1175 	c->count = cs.leafcount[0];
1176 	c->leaf = 1;
1177 	c->col = cs.col;
1178 	c->up = nil;
1179 	c->gen = 0;
1180 	cs.chains[1] = cs.chains[0];
1181 	cs.chains[1].leaf = 2;
1182 	cs.chains[1].count = cs.leafcount[1];
1183 	for(i = 0; i < maxbits-1; i++){
1184 		cs.lists[i * 2] = &cs.chains[0];
1185 		cs.lists[i * 2 + 1] = &cs.chains[1];
1186 	}
1187 
1188 	cs.nlists = 2 * (maxbits - 1);
1189 	m = 2 * m - 2;
1190 	for(i = 2; i < m; i++)
1191 		nextchain(&cs, cs.nlists - 2);
1192 
1193 	bits = 0;
1194 	bitcount[0] = cs.nleaf;
1195 	for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){
1196 		m = c->leaf;
1197 		bitcount[bits++] -= m;
1198 		bitcount[bits] = m;
1199 	}
1200 	m = 0;
1201 	for(i = bits; i >= 0; i--)
1202 		for(em = m + bitcount[i]; m < em; m++)
1203 			tab[cs.leafmap[m]].bits = i;
1204 
1205 	return bits;
1206 }
1207 
1208 /*
1209  * calculate the next chain on the list
1210  * we can always toss out the old chain
1211  */
1212 static void
1213 nextchain(Chains *cs, int list)
1214 {
1215 	Chain *c, *oc;
1216 	int i, nleaf, sumc;
1217 
1218 	oc = cs->lists[list + 1];
1219 	cs->lists[list] = oc;
1220 	if(oc == nil)
1221 		return;
1222 
1223 	/*
1224 	 * make sure we have all chains needed to make sumc
1225 	 * note it is possible to generate only one of these,
1226 	 * use twice that value for sumc, and then generate
1227 	 * the second if that preliminary sumc would be chosen.
1228 	 * however, this appears to be slower on current tests
1229 	 */
1230 	if(oc->gen){
1231 		nextchain(cs, list - 2);
1232 		nextchain(cs, list - 2);
1233 		oc->gen = 0;
1234 	}
1235 
1236 	/*
1237 	 * pick up the chain we're going to add;
1238 	 * collect unused chains no free ones are left
1239 	 */
1240 	for(c = cs->free; ; c++){
1241 		if(c >= cs->echains){
1242 			cs->col++;
1243 			for(i = 0; i < cs->nlists; i++)
1244 				for(c = cs->lists[i]; c != nil; c = c->up)
1245 					c->col = cs->col;
1246 			c = cs->chains;
1247 		}
1248 		if(c->col != cs->col)
1249 			break;
1250 	}
1251 
1252 	/*
1253 	 * pick the cheapest of
1254 	 * 1) the next package from the previous list
1255 	 * 2) the next leaf
1256 	 */
1257 	nleaf = oc->leaf;
1258 	sumc = 0;
1259 	if(list > 0 && cs->lists[list-1] != nil)
1260 		sumc = cs->lists[list-2]->count + cs->lists[list-1]->count;
1261 	if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){
1262 		c->count = sumc;
1263 		c->leaf = oc->leaf;
1264 		c->up = cs->lists[list-1];
1265 		c->gen = 1;
1266 	}else if(nleaf >= cs->nleaf){
1267 		cs->lists[list + 1] = nil;
1268 		return;
1269 	}else{
1270 		c->leaf = nleaf + 1;
1271 		c->count = cs->leafcount[nleaf];
1272 		c->up = oc->up;
1273 		c->gen = 0;
1274 	}
1275 	cs->free = c + 1;
1276 
1277 	cs->lists[list + 1] = c;
1278 	c->col = cs->col;
1279 }
1280 
1281 static int
1282 pivot(ulong *c, int a, int n)
1283 {
1284 	int j, pi, pj, pk;
1285 
1286 	j = n/6;
1287 	pi = a + j;	/* 1/6 */
1288 	j += j;
1289 	pj = pi + j;	/* 1/2 */
1290 	pk = pj + j;	/* 5/6 */
1291 	if(c[pi] < c[pj]){
1292 		if(c[pi] < c[pk]){
1293 			if(c[pj] < c[pk])
1294 				return pj;
1295 			return pk;
1296 		}
1297 		return pi;
1298 	}
1299 	if(c[pj] < c[pk]){
1300 		if(c[pi] < c[pk])
1301 			return pi;
1302 		return pk;
1303 	}
1304 	return pj;
1305 }
1306 
1307 static	void
1308 leafsort(ulong *leafcount, ushort *leafmap, int a, int n)
1309 {
1310 	ulong t;
1311 	int j, pi, pj, pn;
1312 
1313 	while(n > 1){
1314 		if(n > 10){
1315 			pi = pivot(leafcount, a, n);
1316 		}else
1317 			pi = a + (n>>1);
1318 
1319 		t = leafcount[pi];
1320 		leafcount[pi] = leafcount[a];
1321 		leafcount[a] = t;
1322 		t = leafmap[pi];
1323 		leafmap[pi] = leafmap[a];
1324 		leafmap[a] = t;
1325 		pi = a;
1326 		pn = a + n;
1327 		pj = pn;
1328 		for(;;){
1329 			do
1330 				pi++;
1331 			while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a]));
1332 			do
1333 				pj--;
1334 			while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a]));
1335 			if(pj < pi)
1336 				break;
1337 			t = leafcount[pi];
1338 			leafcount[pi] = leafcount[pj];
1339 			leafcount[pj] = t;
1340 			t = leafmap[pi];
1341 			leafmap[pi] = leafmap[pj];
1342 			leafmap[pj] = t;
1343 		}
1344 		t = leafcount[a];
1345 		leafcount[a] = leafcount[pj];
1346 		leafcount[pj] = t;
1347 		t = leafmap[a];
1348 		leafmap[a] = leafmap[pj];
1349 		leafmap[pj] = t;
1350 		j = pj - a;
1351 
1352 		n = n-j-1;
1353 		if(j >= n){
1354 			leafsort(leafcount, leafmap, a, j);
1355 			a += j+1;
1356 		}else{
1357 			leafsort(leafcount, leafmap, a + (j+1), n);
1358 			n = j;
1359 		}
1360 	}
1361 }