Blob


1 #include "stdinc.h"
2 #include "whack.h"
4 typedef struct Huff Huff;
6 enum
7 {
8 MaxFastLen = 9,
9 BigLenCode = 0x1f4, /* minimum code for large lenth encoding */
10 BigLenBits = 9,
11 BigLenBase = 4, /* starting items to encode for big lens */
13 MinOffBits = 6,
14 MaxOffBits = MinOffBits + 8,
16 MaxLen = 2051 /* max. length encodable in 24 bits */
17 };
19 enum
20 {
21 StatBytes,
22 StatOutBytes,
23 StatLits,
24 StatMatches,
25 StatLitBits,
26 StatOffBits,
27 StatLenBits,
29 MaxStat
30 };
32 struct Huff
33 {
34 short bits; /* length of the code */
35 ulong encode; /* the code */
36 };
38 static Huff lentab[MaxFastLen] =
39 {
40 {2, 0x2}, /* 10 */
41 {3, 0x6}, /* 110 */
42 {5, 0x1c}, /* 11100 */
43 {5, 0x1d}, /* 11101 */
44 {6, 0x3c}, /* 111100 */
45 {7, 0x7a}, /* 1111010 */
46 {7, 0x7b}, /* 1111011 */
47 {8, 0xf8}, /* 11111000 */
48 {8, 0xf9}, /* 11111001 */
49 };
51 static int thwmaxcheck;
53 void
54 whackinit(Whack *tw, int level)
55 {
56 thwmaxcheck = (1 << level);
57 thwmaxcheck -= thwmaxcheck >> 2;
58 if(thwmaxcheck < 2)
59 thwmaxcheck = 2;
60 else if(thwmaxcheck > 1024)
61 thwmaxcheck = 1024;
62 memset(tw, 0, sizeof *tw);
63 tw->begin = 2 * WhackMaxOff;
64 }
66 /*
67 * find a string in the dictionary
68 */
69 static int
70 whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
71 {
72 ushort then, off, last;
73 int bestoff, bestlen, check;
74 uchar *s, *t;
76 s = *ss;
77 if(esrc < s + MinMatch)
78 return -1;
79 if(s + MaxLen < esrc)
80 esrc = s + MaxLen;
82 bestoff = 0;
83 bestlen = 0;
84 check = thwmaxcheck;
85 last = 0;
86 for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
87 off = now - then;
88 if(off <= last || off > WhackMaxOff)
89 break;
91 /*
92 * don't need to check for the end because
93 * 1) s too close check above
94 */
95 t = s - off;
96 if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
97 if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
98 t += 3;
99 for(s += 3; s < esrc; s++){
100 if(*s != *t)
101 break;
102 t++;
104 if(s - *ss > bestlen){
105 bestlen = s - *ss;
106 bestoff = off;
107 if(bestlen > thwmaxcheck)
108 break;
112 s = *ss;
113 last = off;
115 *ss += bestlen;
116 return bestoff;
119 /*
120 * knuth vol. 3 multiplicative hashing
121 * each byte x chosen according to rules
122 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
123 * with reasonable spread between the bytes & their complements
125 * the 3 byte value appears to be as almost good as the 4 byte value,
126 * and might be faster on some machines
127 */
128 /*
129 #define hashit(c) ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
130 */
131 #define hashit(c) (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
133 /*
134 * lz77 compression with single lookup in a hash table for each block
135 */
136 int
137 whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
139 uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
140 ulong cont, code, wbits;
141 ushort now;
142 int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
144 if(n < MinMatch)
145 return -1;
147 wdst = dst;
148 wdmax = dst + n;
150 now = w->begin;
151 s = src;
152 w->data = s;
154 cont = (s[0] << 16) | (s[1] << 8) | s[2];
156 esrc = s + n;
157 half = s + (n >> 1);
158 wnbits = 0;
159 wbits = 0;
160 lits = 0;
161 matches = 0;
162 offbits = 0;
163 lenbits = 0;
164 lithist = ~0;
165 while(s < esrc){
166 h = hashit(cont);
168 sss = s;
169 toff = whackmatch(w, &sss, esrc, h, now);
170 ss = sss;
172 len = ss - s;
173 for(; wnbits >= 8; wnbits -= 8){
174 if(wdst >= wdmax){
175 w->begin = now;
176 return -1;
178 *wdst++ = wbits >> (wnbits - 8);
180 if(len < MinMatch){
181 toff = *s;
182 lithist = (lithist << 1) | toff < 32 | toff > 127;
183 if(lithist & 0x1e){
184 wbits = (wbits << 9) | toff;
185 wnbits += 9;
186 }else if(lithist & 1){
187 toff = (toff + 64) & 0xff;
188 if(toff < 96){
189 wbits = (wbits << 10) | toff;
190 wnbits += 10;
191 }else{
192 wbits = (wbits << 11) | toff;
193 wnbits += 11;
195 }else{
196 wbits = (wbits << 8) | toff;
197 wnbits += 8;
199 lits++;
201 /*
202 * speed hack
203 * check for compression progress, bail if none achieved
204 */
205 if(s > half){
206 if(4 * (s - src) < 5 * lits){
207 w->begin = now;
208 return -1;
210 half = esrc;
213 if(s + MinMatch <= esrc){
214 w->next[now & (WhackMaxOff - 1)] = w->hash[h];
215 w->hash[h] = now;
216 if(s + MinMatch < esrc)
217 cont = (cont << 8) | s[MinMatch];
219 now++;
220 s++;
221 continue;
224 matches++;
226 /*
227 * length of match
228 */
229 if(len > MaxLen){
230 len = MaxLen;
231 ss = s + len;
233 len -= MinMatch;
234 if(len < MaxFastLen){
235 bits = lentab[len].bits;
236 wbits = (wbits << bits) | lentab[len].encode;
237 wnbits += bits;
238 lenbits += bits;
239 }else{
240 code = BigLenCode;
241 bits = BigLenBits;
242 use = BigLenBase;
243 len -= MaxFastLen;
244 while(len >= use){
245 len -= use;
246 code = (code + use) << 1;
247 use <<= (bits & 1) ^ 1;
248 bits++;
251 wbits = (wbits << bits) | (code + len);
252 wnbits += bits;
253 lenbits += bits;
255 for(; wnbits >= 8; wnbits -= 8){
256 if(wdst >= wdmax){
257 w->begin = now;
258 return -1;
260 *wdst++ = wbits >> (wnbits - 8);
264 /*
265 * offset in history
266 */
267 toff--;
268 for(bits = MinOffBits; toff >= (1 << bits); bits++)
270 if(bits < MaxOffBits-1){
271 wbits = (wbits << 3) | (bits - MinOffBits);
272 if(bits != MinOffBits)
273 bits--;
274 wnbits += bits + 3;
275 offbits += bits + 3;
276 }else{
277 wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
278 bits--;
279 wnbits += bits + 4;
280 offbits += bits + 4;
282 wbits = (wbits << bits) | toff & ((1 << bits) - 1);
284 for(; s != ss; s++){
285 if(s + MinMatch <= esrc){
286 h = hashit(cont);
287 w->next[now & (WhackMaxOff - 1)] = w->hash[h];
288 w->hash[h] = now;
289 if(s + MinMatch < esrc)
290 cont = (cont << 8) | s[MinMatch];
292 now++;
296 w->begin = now;
298 stats[StatBytes] += esrc - src;
299 stats[StatLits] += lits;
300 stats[StatMatches] += matches;
301 stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
302 stats[StatOffBits] += offbits;
303 stats[StatLenBits] += lenbits;
305 if(wnbits & 7){
306 wbits <<= 8 - (wnbits & 7);
307 wnbits += 8 - (wnbits & 7);
309 for(; wnbits >= 8; wnbits -= 8){
310 if(wdst >= wdmax)
311 return -1;
312 *wdst++ = wbits >> (wnbits - 8);
315 stats[StatOutBytes] += wdst - dst;
317 return wdst - dst;
320 int
321 whackblock(uchar *dst, uchar *src, int ssize)
323 Whack w;
324 ulong stats[MaxStat];
325 int r;
327 whackinit(&w, 6);
328 r = whack(&w, dst, src, ssize, stats);
329 return r;