Blob


1 #include "stdinc.h"
2 #include "whack.h"
4 typedef struct Huff Huff;
5 int compressblocks = 1;
7 enum
8 {
9 MaxFastLen = 9,
10 BigLenCode = 0x1f4, /* minimum code for large lenth encoding */
11 BigLenBits = 9,
12 BigLenBase = 4, /* starting items to encode for big lens */
14 MinOffBits = 6,
15 MaxOffBits = MinOffBits + 8,
17 MaxLen = 2051 /* max. length encodable in 24 bits */
18 };
20 enum
21 {
22 StatBytes,
23 StatOutBytes,
24 StatLits,
25 StatMatches,
26 StatLitBits,
27 StatOffBits,
28 StatLenBits,
30 MaxStat
31 };
33 struct Huff
34 {
35 short bits; /* length of the code */
36 ulong encode; /* the code */
37 };
39 static Huff lentab[MaxFastLen] =
40 {
41 {2, 0x2}, /* 10 */
42 {3, 0x6}, /* 110 */
43 {5, 0x1c}, /* 11100 */
44 {5, 0x1d}, /* 11101 */
45 {6, 0x3c}, /* 111100 */
46 {7, 0x7a}, /* 1111010 */
47 {7, 0x7b}, /* 1111011 */
48 {8, 0xf8}, /* 11111000 */
49 {8, 0xf9}, /* 11111001 */
50 };
52 static int thwmaxcheck;
54 void
55 whackinit(Whack *tw, int level)
56 {
57 thwmaxcheck = (1 << level);
58 thwmaxcheck -= thwmaxcheck >> 2;
59 if(thwmaxcheck < 2)
60 thwmaxcheck = 2;
61 else if(thwmaxcheck > 1024)
62 thwmaxcheck = 1024;
63 memset(tw, 0, sizeof *tw);
64 tw->begin = 2 * WhackMaxOff;
65 }
67 /*
68 * find a string in the dictionary
69 */
70 static int
71 whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
72 {
73 ushort then, off, last;
74 int bestoff, bestlen, check;
75 uchar *s, *t;
77 s = *ss;
78 if(esrc < s + MinMatch)
79 return -1;
80 if(s + MaxLen < esrc)
81 esrc = s + MaxLen;
83 bestoff = 0;
84 bestlen = 0;
85 check = thwmaxcheck;
86 last = 0;
87 for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
88 off = now - then;
89 if(off <= last || off > WhackMaxOff)
90 break;
92 /*
93 * don't need to check for the end because
94 * 1) s too close check above
95 */
96 t = s - off;
97 if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
98 if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
99 t += 3;
100 for(s += 3; s < esrc; s++){
101 if(*s != *t)
102 break;
103 t++;
105 if(s - *ss > bestlen){
106 bestlen = s - *ss;
107 bestoff = off;
108 if(bestlen > thwmaxcheck)
109 break;
113 s = *ss;
114 last = off;
116 *ss += bestlen;
117 return bestoff;
120 /*
121 * knuth vol. 3 multiplicative hashing
122 * each byte x chosen according to rules
123 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
124 * with reasonable spread between the bytes & their complements
126 * the 3 byte value appears to be as almost good as the 4 byte value,
127 * and might be faster on some machines
128 */
129 /*
130 #define hashit(c) ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
131 */
132 #define hashit(c) (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
134 /*
135 * lz77 compression with single lookup in a hash table for each block
136 */
137 int
138 whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
140 uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
141 ulong cont, code, wbits;
142 ushort now;
143 int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
145 if(!compressblocks || n < MinMatch)
146 return -1;
148 wdst = dst;
149 wdmax = dst + n;
151 now = w->begin;
152 s = src;
153 w->data = s;
155 cont = (s[0] << 16) | (s[1] << 8) | s[2];
157 esrc = s + n;
158 half = s + (n >> 1);
159 wnbits = 0;
160 wbits = 0;
161 lits = 0;
162 matches = 0;
163 offbits = 0;
164 lenbits = 0;
165 lithist = ~0;
166 while(s < esrc){
167 h = hashit(cont);
169 sss = s;
170 toff = whackmatch(w, &sss, esrc, h, now);
171 ss = sss;
173 len = ss - s;
174 for(; wnbits >= 8; wnbits -= 8){
175 if(wdst >= wdmax){
176 w->begin = now;
177 return -1;
179 *wdst++ = wbits >> (wnbits - 8);
181 if(len < MinMatch){
182 toff = *s;
183 lithist = (lithist << 1) | toff < 32 | toff > 127;
184 if(lithist & 0x1e){
185 wbits = (wbits << 9) | toff;
186 wnbits += 9;
187 }else if(lithist & 1){
188 toff = (toff + 64) & 0xff;
189 if(toff < 96){
190 wbits = (wbits << 10) | toff;
191 wnbits += 10;
192 }else{
193 wbits = (wbits << 11) | toff;
194 wnbits += 11;
196 }else{
197 wbits = (wbits << 8) | toff;
198 wnbits += 8;
200 lits++;
202 /*
203 * speed hack
204 * check for compression progress, bail if none achieved
205 */
206 if(s > half){
207 if(4 * (s - src) < 5 * lits){
208 w->begin = now;
209 return -1;
211 half = esrc;
214 if(s + MinMatch <= esrc){
215 w->next[now & (WhackMaxOff - 1)] = w->hash[h];
216 w->hash[h] = now;
217 if(s + MinMatch < esrc)
218 cont = (cont << 8) | s[MinMatch];
220 now++;
221 s++;
222 continue;
225 matches++;
227 /*
228 * length of match
229 */
230 if(len > MaxLen){
231 len = MaxLen;
232 ss = s + len;
234 len -= MinMatch;
235 if(len < MaxFastLen){
236 bits = lentab[len].bits;
237 wbits = (wbits << bits) | lentab[len].encode;
238 wnbits += bits;
239 lenbits += bits;
240 }else{
241 code = BigLenCode;
242 bits = BigLenBits;
243 use = BigLenBase;
244 len -= MaxFastLen;
245 while(len >= use){
246 len -= use;
247 code = (code + use) << 1;
248 use <<= (bits & 1) ^ 1;
249 bits++;
252 wbits = (wbits << bits) | (code + len);
253 wnbits += bits;
254 lenbits += bits;
256 for(; wnbits >= 8; wnbits -= 8){
257 if(wdst >= wdmax){
258 w->begin = now;
259 return -1;
261 *wdst++ = wbits >> (wnbits - 8);
265 /*
266 * offset in history
267 */
268 toff--;
269 for(bits = MinOffBits; toff >= (1 << bits); bits++)
271 if(bits < MaxOffBits-1){
272 wbits = (wbits << 3) | (bits - MinOffBits);
273 if(bits != MinOffBits)
274 bits--;
275 wnbits += bits + 3;
276 offbits += bits + 3;
277 }else{
278 wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
279 bits--;
280 wnbits += bits + 4;
281 offbits += bits + 4;
283 wbits = (wbits << bits) | toff & ((1 << bits) - 1);
285 for(; s != ss; s++){
286 if(s + MinMatch <= esrc){
287 h = hashit(cont);
288 w->next[now & (WhackMaxOff - 1)] = w->hash[h];
289 w->hash[h] = now;
290 if(s + MinMatch < esrc)
291 cont = (cont << 8) | s[MinMatch];
293 now++;
297 w->begin = now;
299 stats[StatBytes] += esrc - src;
300 stats[StatLits] += lits;
301 stats[StatMatches] += matches;
302 stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
303 stats[StatOffBits] += offbits;
304 stats[StatLenBits] += lenbits;
306 if(wnbits & 7){
307 wbits <<= 8 - (wnbits & 7);
308 wnbits += 8 - (wnbits & 7);
310 for(; wnbits >= 8; wnbits -= 8){
311 if(wdst >= wdmax)
312 return -1;
313 *wdst++ = wbits >> (wnbits - 8);
316 stats[StatOutBytes] += wdst - dst;
318 return wdst - dst;
321 int
322 whackblock(uchar *dst, uchar *src, int ssize)
324 Whack w;
325 ulong stats[MaxStat];
326 int r;
328 whackinit(&w, 6);
329 r = whack(&w, dst, src, ssize, stats);
330 return r;