Blob


1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
5 Bloom b;
7 void
8 usage(void)
9 {
10 fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
11 threadexitsall(0);
12 }
14 void
15 threadmain(int argc, char *argv[])
16 {
17 Part *part;
18 char *file;
19 vlong bits, size, size2;
20 int nhash;
21 vlong nblocks;
23 ventifmtinstall();
24 statsinit();
26 size = 0;
27 nhash = 0;
28 nblocks = 0;
29 ARGBEGIN{
30 case 'n':
31 if(nhash || nblocks)
32 usage();
33 nblocks = unittoull(EARGF(usage()));
34 break;
35 case 'N':
36 if(nhash || nblocks)
37 usage();
38 nhash = unittoull(EARGF(usage()));
39 if(nhash > BloomMaxHash){
40 fprint(2, "maximum possible is -N %d", BloomMaxHash);
41 usage();
42 }
43 break;
44 case 's':
45 size = unittoull(ARGF());
46 if(size == ~0)
47 usage();
48 break;
49 default:
50 usage();
51 break;
52 }ARGEND
54 if(argc != 1)
55 usage();
57 file = argv[0];
59 part = initpart(file, ORDWR|ODIRECT);
60 if(part == nil)
61 sysfatal("can't open partition %s: %r", file);
63 if(size == 0)
64 size = part->size;
66 if(size < 1024*1024)
67 sysfatal("bloom filter too small");
69 if(size > MaxBloomSize){
70 fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
71 size, (vlong)MaxBloomSize);
72 size = MaxBloomSize;
73 }
74 if(size&(size-1)){
75 for(size2=1; size2<size; size2*=2)
76 ;
77 size = size2/2;
78 fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
79 }
81 if(nblocks){
82 /*
83 * no use for more than 32 bits per block
84 * shoot for less than 64 bits per block
85 */
86 size2 = size;
87 while(size2*8 >= nblocks*64)
88 size2 >>= 1;
89 if(size2 != size){
90 size = size2;
91 fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
92 size/1024/1024);
93 }
95 /*
96 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
97 */
98 bits = (8*size)/nblocks;
99 nhash = bits*7/10;
100 if(nhash > BloomMaxHash)
101 nhash = BloomMaxHash;
103 if(!nhash)
104 nhash = BloomMaxHash;
105 if(bloominit(&b, size, nil) < 0)
106 sysfatal("bloominit: %r");
107 b.nhash = nhash;
108 bits = nhash*10/7;
109 nblocks = (8*size)/bits;
110 fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size, nhash, nblocks);
111 b.data = vtmallocz(size);
112 b.part = part;
113 if(writebloom(&b) < 0)
114 sysfatal("writing %s: %r", file);
115 threadexitsall(0);