Blame


1 a0d146ed 2005-07-12 devnull #include "stdinc.h"
2 a0d146ed 2005-07-12 devnull #include "dat.h"
3 a0d146ed 2005-07-12 devnull #include "fns.h"
4 a0d146ed 2005-07-12 devnull
5 a0d146ed 2005-07-12 devnull Bloom b;
6 a0d146ed 2005-07-12 devnull
7 a0d146ed 2005-07-12 devnull void
8 a0d146ed 2005-07-12 devnull usage(void)
9 a0d146ed 2005-07-12 devnull {
10 9ee00732 2011-06-07 rsc fprint(2, "usage: fmtbloom [-n nblocks | -N nhash] [-s size] file\n");
11 a0d146ed 2005-07-12 devnull threadexitsall(0);
12 a0d146ed 2005-07-12 devnull }
13 a0d146ed 2005-07-12 devnull
14 a0d146ed 2005-07-12 devnull void
15 a0d146ed 2005-07-12 devnull threadmain(int argc, char *argv[])
16 a0d146ed 2005-07-12 devnull {
17 a0d146ed 2005-07-12 devnull Part *part;
18 a0d146ed 2005-07-12 devnull char *file;
19 a0d146ed 2005-07-12 devnull vlong bits, size, size2;
20 a0d146ed 2005-07-12 devnull int nhash;
21 a0d146ed 2005-07-12 devnull vlong nblocks;
22 fa325e9b 2020-01-10 cross
23 a0d146ed 2005-07-12 devnull ventifmtinstall();
24 a0d146ed 2005-07-12 devnull statsinit();
25 a0d146ed 2005-07-12 devnull
26 a0d146ed 2005-07-12 devnull size = 0;
27 3d46902d 2005-10-29 devnull nhash = 0;
28 3d46902d 2005-10-29 devnull nblocks = 0;
29 a0d146ed 2005-07-12 devnull ARGBEGIN{
30 a0d146ed 2005-07-12 devnull case 'n':
31 a0d146ed 2005-07-12 devnull if(nhash || nblocks)
32 a0d146ed 2005-07-12 devnull usage();
33 a0d146ed 2005-07-12 devnull nblocks = unittoull(EARGF(usage()));
34 a0d146ed 2005-07-12 devnull break;
35 a0d146ed 2005-07-12 devnull case 'N':
36 a0d146ed 2005-07-12 devnull if(nhash || nblocks)
37 a0d146ed 2005-07-12 devnull usage();
38 a0d146ed 2005-07-12 devnull nhash = unittoull(EARGF(usage()));
39 a0d146ed 2005-07-12 devnull if(nhash > BloomMaxHash){
40 a0d146ed 2005-07-12 devnull fprint(2, "maximum possible is -N %d", BloomMaxHash);
41 a0d146ed 2005-07-12 devnull usage();
42 a0d146ed 2005-07-12 devnull }
43 a0d146ed 2005-07-12 devnull break;
44 a0d146ed 2005-07-12 devnull case 's':
45 a0d146ed 2005-07-12 devnull size = unittoull(ARGF());
46 a0d146ed 2005-07-12 devnull if(size == ~0)
47 a0d146ed 2005-07-12 devnull usage();
48 a0d146ed 2005-07-12 devnull break;
49 a0d146ed 2005-07-12 devnull default:
50 a0d146ed 2005-07-12 devnull usage();
51 a0d146ed 2005-07-12 devnull break;
52 a0d146ed 2005-07-12 devnull }ARGEND
53 a0d146ed 2005-07-12 devnull
54 a0d146ed 2005-07-12 devnull if(argc != 1)
55 a0d146ed 2005-07-12 devnull usage();
56 a0d146ed 2005-07-12 devnull
57 a0d146ed 2005-07-12 devnull file = argv[0];
58 a0d146ed 2005-07-12 devnull
59 a0d146ed 2005-07-12 devnull part = initpart(file, ORDWR|ODIRECT);
60 a0d146ed 2005-07-12 devnull if(part == nil)
61 a0d146ed 2005-07-12 devnull sysfatal("can't open partition %s: %r", file);
62 a0d146ed 2005-07-12 devnull
63 a0d146ed 2005-07-12 devnull if(size == 0)
64 a0d146ed 2005-07-12 devnull size = part->size;
65 fa325e9b 2020-01-10 cross
66 a0d146ed 2005-07-12 devnull if(size < 1024*1024)
67 a0d146ed 2005-07-12 devnull sysfatal("bloom filter too small");
68 a0d146ed 2005-07-12 devnull
69 a0d146ed 2005-07-12 devnull if(size > MaxBloomSize){
70 a0d146ed 2005-07-12 devnull fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
71 3d46902d 2005-10-29 devnull size, (vlong)MaxBloomSize);
72 a0d146ed 2005-07-12 devnull size = MaxBloomSize;
73 a0d146ed 2005-07-12 devnull }
74 a0d146ed 2005-07-12 devnull if(size&(size-1)){
75 a0d146ed 2005-07-12 devnull for(size2=1; size2<size; size2*=2)
76 a0d146ed 2005-07-12 devnull ;
77 a0d146ed 2005-07-12 devnull size = size2/2;
78 a0d146ed 2005-07-12 devnull fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
79 a0d146ed 2005-07-12 devnull }
80 a0d146ed 2005-07-12 devnull
81 a0d146ed 2005-07-12 devnull if(nblocks){
82 a0d146ed 2005-07-12 devnull /*
83 a0d146ed 2005-07-12 devnull * no use for more than 32 bits per block
84 a0d146ed 2005-07-12 devnull * shoot for less than 64 bits per block
85 a0d146ed 2005-07-12 devnull */
86 a0d146ed 2005-07-12 devnull size2 = size;
87 a0d146ed 2005-07-12 devnull while(size2*8 >= nblocks*64)
88 a0d146ed 2005-07-12 devnull size2 >>= 1;
89 a0d146ed 2005-07-12 devnull if(size2 != size){
90 a0d146ed 2005-07-12 devnull size = size2;
91 a0d146ed 2005-07-12 devnull fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
92 a0d146ed 2005-07-12 devnull size/1024/1024);
93 a0d146ed 2005-07-12 devnull }
94 a0d146ed 2005-07-12 devnull
95 a0d146ed 2005-07-12 devnull /*
96 fa325e9b 2020-01-10 cross * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
97 a0d146ed 2005-07-12 devnull */
98 a0d146ed 2005-07-12 devnull bits = (8*size)/nblocks;
99 a0d146ed 2005-07-12 devnull nhash = bits*7/10;
100 a0d146ed 2005-07-12 devnull if(nhash > BloomMaxHash)
101 a0d146ed 2005-07-12 devnull nhash = BloomMaxHash;
102 a0d146ed 2005-07-12 devnull }
103 a0d146ed 2005-07-12 devnull if(!nhash)
104 a0d146ed 2005-07-12 devnull nhash = BloomMaxHash;
105 a0d146ed 2005-07-12 devnull if(bloominit(&b, size, nil) < 0)
106 a0d146ed 2005-07-12 devnull sysfatal("bloominit: %r");
107 a0d146ed 2005-07-12 devnull b.nhash = nhash;
108 a0d146ed 2005-07-12 devnull bits = nhash*10/7;
109 a0d146ed 2005-07-12 devnull nblocks = (8*size)/bits;
110 4f959600 2006-04-20 devnull fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
111 a0d146ed 2005-07-12 devnull b.data = vtmallocz(size);
112 a0d146ed 2005-07-12 devnull b.part = part;
113 a0d146ed 2005-07-12 devnull if(writebloom(&b) < 0)
114 a0d146ed 2005-07-12 devnull sysfatal("writing %s: %r", file);
115 a0d146ed 2005-07-12 devnull threadexitsall(0);
116 a0d146ed 2005-07-12 devnull }