Blame


1 b343c297 2021-10-11 stsp /*
2 b343c297 2021-10-11 stsp * Copyright (c) 2012-2019, Jyri J. Virkki
3 b343c297 2021-10-11 stsp * All rights reserved.
4 b343c297 2021-10-11 stsp *
5 b343c297 2021-10-11 stsp * Redistribution and use in source and binary forms, with or without
6 b343c297 2021-10-11 stsp * modification, are permitted provided that the following conditions are
7 b343c297 2021-10-11 stsp * met:
8 b343c297 2021-10-11 stsp *
9 b343c297 2021-10-11 stsp * 1. Redistributions of source code must retain the above copyright
10 b343c297 2021-10-11 stsp * notice, this list of conditions and the following disclaimer.
11 b343c297 2021-10-11 stsp *
12 b343c297 2021-10-11 stsp * 2. Redistributions in binary form must reproduce the above copyright
13 b343c297 2021-10-11 stsp * notice, this list of conditions and the following disclaimer in the
14 b343c297 2021-10-11 stsp * documentation and/or other materials provided with the distribution.
15 b343c297 2021-10-11 stsp *
16 b343c297 2021-10-11 stsp * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 b343c297 2021-10-11 stsp * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 b343c297 2021-10-11 stsp * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 b343c297 2021-10-11 stsp * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 b343c297 2021-10-11 stsp * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 b343c297 2021-10-11 stsp * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 b343c297 2021-10-11 stsp * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 b343c297 2021-10-11 stsp * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 b343c297 2021-10-11 stsp * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 b343c297 2021-10-11 stsp * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 b343c297 2021-10-11 stsp * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 b343c297 2021-10-11 stsp */
28 b343c297 2021-10-11 stsp
29 b343c297 2021-10-11 stsp /* Obtained from https://github.com/jvirkki/libbloom */
30 b343c297 2021-10-11 stsp
31 b343c297 2021-10-11 stsp /*
32 b343c297 2021-10-11 stsp * Refer to bloom.h for documentation on the public interfaces.
33 b343c297 2021-10-11 stsp */
34 b343c297 2021-10-11 stsp
35 b343c297 2021-10-11 stsp #include <assert.h>
36 b343c297 2021-10-11 stsp #include <fcntl.h>
37 b343c297 2021-10-11 stsp #include <math.h>
38 b343c297 2021-10-11 stsp #include <stdint.h>
39 b343c297 2021-10-11 stsp #include <stdio.h>
40 b343c297 2021-10-11 stsp #include <stdlib.h>
41 b343c297 2021-10-11 stsp #include <string.h>
42 b343c297 2021-10-11 stsp #include <sys/stat.h>
43 b343c297 2021-10-11 stsp #include <sys/types.h>
44 b343c297 2021-10-11 stsp #include <unistd.h>
45 b343c297 2021-10-11 stsp
46 b343c297 2021-10-11 stsp #include "bloom.h"
47 b343c297 2021-10-11 stsp #include "murmurhash2.h"
48 b343c297 2021-10-11 stsp
49 b343c297 2021-10-11 stsp #define MAKESTRING(n) STRING(n)
50 b343c297 2021-10-11 stsp #define STRING(n) #n
51 b343c297 2021-10-11 stsp
52 b343c297 2021-10-11 stsp
53 b343c297 2021-10-11 stsp inline static int test_bit_set_bit(unsigned char * buf,
54 b343c297 2021-10-11 stsp unsigned int x, int set_bit)
55 b343c297 2021-10-11 stsp {
56 b343c297 2021-10-11 stsp unsigned int byte = x >> 3;
57 b343c297 2021-10-11 stsp unsigned char c = buf[byte]; // expensive memory access
58 b343c297 2021-10-11 stsp unsigned int mask = 1 << (x % 8);
59 b343c297 2021-10-11 stsp
60 b343c297 2021-10-11 stsp if (c & mask) {
61 b343c297 2021-10-11 stsp return 1;
62 b343c297 2021-10-11 stsp } else {
63 b343c297 2021-10-11 stsp if (set_bit) {
64 b343c297 2021-10-11 stsp buf[byte] = c | mask;
65 b343c297 2021-10-11 stsp }
66 b343c297 2021-10-11 stsp return 0;
67 b343c297 2021-10-11 stsp }
68 b343c297 2021-10-11 stsp }
69 b343c297 2021-10-11 stsp
70 b343c297 2021-10-11 stsp
71 b343c297 2021-10-11 stsp static int bloom_check_add(struct bloom * bloom,
72 b343c297 2021-10-11 stsp const void * buffer, int len, int add)
73 b343c297 2021-10-11 stsp {
74 b343c297 2021-10-11 stsp if (bloom->ready == 0) {
75 b343c297 2021-10-11 stsp printf("bloom at %p not initialized!\n", (void *)bloom);
76 b343c297 2021-10-11 stsp return -1;
77 b343c297 2021-10-11 stsp }
78 b343c297 2021-10-11 stsp
79 b343c297 2021-10-11 stsp int hits = 0;
80 d6a28ffe 2022-05-20 op register unsigned int a = murmurhash2(buffer, len, bloom->seed);
81 b343c297 2021-10-11 stsp register unsigned int b = murmurhash2(buffer, len, a);
82 b343c297 2021-10-11 stsp register unsigned int x;
83 b343c297 2021-10-11 stsp register unsigned int i;
84 b343c297 2021-10-11 stsp
85 b343c297 2021-10-11 stsp for (i = 0; i < bloom->hashes; i++) {
86 b343c297 2021-10-11 stsp x = (a + i*b) % bloom->bits;
87 b343c297 2021-10-11 stsp if (test_bit_set_bit(bloom->bf, x, add)) {
88 b343c297 2021-10-11 stsp hits++;
89 b343c297 2021-10-11 stsp } else if (!add) {
90 b343c297 2021-10-11 stsp // Don't care about the presence of all the bits. Just our own.
91 b343c297 2021-10-11 stsp return 0;
92 b343c297 2021-10-11 stsp }
93 b343c297 2021-10-11 stsp }
94 b343c297 2021-10-11 stsp
95 b343c297 2021-10-11 stsp if (hits == bloom->hashes) {
96 b343c297 2021-10-11 stsp return 1; // 1 == element already in (or collision)
97 b343c297 2021-10-11 stsp }
98 b343c297 2021-10-11 stsp
99 b343c297 2021-10-11 stsp return 0;
100 b343c297 2021-10-11 stsp }
101 b343c297 2021-10-11 stsp
102 b343c297 2021-10-11 stsp
103 b343c297 2021-10-11 stsp int bloom_init_size(struct bloom * bloom, int entries, double error,
104 b343c297 2021-10-11 stsp unsigned int cache_size)
105 b343c297 2021-10-11 stsp {
106 b343c297 2021-10-11 stsp return bloom_init(bloom, entries, error);
107 b343c297 2021-10-11 stsp }
108 b343c297 2021-10-11 stsp
109 b343c297 2021-10-11 stsp
110 b343c297 2021-10-11 stsp int bloom_init(struct bloom * bloom, int entries, double error)
111 b343c297 2021-10-11 stsp {
112 b343c297 2021-10-11 stsp bloom->ready = 0;
113 b343c297 2021-10-11 stsp
114 d6a28ffe 2022-05-20 op bloom->seed = arc4random();
115 d6a28ffe 2022-05-20 op
116 b343c297 2021-10-11 stsp if (entries < 1000 || error == 0) {
117 b343c297 2021-10-11 stsp return 1;
118 b343c297 2021-10-11 stsp }
119 b343c297 2021-10-11 stsp
120 b343c297 2021-10-11 stsp bloom->entries = entries;
121 b343c297 2021-10-11 stsp bloom->error = error;
122 b343c297 2021-10-11 stsp
123 b343c297 2021-10-11 stsp double num = log(bloom->error);
124 b343c297 2021-10-11 stsp double denom = 0.480453013918201; // ln(2)^2
125 b343c297 2021-10-11 stsp bloom->bpe = -(num / denom);
126 b343c297 2021-10-11 stsp
127 b343c297 2021-10-11 stsp double dentries = (double)entries;
128 b343c297 2021-10-11 stsp bloom->bits = (int)(dentries * bloom->bpe);
129 b343c297 2021-10-11 stsp
130 b343c297 2021-10-11 stsp if (bloom->bits % 8) {
131 b343c297 2021-10-11 stsp bloom->bytes = (bloom->bits / 8) + 1;
132 b343c297 2021-10-11 stsp } else {
133 b343c297 2021-10-11 stsp bloom->bytes = bloom->bits / 8;
134 b343c297 2021-10-11 stsp }
135 b343c297 2021-10-11 stsp
136 b343c297 2021-10-11 stsp bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2)
137 b343c297 2021-10-11 stsp
138 b343c297 2021-10-11 stsp bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
139 b343c297 2021-10-11 stsp if (bloom->bf == NULL) { // LCOV_EXCL_START
140 b343c297 2021-10-11 stsp return 1;
141 b343c297 2021-10-11 stsp } // LCOV_EXCL_STOP
142 b343c297 2021-10-11 stsp
143 b343c297 2021-10-11 stsp bloom->ready = 1;
144 b343c297 2021-10-11 stsp return 0;
145 b343c297 2021-10-11 stsp }
146 b343c297 2021-10-11 stsp
147 b343c297 2021-10-11 stsp
148 b343c297 2021-10-11 stsp int bloom_check(struct bloom * bloom, const void * buffer, int len)
149 b343c297 2021-10-11 stsp {
150 b343c297 2021-10-11 stsp return bloom_check_add(bloom, buffer, len, 0);
151 b343c297 2021-10-11 stsp }
152 b343c297 2021-10-11 stsp
153 b343c297 2021-10-11 stsp
154 b343c297 2021-10-11 stsp int bloom_add(struct bloom * bloom, const void * buffer, int len)
155 b343c297 2021-10-11 stsp {
156 b343c297 2021-10-11 stsp return bloom_check_add(bloom, buffer, len, 1);
157 b343c297 2021-10-11 stsp }
158 b343c297 2021-10-11 stsp
159 b343c297 2021-10-11 stsp
160 b343c297 2021-10-11 stsp void bloom_print(struct bloom * bloom)
161 b343c297 2021-10-11 stsp {
162 b343c297 2021-10-11 stsp printf("bloom at %p\n", (void *)bloom);
163 b343c297 2021-10-11 stsp printf(" ->entries = %d\n", bloom->entries);
164 b343c297 2021-10-11 stsp printf(" ->error = %f\n", bloom->error);
165 b343c297 2021-10-11 stsp printf(" ->bits = %d\n", bloom->bits);
166 b343c297 2021-10-11 stsp printf(" ->bits per elem = %f\n", bloom->bpe);
167 b343c297 2021-10-11 stsp printf(" ->bytes = %d\n", bloom->bytes);
168 b343c297 2021-10-11 stsp printf(" ->hash functions = %d\n", bloom->hashes);
169 b343c297 2021-10-11 stsp }
170 b343c297 2021-10-11 stsp
171 b343c297 2021-10-11 stsp
172 b343c297 2021-10-11 stsp void bloom_free(struct bloom * bloom)
173 b343c297 2021-10-11 stsp {
174 b343c297 2021-10-11 stsp if (bloom->ready) {
175 b343c297 2021-10-11 stsp free(bloom->bf);
176 b343c297 2021-10-11 stsp }
177 b343c297 2021-10-11 stsp bloom->ready = 0;
178 b343c297 2021-10-11 stsp }
179 b343c297 2021-10-11 stsp
180 b343c297 2021-10-11 stsp
181 b343c297 2021-10-11 stsp int bloom_reset(struct bloom * bloom)
182 b343c297 2021-10-11 stsp {
183 b343c297 2021-10-11 stsp if (!bloom->ready) return 1;
184 b343c297 2021-10-11 stsp memset(bloom->bf, 0, bloom->bytes);
185 b343c297 2021-10-11 stsp return 0;
186 b343c297 2021-10-11 stsp }