Blame


1 b343c297 2021-10-11 stsp /*
2 b343c297 2021-10-11 stsp * Copyright (c) 2012-2017, Jyri J. Virkki
3 b343c297 2021-10-11 stsp * All rights reserved.
4 b343c297 2021-10-11 stsp *
5 b343c297 2021-10-11 stsp * Redistribution and use in source and binary forms, with or without
6 b343c297 2021-10-11 stsp * modification, are permitted provided that the following conditions are
7 b343c297 2021-10-11 stsp * met:
8 b343c297 2021-10-11 stsp *
9 b343c297 2021-10-11 stsp * 1. Redistributions of source code must retain the above copyright
10 b343c297 2021-10-11 stsp * notice, this list of conditions and the following disclaimer.
11 b343c297 2021-10-11 stsp *
12 b343c297 2021-10-11 stsp * 2. Redistributions in binary form must reproduce the above copyright
13 b343c297 2021-10-11 stsp * notice, this list of conditions and the following disclaimer in the
14 b343c297 2021-10-11 stsp * documentation and/or other materials provided with the distribution.
15 b343c297 2021-10-11 stsp *
16 b343c297 2021-10-11 stsp * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 b343c297 2021-10-11 stsp * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 b343c297 2021-10-11 stsp * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 b343c297 2021-10-11 stsp * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 b343c297 2021-10-11 stsp * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 b343c297 2021-10-11 stsp * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 b343c297 2021-10-11 stsp * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 b343c297 2021-10-11 stsp * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 b343c297 2021-10-11 stsp * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 b343c297 2021-10-11 stsp * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 b343c297 2021-10-11 stsp * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 b343c297 2021-10-11 stsp */
28 b343c297 2021-10-11 stsp
29 b343c297 2021-10-11 stsp /* Obtained from https://github.com/jvirkki/libbloom */
30 b343c297 2021-10-11 stsp
31 b343c297 2021-10-11 stsp #ifndef _BLOOM_H
32 b343c297 2021-10-11 stsp #define _BLOOM_H
33 b343c297 2021-10-11 stsp
34 b343c297 2021-10-11 stsp #ifdef __cplusplus
35 b343c297 2021-10-11 stsp extern "C" {
36 b343c297 2021-10-11 stsp #endif
37 b343c297 2021-10-11 stsp
38 b343c297 2021-10-11 stsp
39 b343c297 2021-10-11 stsp /** ***************************************************************************
40 b343c297 2021-10-11 stsp * Structure to keep track of one bloom filter. Caller needs to
41 b343c297 2021-10-11 stsp * allocate this and pass it to the functions below. First call for
42 b343c297 2021-10-11 stsp * every struct must be to bloom_init().
43 b343c297 2021-10-11 stsp *
44 b343c297 2021-10-11 stsp */
45 b343c297 2021-10-11 stsp struct bloom
46 b343c297 2021-10-11 stsp {
47 b343c297 2021-10-11 stsp // These fields are part of the public interface of this structure.
48 b343c297 2021-10-11 stsp // Client code may read these values if desired. Client code MUST NOT
49 b343c297 2021-10-11 stsp // modify any of these.
50 b343c297 2021-10-11 stsp int entries;
51 b343c297 2021-10-11 stsp double error;
52 b343c297 2021-10-11 stsp int bits;
53 b343c297 2021-10-11 stsp int bytes;
54 b343c297 2021-10-11 stsp int hashes;
55 d6a28ffe 2022-05-20 op uint32_t seed;
56 b343c297 2021-10-11 stsp
57 b343c297 2021-10-11 stsp // Fields below are private to the implementation. These may go away or
58 b343c297 2021-10-11 stsp // change incompatibly at any moment. Client code MUST NOT access or rely
59 b343c297 2021-10-11 stsp // on these.
60 b343c297 2021-10-11 stsp double bpe;
61 b343c297 2021-10-11 stsp unsigned char * bf;
62 b343c297 2021-10-11 stsp int ready;
63 b343c297 2021-10-11 stsp };
64 b343c297 2021-10-11 stsp
65 b343c297 2021-10-11 stsp
66 b343c297 2021-10-11 stsp /** ***************************************************************************
67 b343c297 2021-10-11 stsp * Initialize the bloom filter for use.
68 b343c297 2021-10-11 stsp *
69 b343c297 2021-10-11 stsp * The filter is initialized with a bit field and number of hash functions
70 b343c297 2021-10-11 stsp * according to the computations from the wikipedia entry:
71 b343c297 2021-10-11 stsp * http://en.wikipedia.org/wiki/Bloom_filter
72 b343c297 2021-10-11 stsp *
73 b343c297 2021-10-11 stsp * Optimal number of bits is:
74 b343c297 2021-10-11 stsp * bits = (entries * ln(error)) / ln(2)^2
75 b343c297 2021-10-11 stsp *
76 b343c297 2021-10-11 stsp * Optimal number of hash functions is:
77 b343c297 2021-10-11 stsp * hashes = bpe * ln(2)
78 b343c297 2021-10-11 stsp *
79 b343c297 2021-10-11 stsp * Parameters:
80 b343c297 2021-10-11 stsp * -----------
81 b343c297 2021-10-11 stsp * bloom - Pointer to an allocated struct bloom (see above).
82 b343c297 2021-10-11 stsp * entries - The expected number of entries which will be inserted.
83 b343c297 2021-10-11 stsp * Must be at least 1000 (in practice, likely much larger).
84 b343c297 2021-10-11 stsp * error - Probability of collision (as long as entries are not
85 b343c297 2021-10-11 stsp * exceeded).
86 b343c297 2021-10-11 stsp *
87 b343c297 2021-10-11 stsp * Return:
88 b343c297 2021-10-11 stsp * -------
89 b343c297 2021-10-11 stsp * 0 - on success
90 b343c297 2021-10-11 stsp * 1 - on failure
91 b343c297 2021-10-11 stsp *
92 b343c297 2021-10-11 stsp */
93 b343c297 2021-10-11 stsp int bloom_init(struct bloom * bloom, int entries, double error);
94 b343c297 2021-10-11 stsp
95 b343c297 2021-10-11 stsp
96 b343c297 2021-10-11 stsp /** ***************************************************************************
97 b343c297 2021-10-11 stsp * Deprecated, use bloom_init()
98 b343c297 2021-10-11 stsp *
99 b343c297 2021-10-11 stsp */
100 b343c297 2021-10-11 stsp int bloom_init_size(struct bloom * bloom, int entries, double error,
101 b343c297 2021-10-11 stsp unsigned int cache_size);
102 b343c297 2021-10-11 stsp
103 b343c297 2021-10-11 stsp
104 b343c297 2021-10-11 stsp /** ***************************************************************************
105 b343c297 2021-10-11 stsp * Check if the given element is in the bloom filter. Remember this may
106 b343c297 2021-10-11 stsp * return false positive if a collision occurred.
107 b343c297 2021-10-11 stsp *
108 b343c297 2021-10-11 stsp * Parameters:
109 b343c297 2021-10-11 stsp * -----------
110 b343c297 2021-10-11 stsp * bloom - Pointer to an allocated struct bloom (see above).
111 b343c297 2021-10-11 stsp * buffer - Pointer to buffer containing element to check.
112 b343c297 2021-10-11 stsp * len - Size of 'buffer'.
113 b343c297 2021-10-11 stsp *
114 b343c297 2021-10-11 stsp * Return:
115 b343c297 2021-10-11 stsp * -------
116 b343c297 2021-10-11 stsp * 0 - element is not present
117 b343c297 2021-10-11 stsp * 1 - element is present (or false positive due to collision)
118 b343c297 2021-10-11 stsp * -1 - bloom not initialized
119 b343c297 2021-10-11 stsp *
120 b343c297 2021-10-11 stsp */
121 b343c297 2021-10-11 stsp int bloom_check(struct bloom * bloom, const void * buffer, int len);
122 b343c297 2021-10-11 stsp
123 b343c297 2021-10-11 stsp
124 b343c297 2021-10-11 stsp /** ***************************************************************************
125 b343c297 2021-10-11 stsp * Add the given element to the bloom filter.
126 b343c297 2021-10-11 stsp * The return code indicates if the element (or a collision) was already in,
127 b343c297 2021-10-11 stsp * so for the common check+add use case, no need to call check separately.
128 b343c297 2021-10-11 stsp *
129 b343c297 2021-10-11 stsp * Parameters:
130 b343c297 2021-10-11 stsp * -----------
131 b343c297 2021-10-11 stsp * bloom - Pointer to an allocated struct bloom (see above).
132 b343c297 2021-10-11 stsp * buffer - Pointer to buffer containing element to add.
133 b343c297 2021-10-11 stsp * len - Size of 'buffer'.
134 b343c297 2021-10-11 stsp *
135 b343c297 2021-10-11 stsp * Return:
136 b343c297 2021-10-11 stsp * -------
137 b343c297 2021-10-11 stsp * 0 - element was not present and was added
138 b343c297 2021-10-11 stsp * 1 - element (or a collision) had already been added previously
139 b343c297 2021-10-11 stsp * -1 - bloom not initialized
140 b343c297 2021-10-11 stsp *
141 b343c297 2021-10-11 stsp */
142 b343c297 2021-10-11 stsp int bloom_add(struct bloom * bloom, const void * buffer, int len);
143 b343c297 2021-10-11 stsp
144 b343c297 2021-10-11 stsp
145 b343c297 2021-10-11 stsp /** ***************************************************************************
146 b343c297 2021-10-11 stsp * Print (to stdout) info about this bloom filter. Debugging aid.
147 b343c297 2021-10-11 stsp *
148 b343c297 2021-10-11 stsp */
149 b343c297 2021-10-11 stsp void bloom_print(struct bloom * bloom);
150 b343c297 2021-10-11 stsp
151 b343c297 2021-10-11 stsp
152 b343c297 2021-10-11 stsp /** ***************************************************************************
153 b343c297 2021-10-11 stsp * Deallocate internal storage.
154 b343c297 2021-10-11 stsp *
155 b343c297 2021-10-11 stsp * Upon return, the bloom struct is no longer usable. You may call bloom_init
156 b343c297 2021-10-11 stsp * again on the same struct to reinitialize it again.
157 b343c297 2021-10-11 stsp *
158 b343c297 2021-10-11 stsp * Parameters:
159 b343c297 2021-10-11 stsp * -----------
160 b343c297 2021-10-11 stsp * bloom - Pointer to an allocated struct bloom (see above).
161 b343c297 2021-10-11 stsp *
162 b343c297 2021-10-11 stsp * Return: none
163 b343c297 2021-10-11 stsp *
164 b343c297 2021-10-11 stsp */
165 b343c297 2021-10-11 stsp void bloom_free(struct bloom * bloom);
166 b343c297 2021-10-11 stsp
167 b343c297 2021-10-11 stsp /** ***************************************************************************
168 b343c297 2021-10-11 stsp * Erase internal storage.
169 b343c297 2021-10-11 stsp *
170 b343c297 2021-10-11 stsp * Erases all elements. Upon return, the bloom struct returns to its initial
171 b343c297 2021-10-11 stsp * (initialized) state.
172 b343c297 2021-10-11 stsp *
173 b343c297 2021-10-11 stsp * Parameters:
174 b343c297 2021-10-11 stsp * -----------
175 b343c297 2021-10-11 stsp * bloom - Pointer to an allocated struct bloom (see above).
176 b343c297 2021-10-11 stsp *
177 b343c297 2021-10-11 stsp * Return:
178 b343c297 2021-10-11 stsp * 0 - on success
179 b343c297 2021-10-11 stsp * 1 - on failure
180 b343c297 2021-10-11 stsp *
181 b343c297 2021-10-11 stsp */
182 b343c297 2021-10-11 stsp int bloom_reset(struct bloom * bloom);
183 b343c297 2021-10-11 stsp
184 b343c297 2021-10-11 stsp #ifdef __cplusplus
185 b343c297 2021-10-11 stsp }
186 b343c297 2021-10-11 stsp #endif
187 b343c297 2021-10-11 stsp
188 b343c297 2021-10-11 stsp #endif