Blob


1 /*
2 * Copyright (c) 2019, 2020 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <stdint.h>
22 #include <sha1.h>
23 #include <sha2.h>
24 #include <stdio.h>
25 #include <zlib.h>
26 #include <limits.h>
27 #include <time.h>
28 #include <errno.h>
29 #include <siphash.h>
31 #include "got_object.h"
32 #include "got_error.h"
34 #include "got_lib_delta.h"
35 #include "got_lib_inflate.h"
36 #include "got_lib_object.h"
37 #include "got_lib_delta_cache.h"
39 #ifndef nitems
40 #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
41 #endif
43 #define GOT_DELTA_CACHE_MIN_BUCKETS 64
44 #define GOT_DELTA_CACHE_MAX_BUCKETS 2048
45 #define GOT_DELTA_CACHE_MAX_CHAIN 2
46 #define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024
47 #define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE 524288
50 struct got_cached_delta {
51 off_t offset;
52 uint8_t *data;
53 size_t len;
54 uint8_t *fulltext;
55 size_t fulltext_len;
56 };
58 struct got_delta_cache_head {
59 struct got_cached_delta entries[GOT_DELTA_CACHE_MAX_CHAIN];
60 unsigned int nchain;
61 };
63 struct got_delta_cache {
64 struct got_delta_cache_head *buckets;
65 unsigned int nbuckets;
66 unsigned int totelem;
67 int cache_search;
68 int cache_hit;
69 int cache_hit_fulltext;
70 int cache_miss;
71 int cache_evict;
72 int cache_toolarge;
73 int cache_toolarge_fulltext;
74 int cache_maxtoolarge;
75 int cache_maxtoolarge_fulltext;
76 unsigned int flags;
77 #define GOT_DELTA_CACHE_F_NOMEM 0x01
78 SIPHASH_KEY key;
79 };
81 const struct got_error *
82 got_delta_cache_alloc(struct got_delta_cache **new)
83 {
84 const struct got_error *err;
85 struct got_delta_cache *cache;
87 *new = NULL;
89 cache = calloc(1, sizeof(*cache));
90 if (cache == NULL)
91 return got_error_from_errno("calloc");
93 cache->buckets = calloc(GOT_DELTA_CACHE_MIN_BUCKETS,
94 sizeof(cache->buckets[0]));
95 if (cache->buckets == NULL) {
96 err = got_error_from_errno("calloc");
97 free(cache);
98 return err;
99 }
100 cache->nbuckets = GOT_DELTA_CACHE_MIN_BUCKETS;
102 arc4random_buf(&cache->key, sizeof(cache->key));
103 *new = cache;
104 return NULL;
107 void
108 got_delta_cache_free(struct got_delta_cache *cache)
110 struct got_cached_delta *delta;
111 unsigned int i;
113 #ifdef GOT_DELTA_CACHE_DEBUG
114 fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, "
115 "%d fulltext hits, %d missed, %d evicted, %d too large (max %d), "
116 "%d too large fulltext (max %d)\n",
117 getprogname(), cache->totelem, cache->cache_search,
118 cache->cache_hit, cache->cache_hit_fulltext,
119 cache->cache_miss, cache->cache_evict, cache->cache_toolarge,
120 cache->cache_maxtoolarge,
121 cache->cache_toolarge_fulltext,
122 cache->cache_maxtoolarge_fulltext);
123 #endif
124 for (i = 0; i < cache->nbuckets; i++) {
125 struct got_delta_cache_head *head;
126 int j;
127 head = &cache->buckets[i];
128 for (j = 0; j < head->nchain; j++) {
129 delta = &head->entries[j];
130 free(delta->data);
133 free(cache->buckets);
134 free(cache);
137 static uint64_t
138 delta_cache_hash(struct got_delta_cache *cache, off_t delta_offset)
140 return SipHash24(&cache->key, &delta_offset, sizeof(delta_offset));
143 #ifndef GOT_NO_DELTA_CACHE
144 static const struct got_error *
145 delta_cache_resize(struct got_delta_cache *cache, unsigned int nbuckets)
147 struct got_delta_cache_head *buckets;
148 size_t i;
150 buckets = calloc(nbuckets, sizeof(buckets[0]));
151 if (buckets == NULL) {
152 if (errno != ENOMEM)
153 return got_error_from_errno("calloc");
154 /* Proceed with our current amount of hash buckets. */
155 cache->flags |= GOT_DELTA_CACHE_F_NOMEM;
156 return NULL;
159 arc4random_buf(&cache->key, sizeof(cache->key));
161 for (i = 0; i < cache->nbuckets; i++) {
162 unsigned int j;
163 for (j = 0; j < cache->buckets[i].nchain; j++) {
164 struct got_delta_cache_head *head;
165 struct got_cached_delta *delta;
166 uint64_t idx;
167 delta = &cache->buckets[i].entries[j];
168 idx = delta_cache_hash(cache, delta->offset) % nbuckets;
169 head = &buckets[idx];
170 if (head->nchain < nitems(head->entries)) {
171 struct got_cached_delta *new_delta;
172 new_delta = &head->entries[head->nchain];
173 memcpy(new_delta, delta, sizeof(*new_delta));
174 head->nchain++;
175 } else {
176 free(delta->data);
177 cache->totelem--;
182 free(cache->buckets);
183 cache->buckets = buckets;
184 cache->nbuckets = nbuckets;
185 return NULL;
188 static const struct got_error *
189 delta_cache_grow(struct got_delta_cache *cache)
191 unsigned int nbuckets;
193 if ((cache->flags & GOT_DELTA_CACHE_F_NOMEM) ||
194 cache->nbuckets == GOT_DELTA_CACHE_MAX_BUCKETS)
195 return NULL;
197 if (cache->nbuckets >= GOT_DELTA_CACHE_MAX_BUCKETS / 2)
198 nbuckets = GOT_DELTA_CACHE_MAX_BUCKETS;
199 else
200 nbuckets = cache->nbuckets * 2;
202 return delta_cache_resize(cache, nbuckets);
204 #endif
206 const struct got_error *
207 got_delta_cache_add(struct got_delta_cache *cache,
208 off_t delta_data_offset, uint8_t *delta_data, size_t delta_len)
210 #ifdef GOT_NO_DELTA_CACHE
211 return got_error(GOT_ERR_NO_SPACE);
212 #else
213 const struct got_error *err = NULL;
214 struct got_cached_delta *delta;
215 struct got_delta_cache_head *head;
216 uint64_t idx;
218 if (delta_len > GOT_DELTA_CACHE_MAX_DELTA_SIZE) {
219 cache->cache_toolarge++;
220 if (delta_len > cache->cache_maxtoolarge)
221 cache->cache_maxtoolarge = delta_len;
222 return got_error(GOT_ERR_NO_SPACE);
225 if (cache->nbuckets * 3 < cache->totelem * 4) {
226 err = delta_cache_grow(cache);
227 if (err)
228 return err;
231 idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
232 head = &cache->buckets[idx];
233 if (head->nchain >= nitems(head->entries)) {
234 delta = &head->entries[head->nchain - 1];
235 free(delta->data);
236 free(delta->fulltext);
237 memset(delta, 0, sizeof(*delta));
238 head->nchain--;
239 cache->totelem--;
240 cache->cache_evict++;
243 delta = &head->entries[head->nchain];
244 delta->offset = delta_data_offset;
245 delta->data = delta_data;
246 delta->len = delta_len;
247 delta->fulltext = NULL;
248 delta->fulltext_len = 0;
249 head->nchain++;
250 cache->totelem++;
252 return NULL;
253 #endif
256 const struct got_error *
257 got_delta_cache_add_fulltext(struct got_delta_cache *cache,
258 off_t delta_data_offset, uint8_t *fulltext, size_t fulltext_len)
260 #ifdef GOT_NO_DELTA_CACHE
261 return got_error(GOT_ERR_NO_SPACE);
262 #else
263 struct got_cached_delta *delta;
264 struct got_delta_cache_head *head;
265 uint64_t idx;
266 int i;
268 if (fulltext_len > GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE) {
269 cache->cache_toolarge_fulltext++;
270 if (fulltext_len > cache->cache_maxtoolarge)
271 cache->cache_maxtoolarge_fulltext = fulltext_len;
272 return got_error(GOT_ERR_NO_SPACE);
275 idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
276 head = &cache->buckets[idx];
278 for (i = 0; i < head->nchain; i++) {
279 delta = &head->entries[i];
280 if (delta->offset != delta_data_offset)
281 continue;
282 if (i > 0) {
283 struct got_cached_delta tmp;
285 memcpy(&tmp, &head->entries[0], sizeof(tmp));
286 memcpy(&head->entries[0], &head->entries[i],
287 sizeof(head->entries[0]));
288 memcpy(&head->entries[i], &tmp,
289 sizeof(head->entries[i]));
290 delta = &head->entries[0];
292 delta->fulltext = malloc(fulltext_len);
293 if (delta->fulltext == NULL)
294 return got_error_from_errno("malloc");
295 memcpy(delta->fulltext, fulltext, fulltext_len);
296 delta->fulltext_len = fulltext_len;
297 break;
300 return NULL;
301 #endif
304 void
305 got_delta_cache_get(uint8_t **delta_data, size_t *delta_len,
306 uint8_t **fulltext, size_t *fulltext_len,
307 struct got_delta_cache *cache, off_t delta_data_offset)
309 uint64_t idx;
310 struct got_delta_cache_head *head;
311 struct got_cached_delta *delta;
312 int i;
314 idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
315 head = &cache->buckets[idx];
317 cache->cache_search++;
318 *delta_data = NULL;
319 *delta_len = 0;
320 if (fulltext)
321 *fulltext = NULL;
322 if (fulltext_len)
323 *fulltext_len = 0;
324 for (i = 0; i < head->nchain; i++) {
325 delta = &head->entries[i];
326 if (delta->offset != delta_data_offset)
327 continue;
328 cache->cache_hit++;
329 if (i > 0) {
330 struct got_cached_delta tmp;
331 memcpy(&tmp, &head->entries[0], sizeof(tmp));
332 memcpy(&head->entries[0], &head->entries[i],
333 sizeof(head->entries[0]));
334 memcpy(&head->entries[i], &tmp,
335 sizeof(head->entries[i]));
336 delta = &head->entries[0];
338 *delta_data = delta->data;
339 *delta_len = delta->len;
340 if (fulltext && fulltext_len &&
341 delta->fulltext && delta->fulltext_len) {
342 *fulltext = delta->fulltext;
343 *fulltext_len = delta->fulltext_len;
344 cache->cache_hit_fulltext++;
347 return;
350 cache->cache_miss++;