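/*
 * arena management: creating, loading, reading, writing,
 * sealing and checksumming arenas stored on a partition.
 */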


1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
5 typedef struct ASum ASum;
7 struct ASum
8 {
9 Arena *arena;
10 ASum *next;
11 };
13 static void sealarena(Arena *arena);
14 static int okarena(Arena *arena);
15 static int loadarena(Arena *arena);
16 static CIBlock *getcib(Arena *arena, int clump, int writing, CIBlock *rock);
17 static void putcib(Arena *arena, CIBlock *cib);
18 static void sumproc(void *);
20 static QLock sumlock;
21 static Rendez sumwait;
22 static ASum *sumq;
24 int
25 initarenasum(void)
26 {
27 sumwait.l = &sumlock;
29 if(vtproc(sumproc, nil) < 0){
30 seterr(EOk, "can't start arena checksum slave: %r");
31 return -1;
32 }
33 return 0;
34 }
36 /*
37 * make an Arena, and initialize it based upon the disk header and trailer.
38 */
39 Arena*
40 initarena(Part *part, u64int base, u64int size, u32int blocksize)
41 {
42 Arena *arena;
44 arena = MKZ(Arena);
45 arena->part = part;
46 arena->blocksize = blocksize;
47 arena->clumpmax = arena->blocksize / ClumpInfoSize;
48 arena->base = base + blocksize;
49 arena->size = size - 2 * blocksize;
51 if(loadarena(arena) < 0){
52 seterr(ECorrupt, "arena header or trailer corrupted");
53 freearena(arena);
54 return nil;
55 }
56 if(okarena(arena) < 0){
57 freearena(arena);
58 return nil;
59 }
61 if(arena->sealed && scorecmp(zeroscore, arena->score)==0)
62 backsumarena(arena);
64 return arena;
65 }
67 void
68 freearena(Arena *arena)
69 {
70 if(arena == nil)
71 return;
72 if(arena->cib.data != nil){
73 putdblock(arena->cib.data);
74 arena->cib.data = nil;
75 }
76 free(arena);
77 }
79 Arena*
80 newarena(Part *part, char *name, u64int base, u64int size, u32int blocksize)
81 {
82 Arena *arena;
84 if(nameok(name) < 0){
85 seterr(EOk, "illegal arena name", name);
86 return nil;
87 }
88 arena = MKZ(Arena);
89 arena->part = part;
90 arena->version = ArenaVersion;
91 arena->blocksize = blocksize;
92 arena->clumpmax = arena->blocksize / ClumpInfoSize;
93 arena->base = base + blocksize;
94 arena->size = size - 2 * blocksize;
96 namecp(arena->name, name);
98 if(wbarena(arena)<0 || wbarenahead(arena)<0){
99 freearena(arena);
100 return nil;
103 return arena;
106 int
107 readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
109 CIBlock *cib, r;
111 cib = getcib(arena, clump, 0, &r);
112 if(cib == nil)
113 return -1;
114 unpackclumpinfo(ci, &cib->data->data[cib->offset]);
115 putcib(arena, cib);
116 return 0;
119 int
120 readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
122 CIBlock *cib, r;
123 int i;
125 for(i = 0; i < n; i++){
126 cib = getcib(arena, clump + i, 0, &r);
127 if(cib == nil)
128 break;
129 unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
130 putcib(arena, cib);
132 return i;
135 /*
136 * write directory information for one clump
137 * must be called the arena locked
138 */
139 int
140 writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
142 CIBlock *cib, r;
144 cib = getcib(arena, clump, 1, &r);
145 if(cib == nil)
146 return -1;
147 dirtydblock(cib->data, DirtyArenaCib);
148 packclumpinfo(ci, &cib->data->data[cib->offset]);
149 putcib(arena, cib);
150 return 0;
153 u64int
154 arenadirsize(Arena *arena, u32int clumps)
156 return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
159 /*
160 * read a clump of data
161 * n is a hint of the size of the data, not including the header
162 * make sure it won't run off the end, then return the number of bytes actually read
163 */
164 u32int
165 readarena(Arena *arena, u64int aa, u8int *buf, long n)
167 DBlock *b;
168 u64int a;
169 u32int blocksize, off, m;
170 long nn;
172 if(n == 0)
173 return -1;
175 qlock(&arena->lock);
176 a = arena->size - arenadirsize(arena, arena->clumps);
177 qunlock(&arena->lock);
178 if(aa >= a){
179 seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->clumps, aa, a, arena->size - arenadirsize(arena, arena->clumps - 1));
180 return -1;
182 if(aa + n > a)
183 n = a - aa;
185 blocksize = arena->blocksize;
186 a = arena->base + aa;
187 off = a & (blocksize - 1);
188 a -= off;
189 nn = 0;
190 for(;;){
191 b = getdblock(arena->part, a, 1);
192 if(b == nil)
193 return -1;
194 m = blocksize - off;
195 if(m > n - nn)
196 m = n - nn;
197 memmove(&buf[nn], &b->data[off], m);
198 putdblock(b);
199 nn += m;
200 if(nn == n)
201 break;
202 off = 0;
203 a += blocksize;
205 return n;
208 /*
209 * write some data to the clump section at a given offset
210 * used to fix up corrupted arenas.
211 */
212 u32int
213 writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
215 DBlock *b;
216 u64int a;
217 u32int blocksize, off, m;
218 long nn;
219 int ok;
221 if(n == 0)
222 return -1;
224 qlock(&arena->lock);
225 a = arena->size - arenadirsize(arena, arena->clumps);
226 if(aa >= a || aa + n > a){
227 qunlock(&arena->lock);
228 seterr(EOk, "writing beyond arena clump storage");
229 return -1;
232 blocksize = arena->blocksize;
233 a = arena->base + aa;
234 off = a & (blocksize - 1);
235 a -= off;
236 nn = 0;
237 for(;;){
238 b = getdblock(arena->part, a, off != 0 || off + n < blocksize);
239 if(b == nil){
240 qunlock(&arena->lock);
241 return -1;
243 dirtydblock(b, DirtyArena);
244 m = blocksize - off;
245 if(m > n - nn)
246 m = n - nn;
247 memmove(&b->data[off], &clbuf[nn], m);
248 // ok = writepart(arena->part, a, b->data, blocksize);
249 ok = 0;
250 putdblock(b);
251 if(ok < 0){
252 qunlock(&arena->lock);
253 return -1;
255 nn += m;
256 if(nn == n)
257 break;
258 off = 0;
259 a += blocksize;
261 qunlock(&arena->lock);
262 return n;
265 /*
266 * allocate space for the clump and write it,
267 * updating the arena directory
268 ZZZ question: should this distinguish between an arena
269 filling up and real errors writing the clump?
270 */
271 u64int
272 writeaclump(Arena *arena, Clump *c, u8int *clbuf)
274 DBlock *b;
275 u64int a, aa;
276 u32int clump, n, nn, m, off, blocksize;
277 int ok;
279 n = c->info.size + ClumpSize;
280 qlock(&arena->lock);
281 aa = arena->used;
282 if(arena->sealed
283 || aa + n + U32Size + arenadirsize(arena, arena->clumps + 1) > arena->size){
284 if(!arena->sealed)
285 sealarena(arena);
286 qunlock(&arena->lock);
287 return TWID64;
289 if(packclump(c, &clbuf[0]) < 0){
290 qunlock(&arena->lock);
291 return TWID64;
294 /*
295 * write the data out one block at a time
296 */
297 blocksize = arena->blocksize;
298 a = arena->base + aa;
299 off = a & (blocksize - 1);
300 a -= off;
301 nn = 0;
302 for(;;){
303 b = getdblock(arena->part, a, off != 0);
304 if(b == nil){
305 qunlock(&arena->lock);
306 return TWID64;
308 dirtydblock(b, DirtyArena);
309 m = blocksize - off;
310 if(m > n - nn)
311 m = n - nn;
312 memmove(&b->data[off], &clbuf[nn], m);
313 // ok = writepart(arena->part, a, b->data, blocksize);
314 ok = 0;
315 putdblock(b);
316 if(ok < 0){
317 qunlock(&arena->lock);
318 return TWID64;
320 nn += m;
321 if(nn == n)
322 break;
323 off = 0;
324 a += blocksize;
327 arena->used += c->info.size + ClumpSize;
328 arena->uncsize += c->info.uncsize;
329 if(c->info.size < c->info.uncsize)
330 arena->cclumps++;
332 clump = arena->clumps++;
333 if(arena->clumps == 0)
334 sysfatal("clumps wrapped\n");
335 arena->wtime = now();
336 if(arena->ctime == 0)
337 arena->ctime = arena->wtime;
339 writeclumpinfo(arena, clump, &c->info);
340 //ZZZ make this an enum param
341 if((clump & 0x1ff) == 0x1ff){
342 flushciblocks(arena);
343 wbarena(arena);
346 qunlock(&arena->lock);
347 return aa;
350 /*
351 * once sealed, an arena never has any data added to it.
352 * it should only be changed to fix errors.
353 * this also syncs the clump directory.
354 */
355 static void
356 sealarena(Arena *arena)
358 flushciblocks(arena);
359 flushdcache();
360 arena->sealed = 1;
361 wbarena(arena);
362 backsumarena(arena);
365 void
366 backsumarena(Arena *arena)
368 ASum *as;
370 as = MK(ASum);
371 if(as == nil)
372 return;
373 qlock(&sumlock);
374 as->arena = arena;
375 as->next = sumq;
376 sumq = as;
377 rwakeup(&sumwait);
378 qunlock(&sumlock);
381 static void
382 sumproc(void *unused)
384 ASum *as;
385 Arena *arena;
387 USED(unused);
389 for(;;){
390 qlock(&sumlock);
391 while(sumq == nil)
392 rsleep(&sumwait);
393 as = sumq;
394 sumq = as->next;
395 qunlock(&sumlock);
396 arena = as->arena;
397 free(as);
399 sumarena(arena);
403 void
404 sumarena(Arena *arena)
406 ZBlock *b;
407 DigestState s;
408 u64int a, e;
409 u32int bs;
410 u8int score[VtScoreSize];
412 bs = MaxIoSize;
413 if(bs < arena->blocksize)
414 bs = arena->blocksize;
416 /*
417 * read & sum all blocks except the last one
418 */
419 memset(&s, 0, sizeof s);
420 b = alloczblock(bs, 0);
421 e = arena->base + arena->size;
422 for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
423 if(a + bs > e)
424 bs = arena->blocksize;
425 if(readpart(arena->part, a, b->data, bs) < 0)
426 goto ReadErr;
427 sha1(b->data, bs, nil, &s);
430 /*
431 * the last one is special, since it may already have the checksum included
432 */
433 bs = arena->blocksize;
434 if(readpart(arena->part, e, b->data, bs) < 0){
435 ReadErr:
436 logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
437 freezblock(b);
438 return;
441 sha1(b->data, bs-VtScoreSize, nil, &s);
442 sha1(zeroscore, VtScoreSize, nil, &s);
443 sha1(nil, 0, score, &s);
445 /*
446 * check for no checksum or the same
448 * the writepart is okay because we flushed the dcache in sealarena
449 */
450 if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0){
451 if(scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
452 logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
453 arena->name, &b->data[bs - VtScoreSize], score);
454 scorecp(&b->data[bs - VtScoreSize], score);
455 if(writepart(arena->part, e, b->data, bs) < 0)
456 logerr(EOk, "sumarena can't write sum for %s: %r", arena->name);
458 freezblock(b);
460 qlock(&arena->lock);
461 scorecp(arena->score, score);
462 qunlock(&arena->lock);
465 /*
466 * write the arena trailer block to the partition
467 */
468 int
469 wbarena(Arena *arena)
471 DBlock *b;
472 int bad;
474 if((b = getdblock(arena->part, arena->base + arena->size, 0)) == nil){
475 logerr(EAdmin, "can't write arena trailer: %r");
476 return -1;
478 dirtydblock(b, DirtyArenaTrailer);
479 bad = okarena(arena)<0 || packarena(arena, b->data)<0;
480 putdblock(b);
481 if(bad)
482 return -1;
483 return 0;
486 int
487 wbarenahead(Arena *arena)
489 ZBlock *b;
490 ArenaHead head;
491 int bad;
493 namecp(head.name, arena->name);
494 head.version = arena->version;
495 head.size = arena->size + 2 * arena->blocksize;
496 head.blocksize = arena->blocksize;
497 b = alloczblock(arena->blocksize, 1);
498 if(b == nil){
499 logerr(EAdmin, "can't write arena header: %r");
500 ///ZZZ add error message?
501 return -1;
503 /*
504 * this writepart is okay because it only happens
505 * during initialization.
506 */
507 bad = packarenahead(&head, b->data)<0 ||
508 writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0;
509 freezblock(b);
510 if(bad)
511 return -1;
512 return 0;
515 /*
516 * read the arena header and trailer blocks from disk
517 */
518 static int
519 loadarena(Arena *arena)
521 ArenaHead head;
522 ZBlock *b;
524 b = alloczblock(arena->blocksize, 0);
525 if(b == nil)
526 return -1;
527 if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
528 freezblock(b);
529 return -1;
531 if(unpackarena(arena, b->data) < 0){
532 freezblock(b);
533 return -1;
535 if(arena->version != ArenaVersion){
536 seterr(EAdmin, "unknown arena version %d", arena->version);
537 freezblock(b);
538 return -1;
540 scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);
542 if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
543 logerr(EAdmin, "can't read arena header: %r");
544 freezblock(b);
545 return 0;
547 if(unpackarenahead(&head, b->data) < 0)
548 logerr(ECorrupt, "corrupted arena header: %r");
549 else if(namecmp(arena->name, head.name)!=0
550 || arena->version != head.version
551 || arena->blocksize != head.blocksize
552 || arena->size + 2 * arena->blocksize != head.size)
553 logerr(ECorrupt, "arena header inconsistent with arena data");
554 freezblock(b);
556 return 0;
559 static int
560 okarena(Arena *arena)
562 u64int dsize;
563 int ok;
565 ok = 0;
566 dsize = arenadirsize(arena, arena->clumps);
567 if(arena->used + dsize > arena->size){
568 seterr(ECorrupt, "arena used > size");
569 ok = -1;
572 if(arena->cclumps > arena->clumps)
573 logerr(ECorrupt, "arena has more compressed clumps than total clumps");
575 if(arena->uncsize + arena->clumps * ClumpSize + arena->blocksize < arena->used)
576 logerr(ECorrupt, "arena uncompressed size inconsistent with used space %lld %d %lld", arena->uncsize, arena->clumps, arena->used);
578 if(arena->ctime > arena->wtime)
579 logerr(ECorrupt, "arena creation time after last write time");
581 return ok;
584 static CIBlock*
585 getcib(Arena *arena, int clump, int writing, CIBlock *rock)
587 int read;
588 CIBlock *cib;
589 u32int block, off;
591 if(clump >= arena->clumps){
592 seterr(EOk, "clump directory access out of range");
593 return nil;
595 block = clump / arena->clumpmax;
596 off = (clump - block * arena->clumpmax) * ClumpInfoSize;
598 /*
599 if(arena->cib.block == block
600 && arena->cib.data != nil){
601 arena->cib.offset = off;
602 return &arena->cib;
605 if(writing){
606 flushciblocks(arena);
607 cib = &arena->cib;
608 }else
609 cib = rock;
610 */
611 cib = rock;
613 qlock(&stats.lock);
614 stats.cireads++;
615 qunlock(&stats.lock);
617 cib->block = block;
618 cib->offset = off;
620 read = 1;
621 if(writing && off == 0 && clump == arena->clumps-1)
622 read = 0;
624 cib->data = getdblock(arena->part, arena->base + arena->size - (block + 1) * arena->blocksize, read);
625 if(cib->data == nil)
626 return nil;
627 return cib;
630 static void
631 putcib(Arena *arena, CIBlock *cib)
633 if(cib != &arena->cib){
634 putdblock(cib->data);
635 cib->data = nil;
639 /*
640 * must be called with arena locked
642 * cache turned off now that dcache does write caching too.
643 */
644 int
645 flushciblocks(Arena *arena)
647 int ok;
649 if(arena->cib.data == nil)
650 return 0;
651 qlock(&stats.lock);
652 stats.ciwrites++;
653 qunlock(&stats.lock);
654 // ok = writepart(arena->part, arena->base + arena->size - (arena->cib.block + 1) * arena->blocksize, arena->cib.data->data, arena->blocksize);
655 ok = 0;
656 if(ok < 0)
657 seterr(EAdmin, "failed writing arena directory block");
658 putdblock(arena->cib.data);
659 arena->cib.data = nil;
660 return ok;