1 6f4d00ee 2013-09-23 0intro #include "stdinc.h"
2 6f4d00ee 2013-09-23 0intro #include "dat.h"
3 6f4d00ee 2013-09-23 0intro #include "fns.h"
4 6f4d00ee 2013-09-23 0intro #include "error.h"
6 6f4d00ee 2013-09-23 0intro #include "9.h" /* for cacheFlush */
8 6f4d00ee 2013-09-23 0intro typedef struct FreeList FreeList;
9 6f4d00ee 2013-09-23 0intro typedef struct BAddr BAddr;
12 6f4d00ee 2013-09-23 0intro BadHeap = ~0,
16 6f4d00ee 2013-09-23 0intro * Store data to the memory cache in c->size blocks
17 6f4d00ee 2013-09-23 0intro * with the block zero extended to fill it out. When writing to
18 6f4d00ee 2013-09-23 0intro * Venti, the block will be zero truncated. The walker will also check
19 6f4d00ee 2013-09-23 0intro * that the block fits within psize or dsize as the case may be.
22 6f4d00ee 2013-09-23 0intro struct Cache
28 6f4d00ee 2013-09-23 0intro Disk *disk;
29 6f4d00ee 2013-09-23 0intro int size; /* block size */
30 6f4d00ee 2013-09-23 0intro int ndmap; /* size of per-block dirty pointer map used in blockWrite */
31 4b576658 2013-09-23 0intro VtConn *z;
32 6f4d00ee 2013-09-23 0intro u32int now; /* ticks for usage timestamps */
33 6f4d00ee 2013-09-23 0intro Block **heads; /* hash table for finding address */
34 6f4d00ee 2013-09-23 0intro int nheap; /* number of available victims */
35 6f4d00ee 2013-09-23 0intro Block **heap; /* heap for locating victims */
36 6f4d00ee 2013-09-23 0intro long nblocks; /* number of blocks allocated */
37 6f4d00ee 2013-09-23 0intro Block *blocks; /* array of block descriptors */
38 6f4d00ee 2013-09-23 0intro u8int *mem; /* memory for all block data & blists */
40 6f4d00ee 2013-09-23 0intro BList *blfree;
41 4b576658 2013-09-23 0intro Rendez blrend;
43 6f4d00ee 2013-09-23 0intro int ndirty; /* number of dirty blocks in the cache */
44 6f4d00ee 2013-09-23 0intro int maxdirty; /* max number of dirty blocks */
45 6f4d00ee 2013-09-23 0intro u32int vers;
47 6f4d00ee 2013-09-23 0intro long hashSize;
49 6f4d00ee 2013-09-23 0intro FreeList *fl;
51 4b576658 2013-09-23 0intro Rendez die; /* daemon threads should die when QLock != nil */
53 4b576658 2013-09-23 0intro Rendez flush;
54 4b576658 2013-09-23 0intro Rendez flushwait;
55 4b576658 2013-09-23 0intro Rendez heapwait;
56 6f4d00ee 2013-09-23 0intro BAddr *baddr;
57 6f4d00ee 2013-09-23 0intro int bw, br, be;
58 6f4d00ee 2013-09-23 0intro int nflush;
60 6f4d00ee 2013-09-23 0intro Periodic *sync;
62 6f4d00ee 2013-09-23 0intro /* unlink daemon */
63 6f4d00ee 2013-09-23 0intro BList *uhead;
64 6f4d00ee 2013-09-23 0intro BList *utail;
65 4b576658 2013-09-23 0intro Rendez unlink;
67 6f4d00ee 2013-09-23 0intro /* block counts */
68 6f4d00ee 2013-09-23 0intro int nused;
69 6f4d00ee 2013-09-23 0intro int ndisk;
72 6f4d00ee 2013-09-23 0intro struct BList {
74 6f4d00ee 2013-09-23 0intro u32int addr;
75 6f4d00ee 2013-09-23 0intro uchar type;
76 6f4d00ee 2013-09-23 0intro u32int tag;
77 6f4d00ee 2013-09-23 0intro u32int epoch;
78 6f4d00ee 2013-09-23 0intro u32int vers;
80 6f4d00ee 2013-09-23 0intro int recurse; /* for block unlink */
82 6f4d00ee 2013-09-23 0intro /* for roll back */
83 6f4d00ee 2013-09-23 0intro int index; /* -1 indicates not valid */
85 6f4d00ee 2013-09-23 0intro uchar score[VtScoreSize];
86 6f4d00ee 2013-09-23 0intro uchar entry[VtEntrySize];
88 6f4d00ee 2013-09-23 0intro BList *next;
91 6f4d00ee 2013-09-23 0intro struct BAddr {
93 6f4d00ee 2013-09-23 0intro u32int addr;
94 6f4d00ee 2013-09-23 0intro u32int vers;
97 6f4d00ee 2013-09-23 0intro struct FreeList {
99 6f4d00ee 2013-09-23 0intro u32int last; /* last block allocated */
100 6f4d00ee 2013-09-23 0intro u32int end; /* end of data partition */
101 6f4d00ee 2013-09-23 0intro u32int nused; /* number of used blocks */
102 6f4d00ee 2013-09-23 0intro u32int epochLow; /* low epoch when last updated nused */
105 6f4d00ee 2013-09-23 0intro static FreeList *flAlloc(u32int end);
106 6f4d00ee 2013-09-23 0intro static void flFree(FreeList *fl);
108 6f4d00ee 2013-09-23 0intro static Block *cacheBumpBlock(Cache *c);
109 6f4d00ee 2013-09-23 0intro static void heapDel(Block*);
110 6f4d00ee 2013-09-23 0intro static void heapIns(Block*);
111 6f4d00ee 2013-09-23 0intro static void cacheCheck(Cache*);
112 6f4d00ee 2013-09-23 0intro static void unlinkThread(void *a);
113 6f4d00ee 2013-09-23 0intro static void flushThread(void *a);
114 6f4d00ee 2013-09-23 0intro static void unlinkBody(Cache *c);
115 6f4d00ee 2013-09-23 0intro static int cacheFlushBlock(Cache *c);
116 6f4d00ee 2013-09-23 0intro static void cacheSync(void*);
117 6f4d00ee 2013-09-23 0intro static BList *blistAlloc(Block*);
118 6f4d00ee 2013-09-23 0intro static void blistFree(Cache*, BList*);
119 6f4d00ee 2013-09-23 0intro static void doRemoveLink(Cache*, BList*);
122 6f4d00ee 2013-09-23 0intro * Mapping from local block type to Venti type
124 6f4d00ee 2013-09-23 0intro int vtType[BtMax] = {
125 6f4d00ee 2013-09-23 0intro VtDataType, /* BtData | 0 */
126 4b576658 2013-09-23 0intro VtDataType+1, /* BtData | 1 */
127 4b576658 2013-09-23 0intro VtDataType+2, /* BtData | 2 */
128 4b576658 2013-09-23 0intro VtDataType+3, /* BtData | 3 */
129 4b576658 2013-09-23 0intro VtDataType+4, /* BtData | 4 */
130 4b576658 2013-09-23 0intro VtDataType+5, /* BtData | 5 */
131 4b576658 2013-09-23 0intro VtDataType+6, /* BtData | 6 */
132 4b576658 2013-09-23 0intro VtDataType+7, /* BtData | 7 */
133 6f4d00ee 2013-09-23 0intro VtDirType, /* BtDir | 0 */
134 4b576658 2013-09-23 0intro VtDirType+1, /* BtDir | 1 */
135 4b576658 2013-09-23 0intro VtDirType+2, /* BtDir | 2 */
136 4b576658 2013-09-23 0intro VtDirType+3, /* BtDir | 3 */
137 4b576658 2013-09-23 0intro VtDirType+4, /* BtDir | 4 */
138 4b576658 2013-09-23 0intro VtDirType+5, /* BtDir | 5 */
139 4b576658 2013-09-23 0intro VtDirType+6, /* BtDir | 6 */
140 4b576658 2013-09-23 0intro VtDirType+7, /* BtDir | 7 */
144 6f4d00ee 2013-09-23 0intro * Allocate the memory cache.
147 4b576658 2013-09-23 0intro cacheAlloc(Disk *disk, VtConn *z, ulong nblocks, int mode)
150 6f4d00ee 2013-09-23 0intro Cache *c;
151 6f4d00ee 2013-09-23 0intro Block *b;
152 6f4d00ee 2013-09-23 0intro BList *bl;
153 6f4d00ee 2013-09-23 0intro u8int *p;
156 4b576658 2013-09-23 0intro c = vtmallocz(sizeof(Cache));
158 6f4d00ee 2013-09-23 0intro /* reasonable number of BList elements */
159 6f4d00ee 2013-09-23 0intro nbl = nblocks * 4;
161 6f4d00ee 2013-09-23 0intro c->ref = 1;
162 6f4d00ee 2013-09-23 0intro c->disk = disk;
163 6f4d00ee 2013-09-23 0intro c->z = z;
164 6f4d00ee 2013-09-23 0intro c->size = diskBlockSize(disk);
165 6f4d00ee 2013-09-23 0intro bwatchSetBlockSize(c->size);
166 6f4d00ee 2013-09-23 0intro /* round c->size up to be a nice multiple */
167 6f4d00ee 2013-09-23 0intro c->size = (c->size + 127) & ~127;
168 6f4d00ee 2013-09-23 0intro c->ndmap = (c->size/20 + 7) / 8;
169 6f4d00ee 2013-09-23 0intro c->nblocks = nblocks;
170 6f4d00ee 2013-09-23 0intro c->hashSize = nblocks;
171 4b576658 2013-09-23 0intro c->heads = vtmallocz(c->hashSize*sizeof(Block*));
172 4b576658 2013-09-23 0intro c->heap = vtmallocz(nblocks*sizeof(Block*));
173 4b576658 2013-09-23 0intro c->blocks = vtmallocz(nblocks*sizeof(Block));
174 4b576658 2013-09-23 0intro c->mem = vtmallocz(nblocks * (c->size + c->ndmap) + nbl * sizeof(BList));
175 4b576658 2013-09-23 0intro c->baddr = vtmallocz(nblocks * sizeof(BAddr));
176 6f4d00ee 2013-09-23 0intro c->mode = mode;
177 6f4d00ee 2013-09-23 0intro c->vers++;
178 6f4d00ee 2013-09-23 0intro p = c->mem;
179 6f4d00ee 2013-09-23 0intro for(i = 0; i < nblocks; i++){
180 6f4d00ee 2013-09-23 0intro b = &c->blocks[i];
181 6f4d00ee 2013-09-23 0intro b->c = c;
182 6f4d00ee 2013-09-23 0intro b->data = p;
183 6f4d00ee 2013-09-23 0intro b->heap = i;
184 4b576658 2013-09-23 0intro b->ioready.l = &b->lk;
185 6f4d00ee 2013-09-23 0intro c->heap[i] = b;
186 6f4d00ee 2013-09-23 0intro p += c->size;
188 6f4d00ee 2013-09-23 0intro c->nheap = nblocks;
189 6f4d00ee 2013-09-23 0intro for(i = 0; i < nbl; i++){
190 6f4d00ee 2013-09-23 0intro bl = (BList*)p;
191 6f4d00ee 2013-09-23 0intro bl->next = c->blfree;
192 6f4d00ee 2013-09-23 0intro c->blfree = bl;
193 6f4d00ee 2013-09-23 0intro p += sizeof(BList);
195 6f4d00ee 2013-09-23 0intro /* separate loop to keep blocks and blists reasonably aligned */
196 6f4d00ee 2013-09-23 0intro for(i = 0; i < nblocks; i++){
197 6f4d00ee 2013-09-23 0intro b = &c->blocks[i];
198 6f4d00ee 2013-09-23 0intro b->dmap = p;
199 6f4d00ee 2013-09-23 0intro p += c->ndmap;
202 4b576658 2013-09-23 0intro c->blrend.l = &c->lk;
204 6f4d00ee 2013-09-23 0intro c->maxdirty = nblocks*(DirtyPercentage*0.01);
206 6f4d00ee 2013-09-23 0intro c->fl = flAlloc(diskSize(disk, PartData));
208 4b576658 2013-09-23 0intro c->unlink.l = &c->lk;
209 4b576658 2013-09-23 0intro c->flush.l = &c->lk;
210 4b576658 2013-09-23 0intro c->flushwait.l = &c->lk;
211 4b576658 2013-09-23 0intro c->heapwait.l = &c->lk;
212 6f4d00ee 2013-09-23 0intro c->sync = periodicAlloc(cacheSync, c, 30*1000);
214 6f4d00ee 2013-09-23 0intro if(mode == OReadWrite){
215 6f4d00ee 2013-09-23 0intro c->ref += 2;
216 4b576658 2013-09-23 0intro proccreate(unlinkThread, c, STACK);
217 4b576658 2013-09-23 0intro proccreate(flushThread, c, STACK);
219 6f4d00ee 2013-09-23 0intro cacheCheck(c);
221 6f4d00ee 2013-09-23 0intro return c;
225 6f4d00ee 2013-09-23 0intro * Free the whole memory cache, flushing all dirty blocks to the disk.
228 6f4d00ee 2013-09-23 0intro cacheFree(Cache *c)
232 6f4d00ee 2013-09-23 0intro /* kill off daemon threads */
233 4b576658 2013-09-23 0intro qlock(&c->lk);
234 4b576658 2013-09-23 0intro c->die.l = &c->lk;
235 6f4d00ee 2013-09-23 0intro periodicKill(c->sync);
236 4b576658 2013-09-23 0intro rwakeup(&c->flush);
237 4b576658 2013-09-23 0intro rwakeup(&c->unlink);
238 6f4d00ee 2013-09-23 0intro while(c->ref > 1)
239 4b576658 2013-09-23 0intro rsleep(&c->die);
241 6f4d00ee 2013-09-23 0intro /* flush everything out */
243 6f4d00ee 2013-09-23 0intro unlinkBody(c);
244 4b576658 2013-09-23 0intro qunlock(&c->lk);
245 6f4d00ee 2013-09-23 0intro while(cacheFlushBlock(c))
247 6f4d00ee 2013-09-23 0intro diskFlush(c->disk);
248 4b576658 2013-09-23 0intro qlock(&c->lk);
249 6f4d00ee 2013-09-23 0intro } while(c->uhead || c->ndirty);
250 4b576658 2013-09-23 0intro qunlock(&c->lk);
252 6f4d00ee 2013-09-23 0intro cacheCheck(c);
254 6f4d00ee 2013-09-23 0intro for(i = 0; i < c->nblocks; i++){
255 6f4d00ee 2013-09-23 0intro assert(c->blocks[i].ref == 0);
257 6f4d00ee 2013-09-23 0intro flFree(c->fl);
258 4b576658 2013-09-23 0intro vtfree(c->baddr);
259 4b576658 2013-09-23 0intro vtfree(c->heads);
260 4b576658 2013-09-23 0intro vtfree(c->blocks);
261 4b576658 2013-09-23 0intro vtfree(c->mem);
262 6f4d00ee 2013-09-23 0intro diskFree(c->disk);
263 6f4d00ee 2013-09-23 0intro /* don't close vtSession */
264 4b576658 2013-09-23 0intro vtfree(c);
267 6f4d00ee 2013-09-23 0intro static void
268 6f4d00ee 2013-09-23 0intro cacheDump(Cache *c)
271 6f4d00ee 2013-09-23 0intro Block *b;
273 6f4d00ee 2013-09-23 0intro for(i = 0; i < c->nblocks; i++){
274 6f4d00ee 2013-09-23 0intro b = &c->blocks[i];
275 6f4d00ee 2013-09-23 0intro fprint(2, "%d. p=%d a=%ud %V t=%d ref=%d state=%s io=%s pc=%#p\n",
276 6f4d00ee 2013-09-23 0intro i, b->part, b->addr, b->score, b->l.type, b->ref,
277 6f4d00ee 2013-09-23 0intro bsStr(b->l.state), bioStr(b->iostate), b->pc);
281 6f4d00ee 2013-09-23 0intro static void
282 6f4d00ee 2013-09-23 0intro cacheCheck(Cache *c)
284 6f4d00ee 2013-09-23 0intro u32int size, now;
285 6f4d00ee 2013-09-23 0intro int i, k, refed;
286 6f4d00ee 2013-09-23 0intro Block *b;
288 6f4d00ee 2013-09-23 0intro size = c->size;
289 6f4d00ee 2013-09-23 0intro now = c->now;
291 6f4d00ee 2013-09-23 0intro for(i = 0; i < c->nheap; i++){
292 6f4d00ee 2013-09-23 0intro if(c->heap[i]->heap != i)
293 4b576658 2013-09-23 0intro sysfatal("mis-heaped at %d: %d", i, c->heap[i]->heap);
294 6f4d00ee 2013-09-23 0intro if(i > 0 && c->heap[(i - 1) >> 1]->used - now > c->heap[i]->used - now)
295 4b576658 2013-09-23 0intro sysfatal("bad heap ordering");
296 6f4d00ee 2013-09-23 0intro k = (i << 1) + 1;
297 6f4d00ee 2013-09-23 0intro if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now)
298 4b576658 2013-09-23 0intro sysfatal("bad heap ordering");
300 6f4d00ee 2013-09-23 0intro if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now)
301 4b576658 2013-09-23 0intro sysfatal("bad heap ordering");
304 6f4d00ee 2013-09-23 0intro refed = 0;
305 6f4d00ee 2013-09-23 0intro for(i = 0; i < c->nblocks; i++){
306 6f4d00ee 2013-09-23 0intro b = &c->blocks[i];
307 6f4d00ee 2013-09-23 0intro if(b->data != &c->mem[i * size])
308 4b576658 2013-09-23 0intro sysfatal("mis-blocked at %d", i);
309 6f4d00ee 2013-09-23 0intro if(b->ref && b->heap == BadHeap){
313 6f4d00ee 2013-09-23 0intro if(c->nheap + refed != c->nblocks){
314 6f4d00ee 2013-09-23 0intro fprint(2, "%s: cacheCheck: nheap %d refed %d nblocks %ld\n", argv0, c->nheap, refed, c->nblocks);
315 6f4d00ee 2013-09-23 0intro cacheDump(c);
317 6f4d00ee 2013-09-23 0intro assert(c->nheap + refed == c->nblocks);
318 6f4d00ee 2013-09-23 0intro refed = 0;
319 6f4d00ee 2013-09-23 0intro for(i = 0; i < c->nblocks; i++){
320 6f4d00ee 2013-09-23 0intro b = &c->blocks[i];
321 6f4d00ee 2013-09-23 0intro if(b->ref){
322 6f4d00ee 2013-09-23 0intro if(1)fprint(2, "%s: p=%d a=%ud %V ref=%d %L\n", argv0, b->part, b->addr, b->score, b->ref, &b->l);
326 6f4d00ee 2013-09-23 0intro if(refed > 0)fprint(2, "%s: cacheCheck: in used %d\n", argv0, refed);
331 6f4d00ee 2013-09-23 0intro * locate the block with the oldest second to last use.
332 6f4d00ee 2013-09-23 0intro * remove it from the heap, and fix up the heap.
334 6f4d00ee 2013-09-23 0intro /* called with c->lk held */
335 6f4d00ee 2013-09-23 0intro static Block *
336 6f4d00ee 2013-09-23 0intro cacheBumpBlock(Cache *c)
338 6f4d00ee 2013-09-23 0intro int printed;
339 6f4d00ee 2013-09-23 0intro Block *b;
342 6f4d00ee 2013-09-23 0intro * locate the block with the oldest second to last use.
343 6f4d00ee 2013-09-23 0intro * remove it from the heap, and fix up the heap.
345 6f4d00ee 2013-09-23 0intro printed = 0;
346 6f4d00ee 2013-09-23 0intro if(c->nheap == 0){
347 6f4d00ee 2013-09-23 0intro while(c->nheap == 0){
348 4b576658 2013-09-23 0intro rwakeup(&c->flush);
349 4b576658 2013-09-23 0intro rsleep(&c->heapwait);
350 6f4d00ee 2013-09-23 0intro if(c->nheap == 0){
351 6f4d00ee 2013-09-23 0intro printed = 1;
352 6f4d00ee 2013-09-23 0intro fprint(2, "%s: entire cache is busy, %d dirty "
353 6f4d00ee 2013-09-23 0intro "-- waking flush thread\n",
354 6f4d00ee 2013-09-23 0intro argv0, c->ndirty);
357 6f4d00ee 2013-09-23 0intro if(printed)
358 6f4d00ee 2013-09-23 0intro fprint(2, "%s: cache is okay again, %d dirty\n",
359 6f4d00ee 2013-09-23 0intro argv0, c->ndirty);
362 6f4d00ee 2013-09-23 0intro b = c->heap[0];
363 6f4d00ee 2013-09-23 0intro heapDel(b);
365 6f4d00ee 2013-09-23 0intro assert(b->heap == BadHeap);
366 6f4d00ee 2013-09-23 0intro assert(b->ref == 0);
367 6f4d00ee 2013-09-23 0intro assert(b->iostate != BioDirty && b->iostate != BioReading && b->iostate != BioWriting);
368 6f4d00ee 2013-09-23 0intro assert(b->prior == nil);
369 6f4d00ee 2013-09-23 0intro assert(b->uhead == nil);
372 6f4d00ee 2013-09-23 0intro * unchain the block from hash chain
374 6f4d00ee 2013-09-23 0intro if(b->prev){
375 6f4d00ee 2013-09-23 0intro *(b->prev) = b->next;
376 6f4d00ee 2013-09-23 0intro if(b->next)
377 6f4d00ee 2013-09-23 0intro b->next->prev = b->prev;
378 6f4d00ee 2013-09-23 0intro b->prev = nil;
382 6f4d00ee 2013-09-23 0intro if(0)fprint(2, "%s: dropping %d:%x:%V\n", argv0, b->part, b->addr, b->score);
383 6f4d00ee 2013-09-23 0intro /* set block to a reasonable state */
384 6f4d00ee 2013-09-23 0intro b->ref = 1;
385 6f4d00ee 2013-09-23 0intro b->part = PartError;
386 6f4d00ee 2013-09-23 0intro memset(&b->l, 0, sizeof(b->l));
387 6f4d00ee 2013-09-23 0intro b->iostate = BioEmpty;
389 6f4d00ee 2013-09-23 0intro return b;
393 6f4d00ee 2013-09-23 0intro * look for a particular version of the block in the memory cache.
395 6f4d00ee 2013-09-23 0intro static Block *
396 6f4d00ee 2013-09-23 0intro _cacheLocalLookup(Cache *c, int part, u32int addr, u32int vers,
397 6f4d00ee 2013-09-23 0intro int waitlock, int *lockfailure)
399 6f4d00ee 2013-09-23 0intro Block *b;
402 6f4d00ee 2013-09-23 0intro h = addr % c->hashSize;
404 6f4d00ee 2013-09-23 0intro if(lockfailure)
405 6f4d00ee 2013-09-23 0intro *lockfailure = 0;
408 6f4d00ee 2013-09-23 0intro * look for the block in the cache
410 4b576658 2013-09-23 0intro qlock(&c->lk);
411 6f4d00ee 2013-09-23 0intro for(b = c->heads[h]; b != nil; b = b->next){
412 6f4d00ee 2013-09-23 0intro if(b->part == part && b->addr == addr)
415 6f4d00ee 2013-09-23 0intro if(b == nil || b->vers != vers){
416 4b576658 2013-09-23 0intro qunlock(&c->lk);
417 6f4d00ee 2013-09-23 0intro return nil;
419 4b576658 2013-09-23 0intro if(!waitlock && !canqlock(&b->lk)){
420 6f4d00ee 2013-09-23 0intro *lockfailure = 1;
421 4b576658 2013-09-23 0intro qunlock(&c->lk);
422 6f4d00ee 2013-09-23 0intro return nil;
424 6f4d00ee 2013-09-23 0intro heapDel(b);
425 6f4d00ee 2013-09-23 0intro b->ref++;
426 4b576658 2013-09-23 0intro qunlock(&c->lk);
428 6f4d00ee 2013-09-23 0intro bwatchLock(b);
429 6f4d00ee 2013-09-23 0intro if(waitlock)
430 4b576658 2013-09-23 0intro qlock(&b->lk);
431 6f4d00ee 2013-09-23 0intro b->nlock = 1;
434 6f4d00ee 2013-09-23 0intro switch(b->iostate){
437 6f4d00ee 2013-09-23 0intro case BioEmpty:
438 6f4d00ee 2013-09-23 0intro case BioLabel:
439 6f4d00ee 2013-09-23 0intro case BioClean:
440 6f4d00ee 2013-09-23 0intro case BioDirty:
441 6f4d00ee 2013-09-23 0intro if(b->vers != vers){
442 6f4d00ee 2013-09-23 0intro blockPut(b);
443 6f4d00ee 2013-09-23 0intro return nil;
445 6f4d00ee 2013-09-23 0intro return b;
446 6f4d00ee 2013-09-23 0intro case BioReading:
447 6f4d00ee 2013-09-23 0intro case BioWriting:
448 4b576658 2013-09-23 0intro rsleep(&b->ioready);
450 6f4d00ee 2013-09-23 0intro case BioVentiError:
451 6f4d00ee 2013-09-23 0intro blockPut(b);
452 4b576658 2013-09-23 0intro werrstr("venti i/o error block 0x%.8ux", addr);
453 6f4d00ee 2013-09-23 0intro return nil;
454 6f4d00ee 2013-09-23 0intro case BioReadError:
455 6f4d00ee 2013-09-23 0intro blockPut(b);
456 4b576658 2013-09-23 0intro werrstr("error reading block 0x%.8ux", addr);
457 6f4d00ee 2013-09-23 0intro return nil;
460 6f4d00ee 2013-09-23 0intro /* NOT REACHED */
465 6f4d00ee 2013-09-23 0intro * fetch a local (on-disk) block from the memory cache.
466 6f4d00ee 2013-09-23 0intro * if it's not there, load it, bumping some other block.
469 6f4d00ee 2013-09-23 0intro _cacheLocal(Cache *c, int part, u32int addr, int mode, u32int epoch)
471 6f4d00ee 2013-09-23 0intro Block *b;
474 6f4d00ee 2013-09-23 0intro assert(part != PartVenti);
476 6f4d00ee 2013-09-23 0intro h = addr % c->hashSize;
479 6f4d00ee 2013-09-23 0intro * look for the block in the cache
481 4b576658 2013-09-23 0intro qlock(&c->lk);
482 6f4d00ee 2013-09-23 0intro for(b = c->heads[h]; b != nil; b = b->next){
483 6f4d00ee 2013-09-23 0intro if(b->part != part || b->addr != addr)
484 6f4d00ee 2013-09-23 0intro continue;
485 6f4d00ee 2013-09-23 0intro if(epoch && b->l.epoch != epoch){
486 6f4d00ee 2013-09-23 0intro fprint(2, "%s: _cacheLocal want epoch %ud got %ud\n", argv0, epoch, b->l.epoch);
487 4b576658 2013-09-23 0intro qunlock(&c->lk);
488 4b576658 2013-09-23 0intro werrstr(ELabelMismatch);
489 6f4d00ee 2013-09-23 0intro return nil;
491 6f4d00ee 2013-09-23 0intro heapDel(b);
492 6f4d00ee 2013-09-23 0intro b->ref++;
496 6f4d00ee 2013-09-23 0intro if(b == nil){
497 6f4d00ee 2013-09-23 0intro b = cacheBumpBlock(c);
499 6f4d00ee 2013-09-23 0intro b->part = part;
500 6f4d00ee 2013-09-23 0intro b->addr = addr;
501 6f4d00ee 2013-09-23 0intro localToGlobal(addr, b->score);
503 6f4d00ee 2013-09-23 0intro /* chain onto correct hash */
504 6f4d00ee 2013-09-23 0intro b->next = c->heads[h];
505 6f4d00ee 2013-09-23 0intro c->heads[h] = b;
506 6f4d00ee 2013-09-23 0intro if(b->next != nil)
507 6f4d00ee 2013-09-23 0intro b->next->prev = &b->next;
508 6f4d00ee 2013-09-23 0intro b->prev = &c->heads[h];
511 4b576658 2013-09-23 0intro qunlock(&c->lk);
514 6f4d00ee 2013-09-23 0intro * BUG: what if the epoch changes right here?
515 6f4d00ee 2013-09-23 0intro * In the worst case, we could end up in some weird
516 6f4d00ee 2013-09-23 0intro * lock loop, because the block we want no longer exists,
517 6f4d00ee 2013-09-23 0intro * and instead we're trying to lock a block we have no
518 6f4d00ee 2013-09-23 0intro * business grabbing.
520 6f4d00ee 2013-09-23 0intro * For now, I'm not going to worry about it.
523 6f4d00ee 2013-09-23 0intro if(0)fprint(2, "%s: cacheLocal: %d: %d %x\n", argv0, getpid(), b->part, b->addr);
524 6f4d00ee 2013-09-23 0intro bwatchLock(b);
525 4b576658 2013-09-23 0intro qlock(&b->lk);
526 6f4d00ee 2013-09-23 0intro b->nlock = 1;
528 6f4d00ee 2013-09-23 0intro if(part == PartData && b->iostate == BioEmpty){
529 6f4d00ee 2013-09-23 0intro if(!readLabel(c, &b->l, addr)){
530 6f4d00ee 2013-09-23 0intro blockPut(b);
531 6f4d00ee 2013-09-23 0intro return nil;
533 6f4d00ee 2013-09-23 0intro blockSetIOState(b, BioLabel);
535 6f4d00ee 2013-09-23 0intro if(epoch && b->l.epoch != epoch){
536 6f4d00ee 2013-09-23 0intro blockPut(b);
537 6f4d00ee 2013-09-23 0intro fprint(2, "%s: _cacheLocal want epoch %ud got %ud\n", argv0, epoch, b->l.epoch);
538 4b576658 2013-09-23 0intro werrstr(ELabelMismatch);
539 6f4d00ee 2013-09-23 0intro return nil;
542 6f4d00ee 2013-09-23 0intro b->pc = getcallerpc(&c);
544 6f4d00ee 2013-09-23 0intro switch(b->iostate){
547 6f4d00ee 2013-09-23 0intro case BioLabel:
548 6f4d00ee 2013-09-23 0intro if(mode == OOverWrite)
550 6f4d00ee 2013-09-23 0intro * leave iostate as BioLabel because data
551 6f4d00ee 2013-09-23 0intro * hasn't been read.
553 6f4d00ee 2013-09-23 0intro return b;
554 6f4d00ee 2013-09-23 0intro /* fall through */
555 6f4d00ee 2013-09-23 0intro case BioEmpty:
556 6f4d00ee 2013-09-23 0intro diskRead(c->disk, b);
557 4b576658 2013-09-23 0intro rsleep(&b->ioready);
559 6f4d00ee 2013-09-23 0intro case BioClean:
560 6f4d00ee 2013-09-23 0intro case BioDirty:
561 6f4d00ee 2013-09-23 0intro return b;
562 6f4d00ee 2013-09-23 0intro case BioReading:
563 6f4d00ee 2013-09-23 0intro case BioWriting:
564 4b576658 2013-09-23 0intro rsleep(&b->ioready);
566 6f4d00ee 2013-09-23 0intro case BioReadError:
567 6f4d00ee 2013-09-23 0intro blockSetIOState(b, BioEmpty);
568 6f4d00ee 2013-09-23 0intro blockPut(b);
569 4b576658 2013-09-23 0intro werrstr("error reading block 0x%.8ux", addr);
570 6f4d00ee 2013-09-23 0intro return nil;
573 6f4d00ee 2013-09-23 0intro /* NOT REACHED */
577 6f4d00ee 2013-09-23 0intro cacheLocal(Cache *c, int part, u32int addr, int mode)
579 6f4d00ee 2013-09-23 0intro return _cacheLocal(c, part, addr, mode, 0);
583 6f4d00ee 2013-09-23 0intro * fetch a local (on-disk) block from the memory cache.
584 6f4d00ee 2013-09-23 0intro * if it's not there, load it, bumping some other block.
585 6f4d00ee 2013-09-23 0intro * check tag and type.
588 6f4d00ee 2013-09-23 0intro cacheLocalData(Cache *c, u32int addr, int type, u32int tag, int mode, u32int epoch)
590 6f4d00ee 2013-09-23 0intro Block *b;
592 6f4d00ee 2013-09-23 0intro b = _cacheLocal(c, PartData, addr, mode, epoch);
593 6f4d00ee 2013-09-23 0intro if(b == nil)
594 6f4d00ee 2013-09-23 0intro return nil;
595 6f4d00ee 2013-09-23 0intro if(b->l.type != type || b->l.tag != tag){
596 6f4d00ee 2013-09-23 0intro fprint(2, "%s: cacheLocalData: addr=%d type got %d exp %d: tag got %ux exp %ux\n",
597 6f4d00ee 2013-09-23 0intro argv0, addr, b->l.type, type, b->l.tag, tag);
598 4b576658 2013-09-23 0intro werrstr(ELabelMismatch);
599 6f4d00ee 2013-09-23 0intro blockPut(b);
600 6f4d00ee 2013-09-23 0intro return nil;
602 6f4d00ee 2013-09-23 0intro b->pc = getcallerpc(&c);
603 6f4d00ee 2013-09-23 0intro return b;
607 6f4d00ee 2013-09-23 0intro * fetch a global (Venti) block from the memory cache.
608 6f4d00ee 2013-09-23 0intro * if it's not there, load it, bumping some other block.
609 6f4d00ee 2013-09-23 0intro * check tag and type if it's really a local block in disguise.
612 6f4d00ee 2013-09-23 0intro cacheGlobal(Cache *c, uchar score[VtScoreSize], int type, u32int tag, int mode)
615 6f4d00ee 2013-09-23 0intro Block *b;
617 6f4d00ee 2013-09-23 0intro u32int addr;
619 6f4d00ee 2013-09-23 0intro addr = globalToLocal(score);
620 6f4d00ee 2013-09-23 0intro if(addr != NilBlock){
621 6f4d00ee 2013-09-23 0intro b = cacheLocalData(c, addr, type, tag, mode, 0);
623 6f4d00ee 2013-09-23 0intro b->pc = getcallerpc(&c);
624 6f4d00ee 2013-09-23 0intro return b;
627 6f4d00ee 2013-09-23 0intro h = (u32int)(score[0]|(score[1]<<8)|(score[2]<<16)|(score[3]<<24)) % c->hashSize;
630 6f4d00ee 2013-09-23 0intro * look for the block in the cache
632 4b576658 2013-09-23 0intro qlock(&c->lk);
633 6f4d00ee 2013-09-23 0intro for(b = c->heads[h]; b != nil; b = b->next){
634 6f4d00ee 2013-09-23 0intro if(b->part != PartVenti || memcmp(b->score, score, VtScoreSize) != 0 || b->l.type != type)
635 6f4d00ee 2013-09-23 0intro continue;
636 6f4d00ee 2013-09-23 0intro heapDel(b);
637 6f4d00ee 2013-09-23 0intro b->ref++;
641 6f4d00ee 2013-09-23 0intro if(b == nil){
642 6f4d00ee 2013-09-23 0intro if(0)fprint(2, "%s: cacheGlobal %V %d\n", argv0, score, type);
644 6f4d00ee 2013-09-23 0intro b = cacheBumpBlock(c);
646 6f4d00ee 2013-09-23 0intro b->part = PartVenti;
647 6f4d00ee 2013-09-23 0intro b->addr = NilBlock;
648 6f4d00ee 2013-09-23 0intro b->l.type = type;
649 6f4d00ee 2013-09-23 0intro memmove(b->score, score, VtScoreSize);
651 6f4d00ee 2013-09-23 0intro /* chain onto correct hash */
652 6f4d00ee 2013-09-23 0intro b->next = c->heads[h];
653 6f4d00ee 2013-09-23 0intro c->heads[h] = b;
654 6f4d00ee 2013-09-23 0intro if(b->next != nil)
655 6f4d00ee 2013-09-23 0intro b->next->prev = &b->next;
656 6f4d00ee 2013-09-23 0intro b->prev = &c->heads[h];
658 4b576658 2013-09-23 0intro qunlock(&c->lk);
660 6f4d00ee 2013-09-23 0intro bwatchLock(b);
661 4b576658 2013-09-23 0intro qlock(&b->lk);
662 6f4d00ee 2013-09-23 0intro b->nlock = 1;
663 6f4d00ee 2013-09-23 0intro b->pc = getcallerpc(&c);
665 6f4d00ee 2013-09-23 0intro switch(b->iostate){
668 6f4d00ee 2013-09-23 0intro case BioEmpty:
669 4b576658 2013-09-23 0intro n = vtread(c->z, score, vtType[type], b->data, c->size);
670 4b576658 2013-09-23 0intro if(n < 0 || vtsha1check(score, b->data, n) < 0){
671 6f4d00ee 2013-09-23 0intro blockSetIOState(b, BioVentiError);
672 6f4d00ee 2013-09-23 0intro blockPut(b);
674 6f4d00ee 2013-09-23 0intro "venti error reading block %V or wrong score: %r",
676 6f4d00ee 2013-09-23 0intro return nil;
678 4b576658 2013-09-23 0intro vtzeroextend(vtType[type], b->data, n, c->size);
679 6f4d00ee 2013-09-23 0intro blockSetIOState(b, BioClean);
680 6f4d00ee 2013-09-23 0intro return b;
681 6f4d00ee 2013-09-23 0intro case BioClean:
682 6f4d00ee 2013-09-23 0intro return b;
683 6f4d00ee 2013-09-23 0intro case BioVentiError:
684 6f4d00ee 2013-09-23 0intro blockPut(b);
685 4b576658 2013-09-23 0intro werrstr("venti i/o error or wrong score, block %V", score);
686 6f4d00ee 2013-09-23 0intro return nil;
687 6f4d00ee 2013-09-23 0intro case BioReadError:
688 6f4d00ee 2013-09-23 0intro blockPut(b);
689 4b576658 2013-09-23 0intro werrstr("error reading block %V", b->score);
690 6f4d00ee 2013-09-23 0intro return nil;
692 6f4d00ee 2013-09-23 0intro /* NOT REACHED */
696 6f4d00ee 2013-09-23 0intro * allocate a new on-disk block and load it into the memory cache.
697 6f4d00ee 2013-09-23 0intro * BUG: if the disk is full, should we flush some of it to Venti?
699 6f4d00ee 2013-09-23 0intro static u32int lastAlloc;
702 6f4d00ee 2013-09-23 0intro cacheAllocBlock(Cache *c, int type, u32int tag, u32int epoch, u32int epochLow)
704 6f4d00ee 2013-09-23 0intro FreeList *fl;
705 6f4d00ee 2013-09-23 0intro u32int addr;
706 6f4d00ee 2013-09-23 0intro Block *b;
707 6f4d00ee 2013-09-23 0intro int n, nwrap;
708 6f4d00ee 2013-09-23 0intro Label lab;
710 6f4d00ee 2013-09-23 0intro n = c->size / LabelSize;
711 6f4d00ee 2013-09-23 0intro fl = c->fl;
713 4b576658 2013-09-23 0intro qlock(&fl->lk);
714 6f4d00ee 2013-09-23 0intro addr = fl->last;
715 6f4d00ee 2013-09-23 0intro b = cacheLocal(c, PartLabel, addr/n, OReadOnly);
716 6f4d00ee 2013-09-23 0intro if(b == nil){
717 4b576658 2013-09-23 0intro fprint(2, "%s: cacheAllocBlock: xxx %r\n", argv0);
718 4b576658 2013-09-23 0intro qunlock(&fl->lk);
719 6f4d00ee 2013-09-23 0intro return nil;
721 6f4d00ee 2013-09-23 0intro nwrap = 0;
723 6f4d00ee 2013-09-23 0intro if(++addr >= fl->end){
724 6f4d00ee 2013-09-23 0intro addr = 0;
725 6f4d00ee 2013-09-23 0intro if(++nwrap >= 2){
726 6f4d00ee 2013-09-23 0intro blockPut(b);
727 4b576658 2013-09-23 0intro werrstr("disk is full");
729 6f4d00ee 2013-09-23 0intro * try to avoid a continuous spew of console
730 6f4d00ee 2013-09-23 0intro * messages.
732 6f4d00ee 2013-09-23 0intro if (fl->last != 0)
733 4b576658 2013-09-23 0intro fprint(2, "%s: cacheAllocBlock: xxx1 %r\n",
735 6f4d00ee 2013-09-23 0intro fl->last = 0;
736 4b576658 2013-09-23 0intro qunlock(&fl->lk);
737 6f4d00ee 2013-09-23 0intro return nil;
740 6f4d00ee 2013-09-23 0intro if(addr%n == 0){
741 6f4d00ee 2013-09-23 0intro blockPut(b);
742 6f4d00ee 2013-09-23 0intro b = cacheLocal(c, PartLabel, addr/n, OReadOnly);
743 6f4d00ee 2013-09-23 0intro if(b == nil){
744 6f4d00ee 2013-09-23 0intro fl->last = addr;
745 4b576658 2013-09-23 0intro fprint(2, "%s: cacheAllocBlock: xxx2 %r\n", argv0);
746 4b576658 2013-09-23 0intro qunlock(&fl->lk);
747 6f4d00ee 2013-09-23 0intro return nil;
750 6f4d00ee 2013-09-23 0intro if(!labelUnpack(&lab, b->data, addr%n))
751 6f4d00ee 2013-09-23 0intro continue;
752 6f4d00ee 2013-09-23 0intro if(lab.state == BsFree)
753 6f4d00ee 2013-09-23 0intro goto Found;
754 6f4d00ee 2013-09-23 0intro if(lab.state&BsClosed)
755 6f4d00ee 2013-09-23 0intro if(lab.epochClose <= epochLow || lab.epoch==lab.epochClose)
756 6f4d00ee 2013-09-23 0intro goto Found;
759 6f4d00ee 2013-09-23 0intro blockPut(b);
760 6f4d00ee 2013-09-23 0intro b = cacheLocal(c, PartData, addr, OOverWrite);
761 6f4d00ee 2013-09-23 0intro if(b == nil){
762 4b576658 2013-09-23 0intro fprint(2, "%s: cacheAllocBlock: xxx3 %r\n", argv0);
763 6f4d00ee 2013-09-23 0intro return nil;
765 6f4d00ee 2013-09-23 0intro assert(b->iostate == BioLabel || b->iostate == BioClean);
766 6f4d00ee 2013-09-23 0intro fl->last = addr;
767 6f4d00ee 2013-09-23 0intro lab.type = type;
768 6f4d00ee 2013-09-23 0intro lab.tag = tag;
769 6f4d00ee 2013-09-23 0intro lab.state = BsAlloc;
770 6f4d00ee 2013-09-23 0intro lab.epoch = epoch;
771 6f4d00ee 2013-09-23 0intro lab.epochClose = ~(u32int)0;
772 6f4d00ee 2013-09-23 0intro if(!blockSetLabel(b, &lab, 1)){
773 4b576658 2013-09-23 0intro fprint(2, "%s: cacheAllocBlock: xxx4 %r\n", argv0);
774 6f4d00ee 2013-09-23 0intro blockPut(b);
775 6f4d00ee 2013-09-23 0intro return nil;
777 4b576658 2013-09-23 0intro vtzeroextend(vtType[type], b->data, 0, c->size);
778 6f4d00ee 2013-09-23 0intro if(0)diskWrite(c->disk, b);
780 6f4d00ee 2013-09-23 0intro if(0)fprint(2, "%s: fsAlloc %ud type=%d tag = %ux\n", argv0, addr, type, tag);
781 6f4d00ee 2013-09-23 0intro lastAlloc = addr;
782 6f4d00ee 2013-09-23 0intro fl->nused++;
783 4b576658 2013-09-23 0intro qunlock(&fl->lk);
784 6f4d00ee 2013-09-23 0intro b->pc = getcallerpc(&c);
785 6f4d00ee 2013-09-23 0intro return b;
789 6f4d00ee 2013-09-23 0intro cacheDirty(Cache *c)
791 6f4d00ee 2013-09-23 0intro return c->ndirty;
795 6f4d00ee 2013-09-23 0intro cacheCountUsed(Cache *c, u32int epochLow, u32int *used, u32int *total, u32int *bsize)
798 6f4d00ee 2013-09-23 0intro u32int addr, nused;
799 6f4d00ee 2013-09-23 0intro Block *b;
800 6f4d00ee 2013-09-23 0intro Label lab;
801 6f4d00ee 2013-09-23 0intro FreeList *fl;
803 6f4d00ee 2013-09-23 0intro fl = c->fl;
804 6f4d00ee 2013-09-23 0intro n = c->size / LabelSize;
805 6f4d00ee 2013-09-23 0intro *bsize = c->size;
806 4b576658 2013-09-23 0intro qlock(&fl->lk);
807 6f4d00ee 2013-09-23 0intro if(fl->epochLow == epochLow){
808 6f4d00ee 2013-09-23 0intro *used = fl->nused;
809 6f4d00ee 2013-09-23 0intro *total = fl->end;
810 4b576658 2013-09-23 0intro qunlock(&fl->lk);
814 6f4d00ee 2013-09-23 0intro nused = 0;
815 6f4d00ee 2013-09-23 0intro for(addr=0; addr<fl->end; addr++){
816 6f4d00ee 2013-09-23 0intro if(addr%n == 0){
817 6f4d00ee 2013-09-23 0intro blockPut(b);
818 6f4d00ee 2013-09-23 0intro b = cacheLocal(c, PartLabel, addr/n, OReadOnly);
819 6f4d00ee 2013-09-23 0intro if(b == nil){
820 4b576658 2013-09-23 0intro fprint(2, "%s: flCountUsed: loading %ux: %r\n",
821 6f4d00ee 2013-09-23 0intro argv0, addr/n);
825 6f4d00ee 2013-09-23 0intro if(!labelUnpack(&lab, b->data, addr%n))
826 6f4d00ee 2013-09-23 0intro continue;
827 6f4d00ee 2013-09-23 0intro if(lab.state == BsFree)
828 6f4d00ee 2013-09-23 0intro continue;
829 6f4d00ee 2013-09-23 0intro if(lab.state&BsClosed)
830 6f4d00ee 2013-09-23 0intro if(lab.epochClose <= epochLow || lab.epoch==lab.epochClose)
831 6f4d00ee 2013-09-23 0intro continue;
834 6f4d00ee 2013-09-23 0intro blockPut(b);
835 6f4d00ee 2013-09-23 0intro if(addr == fl->end){
836 6f4d00ee 2013-09-23 0intro fl->nused = nused;
837 6f4d00ee 2013-09-23 0intro fl->epochLow = epochLow;
839 6f4d00ee 2013-09-23 0intro *used = nused;
840 6f4d00ee 2013-09-23 0intro *total = fl->end;
841 4b576658 2013-09-23 0intro qunlock(&fl->lk);
845 6f4d00ee 2013-09-23 0intro static FreeList *
846 6f4d00ee 2013-09-23 0intro flAlloc(u32int end)
848 6f4d00ee 2013-09-23 0intro FreeList *fl;
850 4b576658 2013-09-23 0intro fl = vtmallocz(sizeof(*fl));
851 6f4d00ee 2013-09-23 0intro fl->last = 0;
852 6f4d00ee 2013-09-23 0intro fl->end = end;
853 6f4d00ee 2013-09-23 0intro return fl;
856 6f4d00ee 2013-09-23 0intro static void
857 6f4d00ee 2013-09-23 0intro flFree(FreeList *fl)
859 4b576658 2013-09-23 0intro vtfree(fl);
863 6f4d00ee 2013-09-23 0intro cacheLocalSize(Cache *c, int part)
865 6f4d00ee 2013-09-23 0intro return diskSize(c->disk, part);
869 6f4d00ee 2013-09-23 0intro * The thread that has locked b may refer to it by
870 6f4d00ee 2013-09-23 0intro * multiple names. Nlock counts the number of
871 6f4d00ee 2013-09-23 0intro * references the locking thread holds. It will call
872 6f4d00ee 2013-09-23 0intro * blockPut once per reference.
875 6f4d00ee 2013-09-23 0intro blockDupLock(Block *b)
877 6f4d00ee 2013-09-23 0intro assert(b->nlock > 0);
878 6f4d00ee 2013-09-23 0intro b->nlock++;
882 6f4d00ee 2013-09-23 0intro * we're done with the block.
883 6f4d00ee 2013-09-23 0intro * unlock it. can't use it after calling this.
886 6f4d00ee 2013-09-23 0intro blockPut(Block* b)
888 6f4d00ee 2013-09-23 0intro Cache *c;
890 6f4d00ee 2013-09-23 0intro if(b == nil)
893 6f4d00ee 2013-09-23 0intro if(0)fprint(2, "%s: blockPut: %d: %d %x %d %s\n", argv0, getpid(), b->part, b->addr, c->nheap, bioStr(b->iostate));
895 6f4d00ee 2013-09-23 0intro if(b->iostate == BioDirty)
896 6f4d00ee 2013-09-23 0intro bwatchDependency(b);
898 6f4d00ee 2013-09-23 0intro if(--b->nlock > 0)
902 6f4d00ee 2013-09-23 0intro * b->nlock should probably stay at zero while
903 4b576658 2013-09-23 0intro * the block is unlocked, but diskThread and rsleep
904 4b576658 2013-09-23 0intro * conspire to assume that they can just qlock(&b->lk); blockPut(b),
905 6f4d00ee 2013-09-23 0intro * so we have to keep b->nlock set to 1 even
906 6f4d00ee 2013-09-23 0intro * when the block is unlocked.
908 6f4d00ee 2013-09-23 0intro assert(b->nlock == 0);
909 6f4d00ee 2013-09-23 0intro b->nlock = 1;
910 6f4d00ee 2013-09-23 0intro // b->pc = 0;
912 6f4d00ee 2013-09-23 0intro bwatchUnlock(b);
913 4b576658 2013-09-23 0intro qunlock(&b->lk);
914 6f4d00ee 2013-09-23 0intro c = b->c;
915 4b576658 2013-09-23 0intro qlock(&c->lk);
917 6f4d00ee 2013-09-23 0intro if(--b->ref > 0){
918 4b576658 2013-09-23 0intro qunlock(&c->lk);
922 6f4d00ee 2013-09-23 0intro assert(b->ref == 0);
923 6f4d00ee 2013-09-23 0intro switch(b->iostate){
925 6f4d00ee 2013-09-23 0intro b->used = c->now++;
926 6f4d00ee 2013-09-23 0intro heapIns(b);
928 6f4d00ee 2013-09-23 0intro case BioEmpty:
929 6f4d00ee 2013-09-23 0intro case BioLabel:
930 6f4d00ee 2013-09-23 0intro if(c->nheap == 0)
931 6f4d00ee 2013-09-23 0intro b->used = c->now++;
933 6f4d00ee 2013-09-23 0intro b->used = c->heap[0]->used;
934 6f4d00ee 2013-09-23 0intro heapIns(b);
936 6f4d00ee 2013-09-23 0intro case BioDirty:
939 4b576658 2013-09-23 0intro qunlock(&c->lk);
943 6f4d00ee 2013-09-23 0intro * set the label associated with a block.
946 6f4d00ee 2013-09-23 0intro _blockSetLabel(Block *b, Label *l)
949 6f4d00ee 2013-09-23 0intro Block *bb;
950 6f4d00ee 2013-09-23 0intro u32int a;
951 6f4d00ee 2013-09-23 0intro Cache *c;
953 6f4d00ee 2013-09-23 0intro c = b->c;
955 6f4d00ee 2013-09-23 0intro assert(b->part == PartData);
956 6f4d00ee 2013-09-23 0intro assert(b->iostate == BioLabel || b->iostate == BioClean || b->iostate == BioDirty);
957 6f4d00ee 2013-09-23 0intro lpb = c->size / LabelSize;
958 6f4d00ee 2013-09-23 0intro a = b->addr / lpb;
959 6f4d00ee 2013-09-23 0intro bb = cacheLocal(c, PartLabel, a, OReadWrite);
960 6f4d00ee 2013-09-23 0intro if(bb == nil){
961 6f4d00ee 2013-09-23 0intro blockPut(b);
962 6f4d00ee 2013-09-23 0intro return nil;
964 6f4d00ee 2013-09-23 0intro b->l = *l;
965 6f4d00ee 2013-09-23 0intro labelPack(l, bb->data, b->addr%lpb);
966 6f4d00ee 2013-09-23 0intro blockDirty(bb);
967 6f4d00ee 2013-09-23 0intro return bb;
971 6f4d00ee 2013-09-23 0intro blockSetLabel(Block *b, Label *l, int allocating)
973 6f4d00ee 2013-09-23 0intro Block *lb;
975 6f4d00ee 2013-09-23 0intro lb = _blockSetLabel(b, l);
976 6f4d00ee 2013-09-23 0intro if(lb == nil)
977 6f4d00ee 2013-09-23 0intro return 0;
980 6f4d00ee 2013-09-23 0intro * If we're allocating the block, make sure the label (bl)
981 6f4d00ee 2013-09-23 0intro * goes to disk before the data block (b) itself. This is to help
982 6f4d00ee 2013-09-23 0intro * the blocks that in turn depend on b.
984 6f4d00ee 2013-09-23 0intro * Suppose bx depends on (must be written out after) b.
985 6f4d00ee 2013-09-23 0intro * Once we write b we'll think it's safe to write bx.
986 6f4d00ee 2013-09-23 0intro * Bx can't get at b unless it has a valid label, though.
988 6f4d00ee 2013-09-23 0intro * Allocation is the only case in which having a current label
989 6f4d00ee 2013-09-23 0intro * is vital because:
991 6f4d00ee 2013-09-23 0intro * - l.type is set at allocation and never changes.
992 6f4d00ee 2013-09-23 0intro * - l.tag is set at allocation and never changes.
993 6f4d00ee 2013-09-23 0intro * - l.state is not checked when we load blocks.
994 6f4d00ee 2013-09-23 0intro * - the archiver cares deeply about l.state being
995 6f4d00ee 2013-09-23 0intro * BaActive vs. BaCopied, but that's handled
996 6f4d00ee 2013-09-23 0intro * by direct calls to _blockSetLabel.
999 6f4d00ee 2013-09-23 0intro if(allocating)
1000 6f4d00ee 2013-09-23 0intro blockDependency(b, lb, -1, nil, nil);
1001 6f4d00ee 2013-09-23 0intro blockPut(lb);
1002 6f4d00ee 2013-09-23 0intro return 1;
1006 6f4d00ee 2013-09-23 0intro * Record that bb must be written out before b.
1007 6f4d00ee 2013-09-23 0intro * If index is given, we're about to overwrite the score/e
1008 6f4d00ee 2013-09-23 0intro * at that index in the block. Save the old value so we
1009 6f4d00ee 2013-09-23 0intro * can write a safer ``old'' version of the block if pressed.
1012 6f4d00ee 2013-09-23 0intro blockDependency(Block *b, Block *bb, int index, uchar *score, Entry *e)
1014 6f4d00ee 2013-09-23 0intro BList *p;
1016 6f4d00ee 2013-09-23 0intro if(bb->iostate == BioClean)
1020 6f4d00ee 2013-09-23 0intro * Dependencies for blocks containing Entry structures
1021 6f4d00ee 2013-09-23 0intro * or scores must always be explained. The problem with
1022 6f4d00ee 2013-09-23 0intro * only explaining some of them is this. Suppose we have two
1023 6f4d00ee 2013-09-23 0intro * dependencies for the same field, the first explained
1024 6f4d00ee 2013-09-23 0intro * and the second not. We try to write the block when the first
1025 6f4d00ee 2013-09-23 0intro * dependency is not written but the second is. We will roll back
1026 6f4d00ee 2013-09-23 0intro * the first change even though the second trumps it.
1028 6f4d00ee 2013-09-23 0intro if(index == -1 && bb->part == PartData)
1029 6f4d00ee 2013-09-23 0intro assert(b->l.type == BtData);
1031 6f4d00ee 2013-09-23 0intro if(bb->iostate != BioDirty){
1032 6f4d00ee 2013-09-23 0intro fprint(2, "%s: %d:%x:%d iostate is %d in blockDependency\n",
1033 6f4d00ee 2013-09-23 0intro argv0, bb->part, bb->addr, bb->l.type, bb->iostate);
1034 6f4d00ee 2013-09-23 0intro abort();
1037 6f4d00ee 2013-09-23 0intro p = blistAlloc(bb);
1038 6f4d00ee 2013-09-23 0intro if(p == nil)
1041 6f4d00ee 2013-09-23 0intro assert(bb->iostate == BioDirty);
1042 6f4d00ee 2013-09-23 0intro if(0)fprint(2, "%s: %d:%x:%d depends on %d:%x:%d\n", argv0, b->part, b->addr, b->l.type, bb->part, bb->addr, bb->l.type);
1044 6f4d00ee 2013-09-23 0intro p->part = bb->part;
1045 6f4d00ee 2013-09-23 0intro p->addr = bb->addr;
1046 6f4d00ee 2013-09-23 0intro p->type = bb->l.type;
1047 6f4d00ee 2013-09-23 0intro p->vers = bb->vers;
1048 6f4d00ee 2013-09-23 0intro p->index = index;
1049 6f4d00ee 2013-09-23 0intro if(p->index >= 0){
1051 6f4d00ee 2013-09-23 0intro * This test would just be b->l.type==BtDir except
1052 6f4d00ee 2013-09-23 0intro * we need to exclude the super block.
1054 6f4d00ee 2013-09-23 0intro if(b->l.type == BtDir && b->part == PartData)
1055 6f4d00ee 2013-09-23 0intro entryPack(e, p->old.entry, 0);
1057 6f4d00ee 2013-09-23 0intro memmove(p->old.score, score, VtScoreSize);
1059 6f4d00ee 2013-09-23 0intro p->next = b->prior;
1060 6f4d00ee 2013-09-23 0intro b->prior = p;
1064 6f4d00ee 2013-09-23 0intro * Mark an in-memory block as dirty. If there are too many
1065 fa325e9b 2020-01-10 cross * dirty blocks, start writing some out to disk.
1067 6f4d00ee 2013-09-23 0intro * If there were way too many dirty blocks, we used to
1068 fa325e9b 2020-01-10 cross * try to do some flushing ourselves, but it's just too dangerous --
1069 6f4d00ee 2013-09-23 0intro * it implies that the callers cannot have any of our priors locked,
1070 6f4d00ee 2013-09-23 0intro * but this is hard to avoid in some cases.
1073 6f4d00ee 2013-09-23 0intro blockDirty(Block *b)
1075 6f4d00ee 2013-09-23 0intro Cache *c;
1077 6f4d00ee 2013-09-23 0intro c = b->c;
1079 6f4d00ee 2013-09-23 0intro assert(b->part != PartVenti);
1081 6f4d00ee 2013-09-23 0intro if(b->iostate == BioDirty)
1082 6f4d00ee 2013-09-23 0intro return 1;
1083 6f4d00ee 2013-09-23 0intro assert(b->iostate == BioClean || b->iostate == BioLabel);
1085 4b576658 2013-09-23 0intro qlock(&c->lk);
1086 6f4d00ee 2013-09-23 0intro b->iostate = BioDirty;
1087 6f4d00ee 2013-09-23 0intro c->ndirty++;
1088 6f4d00ee 2013-09-23 0intro if(c->ndirty > (c->maxdirty>>1))
1089 4b576658 2013-09-23 0intro rwakeup(&c->flush);
1090 4b576658 2013-09-23 0intro qunlock(&c->lk);
1092 6f4d00ee 2013-09-23 0intro return 1;
1096 6f4d00ee 2013-09-23 0intro * We've decided to write out b. Maybe b has some pointers to blocks
1097 6f4d00ee 2013-09-23 0intro * that haven't yet been written to disk. If so, construct a slightly out-of-date
1098 6f4d00ee 2013-09-23 0intro * copy of b that is safe to write out. (diskThread will make sure the block
1099 6f4d00ee 2013-09-23 0intro * remains marked as dirty.)
1102 6f4d00ee 2013-09-23 0intro blockRollback(Block *b, uchar *buf)
1104 6f4d00ee 2013-09-23 0intro u32int addr;
1105 6f4d00ee 2013-09-23 0intro BList *p;
1106 6f4d00ee 2013-09-23 0intro Super super;
1108 6f4d00ee 2013-09-23 0intro /* easy case */
1109 6f4d00ee 2013-09-23 0intro if(b->prior == nil)
1110 6f4d00ee 2013-09-23 0intro return b->data;
1112 6f4d00ee 2013-09-23 0intro memmove(buf, b->data, b->c->size);
1113 6f4d00ee 2013-09-23 0intro for(p=b->prior; p; p=p->next){
1115 6f4d00ee 2013-09-23 0intro * we know p->index >= 0 because blockWrite has vetted this block for us.
1117 6f4d00ee 2013-09-23 0intro assert(p->index >= 0);
1118 6f4d00ee 2013-09-23 0intro assert(b->part == PartSuper || (b->part == PartData && b->l.type != BtData));
1119 6f4d00ee 2013-09-23 0intro if(b->part == PartSuper){
1120 6f4d00ee 2013-09-23 0intro assert(p->index == 0);
1121 6f4d00ee 2013-09-23 0intro superUnpack(&super, buf);
1122 6f4d00ee 2013-09-23 0intro addr = globalToLocal(p->old.score);
1123 6f4d00ee 2013-09-23 0intro if(addr == NilBlock){
1124 6f4d00ee 2013-09-23 0intro fprint(2, "%s: rolling back super block: "
1125 6f4d00ee 2013-09-23 0intro "bad replacement addr %V\n",
1126 6f4d00ee 2013-09-23 0intro argv0, p->old.score);
1127 6f4d00ee 2013-09-23 0intro abort();
1129 6f4d00ee 2013-09-23 0intro super.active = addr;
1130 6f4d00ee 2013-09-23 0intro superPack(&super, buf);
1131 6f4d00ee 2013-09-23 0intro continue;
1133 6f4d00ee 2013-09-23 0intro if(b->l.type == BtDir)
1134 6f4d00ee 2013-09-23 0intro memmove(buf+p->index*VtEntrySize, p->old.entry, VtEntrySize);
1136 6f4d00ee 2013-09-23 0intro memmove(buf+p->index*VtScoreSize, p->old.score, VtScoreSize);
1138 6f4d00ee 2013-09-23 0intro return buf;
1142 6f4d00ee 2013-09-23 0intro * Try to write block b.
1143 6f4d00ee 2013-09-23 0intro * If b depends on other blocks:
1145 6f4d00ee 2013-09-23 0intro * If the block has been written out, remove the dependency.
1146 6f4d00ee 2013-09-23 0intro * If the dependency is replaced by a more recent dependency,
1147 6f4d00ee 2013-09-23 0intro * throw it out.
1148 6f4d00ee 2013-09-23 0intro * If we know how to write out an old version of b that doesn't
1149 6f4d00ee 2013-09-23 0intro * depend on it, do that.
1151 6f4d00ee 2013-09-23 0intro * Otherwise, bail.
1154 6f4d00ee 2013-09-23 0intro blockWrite(Block *b, int waitlock)
1156 6f4d00ee 2013-09-23 0intro uchar *dmap;
1157 6f4d00ee 2013-09-23 0intro Cache *c;
1158 6f4d00ee 2013-09-23 0intro BList *p, **pp;
1159 6f4d00ee 2013-09-23 0intro Block *bb;
1160 6f4d00ee 2013-09-23 0intro int lockfail;
1162 6f4d00ee 2013-09-23 0intro c = b->c;
1164 6f4d00ee 2013-09-23 0intro if(b->iostate != BioDirty)
1165 6f4d00ee 2013-09-23 0intro return 1;
1167 6f4d00ee 2013-09-23 0intro dmap = b->dmap;
1168 6f4d00ee 2013-09-23 0intro memset(dmap, 0, c->ndmap);
1169 6f4d00ee 2013-09-23 0intro pp = &b->prior;
1170 6f4d00ee 2013-09-23 0intro for(p=*pp; p; p=*pp){
1171 6f4d00ee 2013-09-23 0intro if(p->index >= 0){
1172 6f4d00ee 2013-09-23 0intro /* more recent dependency has succeeded; this one can go */
1173 6f4d00ee 2013-09-23 0intro if(dmap[p->index/8] & (1<<(p->index%8)))
1174 6f4d00ee 2013-09-23 0intro goto ignblock;
1177 6f4d00ee 2013-09-23 0intro lockfail = 0;
1178 6f4d00ee 2013-09-23 0intro bb = _cacheLocalLookup(c, p->part, p->addr, p->vers, waitlock,
1179 6f4d00ee 2013-09-23 0intro &lockfail);
1180 6f4d00ee 2013-09-23 0intro if(bb == nil){
1181 6f4d00ee 2013-09-23 0intro if(lockfail)
1182 6f4d00ee 2013-09-23 0intro return 0;
1183 6f4d00ee 2013-09-23 0intro /* block not in cache => was written already */
1184 6f4d00ee 2013-09-23 0intro dmap[p->index/8] |= 1<<(p->index%8);
1185 6f4d00ee 2013-09-23 0intro goto ignblock;
1189 6f4d00ee 2013-09-23 0intro * same version of block is still in cache.
1191 6f4d00ee 2013-09-23 0intro * the assertion is true because the block still has version p->vers,
1192 6f4d00ee 2013-09-23 0intro * which means it hasn't been written out since we last saw it.
1194 6f4d00ee 2013-09-23 0intro if(bb->iostate != BioDirty){
1195 6f4d00ee 2013-09-23 0intro fprint(2, "%s: %d:%x:%d iostate is %d in blockWrite\n",
1196 6f4d00ee 2013-09-23 0intro argv0, bb->part, bb->addr, bb->l.type, bb->iostate);
1197 6f4d00ee 2013-09-23 0intro /* probably BioWriting if it happens? */
1198 6f4d00ee 2013-09-23 0intro if(bb->iostate == BioClean)
1199 6f4d00ee 2013-09-23 0intro goto ignblock;
1202 6f4d00ee 2013-09-23 0intro blockPut(bb);
1204 6f4d00ee 2013-09-23 0intro if(p->index < 0){
1206 6f4d00ee 2013-09-23 0intro * We don't know how to temporarily undo
1207 6f4d00ee 2013-09-23 0intro * b's dependency on bb, so just don't write b yet.
1209 6f4d00ee 2013-09-23 0intro if(0) fprint(2, "%s: blockWrite skipping %d %x %d %d; need to write %d %x %d\n",
1210 6f4d00ee 2013-09-23 0intro argv0, b->part, b->addr, b->vers, b->l.type, p->part, p->addr, bb->vers);
1211 6f4d00ee 2013-09-23 0intro return 0;
1213 6f4d00ee 2013-09-23 0intro /* keep walking down the list */
1214 6f4d00ee 2013-09-23 0intro pp = &p->next;
1215 6f4d00ee 2013-09-23 0intro continue;
1217 6f4d00ee 2013-09-23 0intro ignblock:
1218 6f4d00ee 2013-09-23 0intro *pp = p->next;
1219 6f4d00ee 2013-09-23 0intro blistFree(c, p);
1220 6f4d00ee 2013-09-23 0intro continue;
1224 6f4d00ee 2013-09-23 0intro * DiskWrite must never be called with a double-locked block.
1225 6f4d00ee 2013-09-23 0intro * This call to diskWrite is okay because blockWrite is only called
1226 6f4d00ee 2013-09-23 0intro * from the cache flush thread, which never double-locks a block.
1228 6f4d00ee 2013-09-23 0intro diskWrite(c->disk, b);
1229 6f4d00ee 2013-09-23 0intro return 1;
1233 6f4d00ee 2013-09-23 0intro * Change the I/O state of block b.
1234 6f4d00ee 2013-09-23 0intro * Just an assignment except for magic in
1235 6f4d00ee 2013-09-23 0intro * switch statement (read comments there).
1238 6f4d00ee 2013-09-23 0intro blockSetIOState(Block *b, int iostate)
1240 6f4d00ee 2013-09-23 0intro int dowakeup;
1241 6f4d00ee 2013-09-23 0intro Cache *c;
1242 6f4d00ee 2013-09-23 0intro BList *p, *q;
1244 6f4d00ee 2013-09-23 0intro if(0) fprint(2, "%s: iostate part=%d addr=%x %s->%s\n", argv0, b->part, b->addr, bioStr(b->iostate), bioStr(iostate));
1246 6f4d00ee 2013-09-23 0intro c = b->c;
1248 6f4d00ee 2013-09-23 0intro dowakeup = 0;
1249 6f4d00ee 2013-09-23 0intro switch(iostate){
1250 6f4d00ee 2013-09-23 0intro default:
1251 6f4d00ee 2013-09-23 0intro abort();
1252 6f4d00ee 2013-09-23 0intro case BioEmpty:
1253 6f4d00ee 2013-09-23 0intro assert(!b->uhead);
1255 6f4d00ee 2013-09-23 0intro case BioLabel:
1256 6f4d00ee 2013-09-23 0intro assert(!b->uhead);
1258 6f4d00ee 2013-09-23 0intro case BioClean:
1259 6f4d00ee 2013-09-23 0intro bwatchDependency(b);
1261 6f4d00ee 2013-09-23 0intro * If b->prior is set, it means a write just finished.
1262 6f4d00ee 2013-09-23 0intro * The prior list isn't needed anymore.
1264 6f4d00ee 2013-09-23 0intro for(p=b->prior; p; p=q){
1265 6f4d00ee 2013-09-23 0intro q = p->next;
1266 6f4d00ee 2013-09-23 0intro blistFree(c, p);
1268 6f4d00ee 2013-09-23 0intro b->prior = nil;
1270 6f4d00ee 2013-09-23 0intro * Freeing a block or just finished a write.
1271 6f4d00ee 2013-09-23 0intro * Move the blocks from the per-block unlink
1272 6f4d00ee 2013-09-23 0intro * queue to the cache unlink queue.
1274 6f4d00ee 2013-09-23 0intro if(b->iostate == BioDirty || b->iostate == BioWriting){
1275 4b576658 2013-09-23 0intro qlock(&c->lk);
1276 6f4d00ee 2013-09-23 0intro c->ndirty--;
1277 6f4d00ee 2013-09-23 0intro b->iostate = iostate; /* change here to keep in sync with ndirty */
1278 6f4d00ee 2013-09-23 0intro b->vers = c->vers++;
1279 6f4d00ee 2013-09-23 0intro if(b->uhead){
1280 6f4d00ee 2013-09-23 0intro /* add unlink blocks to unlink queue */
1281 6f4d00ee 2013-09-23 0intro if(c->uhead == nil){
1282 6f4d00ee 2013-09-23 0intro c->uhead = b->uhead;
1283 4b576658 2013-09-23 0intro rwakeup(&c->unlink);
1285 6f4d00ee 2013-09-23 0intro c->utail->next = b->uhead;
1286 6f4d00ee 2013-09-23 0intro c->utail = b->utail;
1287 6f4d00ee 2013-09-23 0intro b->uhead = nil;
1289 4b576658 2013-09-23 0intro qunlock(&c->lk);
1291 6f4d00ee 2013-09-23 0intro assert(!b->uhead);
1292 6f4d00ee 2013-09-23 0intro dowakeup = 1;
1294 6f4d00ee 2013-09-23 0intro case BioDirty:
1296 6f4d00ee 2013-09-23 0intro * Wrote out an old version of the block (see blockRollback).
1297 6f4d00ee 2013-09-23 0intro * Bump a version count, leave it dirty.
1299 6f4d00ee 2013-09-23 0intro if(b->iostate == BioWriting){
1300 4b576658 2013-09-23 0intro qlock(&c->lk);
1301 6f4d00ee 2013-09-23 0intro b->vers = c->vers++;
1302 4b576658 2013-09-23 0intro qunlock(&c->lk);
1303 6f4d00ee 2013-09-23 0intro dowakeup = 1;
1306 6f4d00ee 2013-09-23 0intro case BioReading:
1307 6f4d00ee 2013-09-23 0intro case BioWriting:
1309 6f4d00ee 2013-09-23 0intro * Adding block to disk queue. Bump reference count.
1310 6f4d00ee 2013-09-23 0intro * diskThread decs the count later by calling blockPut.
1311 6f4d00ee 2013-09-23 0intro * This is here because we need to lock c->lk to
1312 6f4d00ee 2013-09-23 0intro * manipulate the ref count.
1314 4b576658 2013-09-23 0intro qlock(&c->lk);
1315 6f4d00ee 2013-09-23 0intro b->ref++;
1316 4b576658 2013-09-23 0intro qunlock(&c->lk);
1318 6f4d00ee 2013-09-23 0intro case BioReadError:
1319 6f4d00ee 2013-09-23 0intro case BioVentiError:
1323 6f4d00ee 2013-09-23 0intro dowakeup = 1;
1326 6f4d00ee 2013-09-23 0intro b->iostate = iostate;
1328 6f4d00ee 2013-09-23 0intro * Now that the state has changed, we can wake the waiters.
1330 6f4d00ee 2013-09-23 0intro if(dowakeup)
1331 4b576658 2013-09-23 0intro rwakeupall(&b->ioready);
1335 fa325e9b 2020-01-10 cross * The active file system is a tree of blocks.
1336 6f4d00ee 2013-09-23 0intro * When we add snapshots to the mix, the entire file system
1337 6f4d00ee 2013-09-23 0intro * becomes a dag and thus requires a bit more care.
1339 6f4d00ee 2013-09-23 0intro * The life of the file system is divided into epochs. A snapshot
1340 6f4d00ee 2013-09-23 0intro * ends one epoch and begins the next. Each file system block
1341 6f4d00ee 2013-09-23 0intro * is marked with the epoch in which it was created (b.epoch).
1342 6f4d00ee 2013-09-23 0intro * When the block is unlinked from the file system (closed), it is marked
1343 fa325e9b 2020-01-10 cross * with the epoch in which it was removed (b.epochClose).
1344 fa325e9b 2020-01-10 cross * Once we have discarded or archived all snapshots up to
1345 6f4d00ee 2013-09-23 0intro * b.epochClose, we can reclaim the block.
1347 6f4d00ee 2013-09-23 0intro * If a block was created in a past epoch but is not yet closed,
1348 6f4d00ee 2013-09-23 0intro * it is treated as copy-on-write. Of course, in order to insert the
1349 6f4d00ee 2013-09-23 0intro * new pointer into the tree, the parent must be made writable,
1350 6f4d00ee 2013-09-23 0intro * and so on up the tree. The recursion stops because the root
1351 6f4d00ee 2013-09-23 0intro * block is always writable.
1353 6f4d00ee 2013-09-23 0intro * If blocks are never closed, they will never be reused, and
1354 6f4d00ee 2013-09-23 0intro * we will run out of disk space. But marking a block as closed
1355 6f4d00ee 2013-09-23 0intro * requires some care about dependencies and write orderings.
1357 6f4d00ee 2013-09-23 0intro * (1) If a block p points at a copy-on-write block b and we
1358 6f4d00ee 2013-09-23 0intro * copy b to create bb, then p must be written out after bb and
1359 6f4d00ee 2013-09-23 0intro * lbb (bb's label block).
1361 6f4d00ee 2013-09-23 0intro * (2) We have to mark b as closed, but only after we switch
1362 fa325e9b 2020-01-10 cross * the pointer, so lb must be written out after p. In fact, we
1363 6f4d00ee 2013-09-23 0intro * can't even update the in-memory copy, or the cache might
1364 6f4d00ee 2013-09-23 0intro * mistakenly give out b for reuse before p gets written.
1366 6f4d00ee 2013-09-23 0intro * CacheAllocBlock's call to blockSetLabel records a "bb after lbb" dependency.
1367 6f4d00ee 2013-09-23 0intro * The caller is expected to record a "p after bb" dependency
1368 6f4d00ee 2013-09-23 0intro * to finish (1), and also expected to call blockRemoveLink
1369 6f4d00ee 2013-09-23 0intro * to arrange for (2) to happen once p is written.
1371 6f4d00ee 2013-09-23 0intro * Until (2) happens, some pieces of the code (e.g., the archiver)
1372 fa325e9b 2020-01-10 cross * still need to know whether a block has been copied, so we
1373 6f4d00ee 2013-09-23 0intro * set the BsCopied bit in the label and force that to disk *before*
1374 6f4d00ee 2013-09-23 0intro * the copy gets written out.
1377 6f4d00ee 2013-09-23 0intro blockCopy(Block *b, u32int tag, u32int ehi, u32int elo)
1379 6f4d00ee 2013-09-23 0intro Block *bb, *lb;
1380 6f4d00ee 2013-09-23 0intro Label l;
1382 6f4d00ee 2013-09-23 0intro if((b->l.state&BsClosed) || b->l.epoch >= ehi)
1383 6f4d00ee 2013-09-23 0intro fprint(2, "%s: blockCopy %#ux %L but fs is [%ud,%ud]\n",
1384 6f4d00ee 2013-09-23 0intro argv0, b->addr, &b->l, elo, ehi);
1386 6f4d00ee 2013-09-23 0intro bb = cacheAllocBlock(b->c, b->l.type, tag, ehi, elo);
1387 6f4d00ee 2013-09-23 0intro if(bb == nil){
1388 6f4d00ee 2013-09-23 0intro blockPut(b);
1389 6f4d00ee 2013-09-23 0intro return nil;
1393 6f4d00ee 2013-09-23 0intro * Update label so we know the block has been copied.
1394 6f4d00ee 2013-09-23 0intro * (It will be marked closed once it has been unlinked from
1395 6f4d00ee 2013-09-23 0intro * the tree.) This must follow cacheAllocBlock since we
1396 6f4d00ee 2013-09-23 0intro * can't be holding onto lb when we call cacheAllocBlock.
1398 6f4d00ee 2013-09-23 0intro if((b->l.state&BsCopied)==0)
1399 6f4d00ee 2013-09-23 0intro if(b->part == PartData){ /* not the superblock */
1400 6f4d00ee 2013-09-23 0intro l = b->l;
1401 6f4d00ee 2013-09-23 0intro l.state |= BsCopied;
1402 6f4d00ee 2013-09-23 0intro lb = _blockSetLabel(b, &l);
1403 6f4d00ee 2013-09-23 0intro if(lb == nil){
1404 6f4d00ee 2013-09-23 0intro /* can't set label => can't copy block */
1405 6f4d00ee 2013-09-23 0intro blockPut(b);
1406 6f4d00ee 2013-09-23 0intro l.type = BtMax;
1407 6f4d00ee 2013-09-23 0intro l.state = BsFree;
1408 6f4d00ee 2013-09-23 0intro l.epoch = 0;
1409 6f4d00ee 2013-09-23 0intro l.epochClose = 0;
1410 6f4d00ee 2013-09-23 0intro l.tag = 0;
1411 6f4d00ee 2013-09-23 0intro blockSetLabel(bb, &l, 0);
1412 6f4d00ee 2013-09-23 0intro blockPut(bb);
1413 6f4d00ee 2013-09-23 0intro return nil;
1415 6f4d00ee 2013-09-23 0intro blockDependency(bb, lb, -1, nil, nil);
1416 6f4d00ee 2013-09-23 0intro blockPut(lb);
1419 6f4d00ee 2013-09-23 0intro memmove(bb->data, b->data, b->c->size);
1420 6f4d00ee 2013-09-23 0intro blockDirty(bb);
1421 6f4d00ee 2013-09-23 0intro blockPut(b);
1422 6f4d00ee 2013-09-23 0intro return bb;
1426 6f4d00ee 2013-09-23 0intro * Block b once pointed at the block bb at addr/type/tag, but no longer does.
1427 6f4d00ee 2013-09-23 0intro * If recurse is set, we are unlinking all of bb's children as well.
1429 6f4d00ee 2013-09-23 0intro * We can't reclaim bb (or its kids) until the block b gets written to disk. We add
1430 6f4d00ee 2013-09-23 0intro * the relevant information to b's list of unlinked blocks. Once b is written,
1431 6f4d00ee 2013-09-23 0intro * the list will be queued for processing.
1433 6f4d00ee 2013-09-23 0intro * If b depends on bb, it doesn't anymore, so we remove bb from the prior list.
1436 6f4d00ee 2013-09-23 0intro blockRemoveLink(Block *b, u32int addr, int type, u32int tag, int recurse)
1438 6f4d00ee 2013-09-23 0intro BList *p, **pp, bl;
1440 6f4d00ee 2013-09-23 0intro /* remove bb from prior list */
1441 6f4d00ee 2013-09-23 0intro for(pp=&b->prior; (p=*pp)!=nil; ){
1442 6f4d00ee 2013-09-23 0intro if(p->part == PartData && p->addr == addr){
1443 6f4d00ee 2013-09-23 0intro *pp = p->next;
1444 6f4d00ee 2013-09-23 0intro blistFree(b->c, p);
1446 6f4d00ee 2013-09-23 0intro pp = &p->next;
1449 6f4d00ee 2013-09-23 0intro bl.part = PartData;
1450 6f4d00ee 2013-09-23 0intro bl.addr = addr;
1451 6f4d00ee 2013-09-23 0intro bl.type = type;
1452 6f4d00ee 2013-09-23 0intro bl.tag = tag;
1453 6f4d00ee 2013-09-23 0intro if(b->l.epoch == 0)
1454 6f4d00ee 2013-09-23 0intro assert(b->part == PartSuper);
1455 6f4d00ee 2013-09-23 0intro bl.epoch = b->l.epoch;
1456 6f4d00ee 2013-09-23 0intro bl.next = nil;
1457 6f4d00ee 2013-09-23 0intro bl.recurse = recurse;
1459 6f4d00ee 2013-09-23 0intro if(b->part == PartSuper && b->iostate == BioClean)
1460 6f4d00ee 2013-09-23 0intro p = nil;
1462 6f4d00ee 2013-09-23 0intro p = blistAlloc(b);
1463 6f4d00ee 2013-09-23 0intro if(p == nil){
1465 6f4d00ee 2013-09-23 0intro * b has already been written to disk.
1467 6f4d00ee 2013-09-23 0intro doRemoveLink(b->c, &bl);
1471 6f4d00ee 2013-09-23 0intro /* Uhead is only processed when the block goes from Dirty -> Clean */
1472 6f4d00ee 2013-09-23 0intro assert(b->iostate == BioDirty);
1474 6f4d00ee 2013-09-23 0intro *p = bl;
1475 6f4d00ee 2013-09-23 0intro if(b->uhead == nil)
1476 6f4d00ee 2013-09-23 0intro b->uhead = p;
1478 6f4d00ee 2013-09-23 0intro b->utail->next = p;
1479 6f4d00ee 2013-09-23 0intro b->utail = p;
1483 6f4d00ee 2013-09-23 0intro * Process removal of a single block and perhaps its children.
1485 6f4d00ee 2013-09-23 0intro static void
1486 6f4d00ee 2013-09-23 0intro doRemoveLink(Cache *c, BList *p)
1488 6f4d00ee 2013-09-23 0intro int i, n, recurse;
1489 6f4d00ee 2013-09-23 0intro u32int a;
1490 6f4d00ee 2013-09-23 0intro Block *b;
1491 6f4d00ee 2013-09-23 0intro Label l;
1492 6f4d00ee 2013-09-23 0intro BList bl;
1494 6f4d00ee 2013-09-23 0intro recurse = (p->recurse && p->type != BtData && p->type != BtDir);
1497 6f4d00ee 2013-09-23 0intro * We're not really going to overwrite b, but if we're not
1498 6f4d00ee 2013-09-23 0intro * going to look at its contents, there is no point in reading
1499 6f4d00ee 2013-09-23 0intro * them from the disk.
1501 6f4d00ee 2013-09-23 0intro b = cacheLocalData(c, p->addr, p->type, p->tag, recurse ? OReadOnly : OOverWrite, 0);
1502 6f4d00ee 2013-09-23 0intro if(b == nil)
1506 6f4d00ee 2013-09-23 0intro * When we're unlinking from the superblock, close with the next epoch.
1508 6f4d00ee 2013-09-23 0intro if(p->epoch == 0)
1509 6f4d00ee 2013-09-23 0intro p->epoch = b->l.epoch+1;
1511 6f4d00ee 2013-09-23 0intro /* sanity check */
1512 6f4d00ee 2013-09-23 0intro if(b->l.epoch > p->epoch){
1513 6f4d00ee 2013-09-23 0intro fprint(2, "%s: doRemoveLink: strange epoch %ud > %ud\n",
1514 6f4d00ee 2013-09-23 0intro argv0, b->l.epoch, p->epoch);
1515 6f4d00ee 2013-09-23 0intro blockPut(b);
1519 6f4d00ee 2013-09-23 0intro if(recurse){
1520 6f4d00ee 2013-09-23 0intro n = c->size / VtScoreSize;
1521 6f4d00ee 2013-09-23 0intro for(i=0; i<n; i++){
1522 6f4d00ee 2013-09-23 0intro a = globalToLocal(b->data + i*VtScoreSize);
1523 6f4d00ee 2013-09-23 0intro if(a == NilBlock || !readLabel(c, &l, a))
1524 6f4d00ee 2013-09-23 0intro continue;
1525 6f4d00ee 2013-09-23 0intro if(l.state&BsClosed)
1526 6f4d00ee 2013-09-23 0intro continue;
1528 6f4d00ee 2013-09-23 0intro * If stack space becomes an issue...
1529 6f4d00ee 2013-09-23 0intro p->addr = a;
1530 6f4d00ee 2013-09-23 0intro p->type = l.type;
1531 6f4d00ee 2013-09-23 0intro p->tag = l.tag;
1532 6f4d00ee 2013-09-23 0intro doRemoveLink(c, p);
1535 6f4d00ee 2013-09-23 0intro bl.part = PartData;
1536 6f4d00ee 2013-09-23 0intro bl.addr = a;
1537 6f4d00ee 2013-09-23 0intro bl.type = l.type;
1538 6f4d00ee 2013-09-23 0intro bl.tag = l.tag;
1539 6f4d00ee 2013-09-23 0intro bl.epoch = p->epoch;
1540 6f4d00ee 2013-09-23 0intro bl.next = nil;
1541 6f4d00ee 2013-09-23 0intro bl.recurse = 1;
1542 6f4d00ee 2013-09-23 0intro /* give up the block lock - share with others */
1543 6f4d00ee 2013-09-23 0intro blockPut(b);
1544 6f4d00ee 2013-09-23 0intro doRemoveLink(c, &bl);
1545 6f4d00ee 2013-09-23 0intro b = cacheLocalData(c, p->addr, p->type, p->tag, OReadOnly, 0);
1546 6f4d00ee 2013-09-23 0intro if(b == nil){
1547 6f4d00ee 2013-09-23 0intro fprint(2, "%s: warning: lost block in doRemoveLink\n",
1554 6f4d00ee 2013-09-23 0intro l = b->l;
1555 6f4d00ee 2013-09-23 0intro l.state |= BsClosed;
1556 6f4d00ee 2013-09-23 0intro l.epochClose = p->epoch;
1557 6f4d00ee 2013-09-23 0intro if(l.epochClose == l.epoch){
1558 4b576658 2013-09-23 0intro qlock(&c->fl->lk);
1559 6f4d00ee 2013-09-23 0intro if(l.epoch == c->fl->epochLow)
1560 6f4d00ee 2013-09-23 0intro c->fl->nused--;
1561 6f4d00ee 2013-09-23 0intro blockSetLabel(b, &l, 0);
1562 4b576658 2013-09-23 0intro qunlock(&c->fl->lk);
1564 6f4d00ee 2013-09-23 0intro blockSetLabel(b, &l, 0);
1565 6f4d00ee 2013-09-23 0intro blockPut(b);
1569 6f4d00ee 2013-09-23 0intro * Allocate a BList so that we can record a dependency
1570 6f4d00ee 2013-09-23 0intro * or queue a removal related to block b.
1571 6f4d00ee 2013-09-23 0intro * If we can't find a BList, we write out b and return nil.
1573 6f4d00ee 2013-09-23 0intro static BList *
1574 6f4d00ee 2013-09-23 0intro blistAlloc(Block *b)
1576 6f4d00ee 2013-09-23 0intro Cache *c;
1577 6f4d00ee 2013-09-23 0intro BList *p;
1579 6f4d00ee 2013-09-23 0intro if(b->iostate != BioDirty){
1581 6f4d00ee 2013-09-23 0intro * should not happen anymore -
1582 6f4d00ee 2013-09-23 0intro * blockDirty used to flush but no longer does.
1584 6f4d00ee 2013-09-23 0intro assert(b->iostate == BioClean);
1585 6f4d00ee 2013-09-23 0intro fprint(2, "%s: blistAlloc: called on clean block\n", argv0);
1586 6f4d00ee 2013-09-23 0intro return nil;
1589 6f4d00ee 2013-09-23 0intro c = b->c;
1590 4b576658 2013-09-23 0intro qlock(&c->lk);
1591 6f4d00ee 2013-09-23 0intro if(c->blfree == nil){
1593 6f4d00ee 2013-09-23 0intro * No free BLists. What are our options?
1596 6f4d00ee 2013-09-23 0intro /* Block has no priors? Just write it. */
1597 6f4d00ee 2013-09-23 0intro if(b->prior == nil){
1598 4b576658 2013-09-23 0intro qunlock(&c->lk);
1599 6f4d00ee 2013-09-23 0intro diskWriteAndWait(c->disk, b);
1600 6f4d00ee 2013-09-23 0intro return nil;
1604 6f4d00ee 2013-09-23 0intro * Wake the flush thread, which will hopefully free up
1605 6f4d00ee 2013-09-23 0intro * some BLists for us. We used to flush a block from
1606 6f4d00ee 2013-09-23 0intro * our own prior list and reclaim that BList, but this is
1607 6f4d00ee 2013-09-23 0intro * a no-no: some of the blocks on our prior list may
1608 6f4d00ee 2013-09-23 0intro * be locked by our caller. Or maybe their label blocks
1609 6f4d00ee 2013-09-23 0intro * are locked by our caller. In any event, it's too hard
1610 6f4d00ee 2013-09-23 0intro * to make sure we can do I/O for ourselves. Instead,
1611 6f4d00ee 2013-09-23 0intro * we assume the flush thread will find something.
1612 6f4d00ee 2013-09-23 0intro * (The flush thread never blocks waiting for a block,
1613 6f4d00ee 2013-09-23 0intro * so it can't deadlock like we can.)
1615 6f4d00ee 2013-09-23 0intro while(c->blfree == nil){
1616 4b576658 2013-09-23 0intro rwakeup(&c->flush);
1617 4b576658 2013-09-23 0intro rsleep(&c->blrend);
1618 6f4d00ee 2013-09-23 0intro if(c->blfree == nil)
1619 6f4d00ee 2013-09-23 0intro fprint(2, "%s: flushing for blists\n", argv0);
1623 6f4d00ee 2013-09-23 0intro p = c->blfree;
1624 6f4d00ee 2013-09-23 0intro c->blfree = p->next;
1625 4b576658 2013-09-23 0intro qunlock(&c->lk);
1626 6f4d00ee 2013-09-23 0intro return p;
1629 6f4d00ee 2013-09-23 0intro static void
1630 6f4d00ee 2013-09-23 0intro blistFree(Cache *c, BList *bl)
1632 4b576658 2013-09-23 0intro qlock(&c->lk);
1633 6f4d00ee 2013-09-23 0intro bl->next = c->blfree;
1634 6f4d00ee 2013-09-23 0intro c->blfree = bl;
1635 4b576658 2013-09-23 0intro rwakeup(&c->blrend);
1636 4b576658 2013-09-23 0intro qunlock(&c->lk);
1640 6f4d00ee 2013-09-23 0intro bsStr(int state)
1642 6f4d00ee 2013-09-23 0intro static char s[100];
1644 6f4d00ee 2013-09-23 0intro if(state == BsFree)
1645 6f4d00ee 2013-09-23 0intro return "Free";
1646 6f4d00ee 2013-09-23 0intro if(state == BsBad)
1647 6f4d00ee 2013-09-23 0intro return "Bad";
1649 6f4d00ee 2013-09-23 0intro sprint(s, "%x", state);
1650 6f4d00ee 2013-09-23 0intro if(!(state&BsAlloc))
1651 6f4d00ee 2013-09-23 0intro strcat(s, ",Free"); /* should not happen */
1652 6f4d00ee 2013-09-23 0intro if(state&BsCopied)
1653 6f4d00ee 2013-09-23 0intro strcat(s, ",Copied");
1654 6f4d00ee 2013-09-23 0intro if(state&BsVenti)
1655 6f4d00ee 2013-09-23 0intro strcat(s, ",Venti");
1656 6f4d00ee 2013-09-23 0intro if(state&BsClosed)
1657 6f4d00ee 2013-09-23 0intro strcat(s, ",Closed");
1658 6f4d00ee 2013-09-23 0intro return s;
1662 6f4d00ee 2013-09-23 0intro bioStr(int iostate)
1664 6f4d00ee 2013-09-23 0intro switch(iostate){
1665 6f4d00ee 2013-09-23 0intro default:
1666 6f4d00ee 2013-09-23 0intro return "Unknown!!";
1667 6f4d00ee 2013-09-23 0intro case BioEmpty:
1668 6f4d00ee 2013-09-23 0intro return "Empty";
1669 6f4d00ee 2013-09-23 0intro case BioLabel:
1670 6f4d00ee 2013-09-23 0intro return "Label";
1671 6f4d00ee 2013-09-23 0intro case BioClean:
1672 6f4d00ee 2013-09-23 0intro return "Clean";
1673 6f4d00ee 2013-09-23 0intro case BioDirty:
1674 6f4d00ee 2013-09-23 0intro return "Dirty";
1675 6f4d00ee 2013-09-23 0intro case BioReading:
1676 6f4d00ee 2013-09-23 0intro return "Reading";
1677 6f4d00ee 2013-09-23 0intro case BioWriting:
1678 6f4d00ee 2013-09-23 0intro return "Writing";
1679 6f4d00ee 2013-09-23 0intro case BioReadError:
1680 6f4d00ee 2013-09-23 0intro return "ReadError";
1681 6f4d00ee 2013-09-23 0intro case BioVentiError:
1682 6f4d00ee 2013-09-23 0intro return "VentiError";
1683 6f4d00ee 2013-09-23 0intro case BioMax:
1684 6f4d00ee 2013-09-23 0intro return "Max";
1688 6f4d00ee 2013-09-23 0intro static char *bttab[] = {
1689 6f4d00ee 2013-09-23 0intro "BtData",
1690 6f4d00ee 2013-09-23 0intro "BtData+1",
1691 6f4d00ee 2013-09-23 0intro "BtData+2",
1692 6f4d00ee 2013-09-23 0intro "BtData+3",
1693 6f4d00ee 2013-09-23 0intro "BtData+4",
1694 6f4d00ee 2013-09-23 0intro "BtData+5",
1695 6f4d00ee 2013-09-23 0intro "BtData+6",
1696 6f4d00ee 2013-09-23 0intro "BtData+7",
1697 6f4d00ee 2013-09-23 0intro "BtDir",
1698 6f4d00ee 2013-09-23 0intro "BtDir+1",
1699 6f4d00ee 2013-09-23 0intro "BtDir+2",
1700 6f4d00ee 2013-09-23 0intro "BtDir+3",
1701 6f4d00ee 2013-09-23 0intro "BtDir+4",
1702 6f4d00ee 2013-09-23 0intro "BtDir+5",
1703 6f4d00ee 2013-09-23 0intro "BtDir+6",
1704 6f4d00ee 2013-09-23 0intro "BtDir+7",
1708 6f4d00ee 2013-09-23 0intro btStr(int type)
1710 6f4d00ee 2013-09-23 0intro if(type < nelem(bttab))
1711 6f4d00ee 2013-09-23 0intro return bttab[type];
1712 6f4d00ee 2013-09-23 0intro return "unknown";
1716 6f4d00ee 2013-09-23 0intro labelFmt(Fmt *f)
1718 6f4d00ee 2013-09-23 0intro Label *l;
1720 6f4d00ee 2013-09-23 0intro l = va_arg(f->args, Label*);
1721 6f4d00ee 2013-09-23 0intro return fmtprint(f, "%s,%s,e=%ud,%d,tag=%#ux",
1722 6f4d00ee 2013-09-23 0intro btStr(l->type), bsStr(l->state), l->epoch, (int)l->epochClose, l->tag);
1726 6f4d00ee 2013-09-23 0intro scoreFmt(Fmt *f)
1728 6f4d00ee 2013-09-23 0intro uchar *v;
1730 6f4d00ee 2013-09-23 0intro u32int addr;
1732 6f4d00ee 2013-09-23 0intro v = va_arg(f->args, uchar*);
1733 6f4d00ee 2013-09-23 0intro if(v == nil){
1734 6f4d00ee 2013-09-23 0intro fmtprint(f, "*");
1735 6f4d00ee 2013-09-23 0intro }else if((addr = globalToLocal(v)) != NilBlock)
1736 6f4d00ee 2013-09-23 0intro fmtprint(f, "0x%.8ux", addr);
1738 6f4d00ee 2013-09-23 0intro for(i = 0; i < VtScoreSize; i++)
1739 6f4d00ee 2013-09-23 0intro fmtprint(f, "%2.2ux", v[i]);
1742 6f4d00ee 2013-09-23 0intro return 0;
1745 6f4d00ee 2013-09-23 0intro static int
1746 6f4d00ee 2013-09-23 0intro upHeap(int i, Block *b)
1748 6f4d00ee 2013-09-23 0intro Block *bb;
1749 6f4d00ee 2013-09-23 0intro u32int now;
1751 6f4d00ee 2013-09-23 0intro Cache *c;
1753 6f4d00ee 2013-09-23 0intro c = b->c;
1754 6f4d00ee 2013-09-23 0intro now = c->now;
1755 6f4d00ee 2013-09-23 0intro for(; i != 0; i = p){
1756 6f4d00ee 2013-09-23 0intro p = (i - 1) >> 1;
1757 6f4d00ee 2013-09-23 0intro bb = c->heap[p];
1758 6f4d00ee 2013-09-23 0intro if(b->used - now >= bb->used - now)
1760 6f4d00ee 2013-09-23 0intro c->heap[i] = bb;
1761 6f4d00ee 2013-09-23 0intro bb->heap = i;
1763 6f4d00ee 2013-09-23 0intro c->heap[i] = b;
1764 6f4d00ee 2013-09-23 0intro b->heap = i;
1766 6f4d00ee 2013-09-23 0intro return i;
1769 6f4d00ee 2013-09-23 0intro static int
1770 6f4d00ee 2013-09-23 0intro downHeap(int i, Block *b)
1772 6f4d00ee 2013-09-23 0intro Block *bb;
1773 6f4d00ee 2013-09-23 0intro u32int now;
1775 6f4d00ee 2013-09-23 0intro Cache *c;
1777 6f4d00ee 2013-09-23 0intro c = b->c;
1778 6f4d00ee 2013-09-23 0intro now = c->now;
1779 6f4d00ee 2013-09-23 0intro for(; ; i = k){
1780 6f4d00ee 2013-09-23 0intro k = (i << 1) + 1;
1781 6f4d00ee 2013-09-23 0intro if(k >= c->nheap)
1783 6f4d00ee 2013-09-23 0intro if(k + 1 < c->nheap && c->heap[k]->used - now > c->heap[k + 1]->used - now)
1785 6f4d00ee 2013-09-23 0intro bb = c->heap[k];
1786 6f4d00ee 2013-09-23 0intro if(b->used - now <= bb->used - now)
1788 6f4d00ee 2013-09-23 0intro c->heap[i] = bb;
1789 6f4d00ee 2013-09-23 0intro bb->heap = i;
1791 6f4d00ee 2013-09-23 0intro c->heap[i] = b;
1792 6f4d00ee 2013-09-23 0intro b->heap = i;
1793 6f4d00ee 2013-09-23 0intro return i;
1797 6f4d00ee 2013-09-23 0intro * Delete a block from the heap.
1798 6f4d00ee 2013-09-23 0intro * Called with c->lk held.
1800 6f4d00ee 2013-09-23 0intro static void
1801 6f4d00ee 2013-09-23 0intro heapDel(Block *b)
1803 6f4d00ee 2013-09-23 0intro int i, si;
1804 6f4d00ee 2013-09-23 0intro Cache *c;
1806 6f4d00ee 2013-09-23 0intro c = b->c;
1808 6f4d00ee 2013-09-23 0intro si = b->heap;
1809 6f4d00ee 2013-09-23 0intro if(si == BadHeap)
1811 6f4d00ee 2013-09-23 0intro b->heap = BadHeap;
1812 6f4d00ee 2013-09-23 0intro c->nheap--;
1813 6f4d00ee 2013-09-23 0intro if(si == c->nheap)
1815 6f4d00ee 2013-09-23 0intro b = c->heap[c->nheap];
1816 6f4d00ee 2013-09-23 0intro i = upHeap(si, b);
1817 6f4d00ee 2013-09-23 0intro if(i == si)
1818 6f4d00ee 2013-09-23 0intro downHeap(i, b);
1822 6f4d00ee 2013-09-23 0intro * Insert a block into the heap.
1823 6f4d00ee 2013-09-23 0intro * Called with c->lk held.
1825 6f4d00ee 2013-09-23 0intro static void
1826 6f4d00ee 2013-09-23 0intro heapIns(Block *b)
1828 6f4d00ee 2013-09-23 0intro assert(b->heap == BadHeap);
1829 6f4d00ee 2013-09-23 0intro upHeap(b->c->nheap++, b);
1830 4b576658 2013-09-23 0intro rwakeup(&b->c->heapwait);
1834 6f4d00ee 2013-09-23 0intro * Get just the label for a block.
1837 6f4d00ee 2013-09-23 0intro readLabel(Cache *c, Label *l, u32int addr)
1839 6f4d00ee 2013-09-23 0intro int lpb;
1840 6f4d00ee 2013-09-23 0intro Block *b;
1841 6f4d00ee 2013-09-23 0intro u32int a;
1843 6f4d00ee 2013-09-23 0intro lpb = c->size / LabelSize;
1844 6f4d00ee 2013-09-23 0intro a = addr / lpb;
1845 6f4d00ee 2013-09-23 0intro b = cacheLocal(c, PartLabel, a, OReadOnly);
1846 6f4d00ee 2013-09-23 0intro if(b == nil){
1847 6f4d00ee 2013-09-23 0intro blockPut(b);
1848 6f4d00ee 2013-09-23 0intro return 0;
1851 6f4d00ee 2013-09-23 0intro if(!labelUnpack(l, b->data, addr%lpb)){
1852 6f4d00ee 2013-09-23 0intro blockPut(b);
1853 6f4d00ee 2013-09-23 0intro return 0;
1855 6f4d00ee 2013-09-23 0intro blockPut(b);
1856 6f4d00ee 2013-09-23 0intro return 1;
1860 6f4d00ee 2013-09-23 0intro * Process unlink queue.
1861 6f4d00ee 2013-09-23 0intro * Called with c->lk held.
1863 6f4d00ee 2013-09-23 0intro static void
1864 6f4d00ee 2013-09-23 0intro unlinkBody(Cache *c)
1866 6f4d00ee 2013-09-23 0intro BList *p;
1868 6f4d00ee 2013-09-23 0intro while(c->uhead != nil){
1869 6f4d00ee 2013-09-23 0intro p = c->uhead;
1870 6f4d00ee 2013-09-23 0intro c->uhead = p->next;
1871 4b576658 2013-09-23 0intro qunlock(&c->lk);
1872 6f4d00ee 2013-09-23 0intro doRemoveLink(c, p);
1873 4b576658 2013-09-23 0intro qlock(&c->lk);
1874 6f4d00ee 2013-09-23 0intro p->next = c->blfree;
1875 6f4d00ee 2013-09-23 0intro c->blfree = p;
1880 6f4d00ee 2013-09-23 0intro * Occasionally unlink the blocks on the cache unlink queue.
1882 6f4d00ee 2013-09-23 0intro static void
1883 6f4d00ee 2013-09-23 0intro unlinkThread(void *a)
1885 6f4d00ee 2013-09-23 0intro Cache *c = a;
1887 4b576658 2013-09-23 0intro threadsetname("unlink");
1889 4b576658 2013-09-23 0intro qlock(&c->lk);
1890 6f4d00ee 2013-09-23 0intro for(;;){
1891 4b576658 2013-09-23 0intro while(c->uhead == nil && c->die.l == nil)
1892 4b576658 2013-09-23 0intro rsleep(&c->unlink);
1893 4b576658 2013-09-23 0intro if(c->die.l != nil)
1895 6f4d00ee 2013-09-23 0intro unlinkBody(c);
1897 6f4d00ee 2013-09-23 0intro c->ref--;
1898 4b576658 2013-09-23 0intro rwakeup(&c->die);
1899 4b576658 2013-09-23 0intro qunlock(&c->lk);
1902 6f4d00ee 2013-09-23 0intro static int
1903 b29ebaab 2013-10-23 0intro baddrCmp(const void *a0, const void *a1)
1905 6f4d00ee 2013-09-23 0intro BAddr *b0, *b1;
1906 b29ebaab 2013-10-23 0intro b0 = (BAddr*)a0;
1907 b29ebaab 2013-10-23 0intro b1 = (BAddr*)a1;
1909 6f4d00ee 2013-09-23 0intro if(b0->part < b1->part)
1910 6f4d00ee 2013-09-23 0intro return -1;
1911 6f4d00ee 2013-09-23 0intro if(b0->part > b1->part)
1912 6f4d00ee 2013-09-23 0intro return 1;
1913 6f4d00ee 2013-09-23 0intro if(b0->addr < b1->addr)
1914 6f4d00ee 2013-09-23 0intro return -1;
1915 6f4d00ee 2013-09-23 0intro if(b0->addr > b1->addr)
1916 6f4d00ee 2013-09-23 0intro return 1;
1917 6f4d00ee 2013-09-23 0intro return 0;
1921 6f4d00ee 2013-09-23 0intro * Scan the block list for dirty blocks; add them to the list c->baddr.
1923 6f4d00ee 2013-09-23 0intro static void
1924 6f4d00ee 2013-09-23 0intro flushFill(Cache *c)
1926 6f4d00ee 2013-09-23 0intro int i, ndirty;
1927 6f4d00ee 2013-09-23 0intro BAddr *p;
1928 6f4d00ee 2013-09-23 0intro Block *b;
1930 4b576658 2013-09-23 0intro qlock(&c->lk);
1931 6f4d00ee 2013-09-23 0intro if(c->ndirty == 0){
1932 4b576658 2013-09-23 0intro qunlock(&c->lk);
1936 6f4d00ee 2013-09-23 0intro p = c->baddr;
1937 6f4d00ee 2013-09-23 0intro ndirty = 0;
1938 6f4d00ee 2013-09-23 0intro for(i=0; i<c->nblocks; i++){
1939 6f4d00ee 2013-09-23 0intro b = c->blocks + i;
1940 6f4d00ee 2013-09-23 0intro if(b->part == PartError)
1941 6f4d00ee 2013-09-23 0intro continue;
1942 6f4d00ee 2013-09-23 0intro if(b->iostate == BioDirty || b->iostate == BioWriting)
1943 6f4d00ee 2013-09-23 0intro ndirty++;
1944 6f4d00ee 2013-09-23 0intro if(b->iostate != BioDirty)
1945 6f4d00ee 2013-09-23 0intro continue;
1946 6f4d00ee 2013-09-23 0intro p->part = b->part;
1947 6f4d00ee 2013-09-23 0intro p->addr = b->addr;
1948 6f4d00ee 2013-09-23 0intro p->vers = b->vers;
1951 6f4d00ee 2013-09-23 0intro if(ndirty != c->ndirty){
1952 6f4d00ee 2013-09-23 0intro fprint(2, "%s: ndirty mismatch expected %d found %d\n",
1953 6f4d00ee 2013-09-23 0intro argv0, c->ndirty, ndirty);
1954 6f4d00ee 2013-09-23 0intro c->ndirty = ndirty;
1956 4b576658 2013-09-23 0intro qunlock(&c->lk);
1958 6f4d00ee 2013-09-23 0intro c->bw = p - c->baddr;
1959 6f4d00ee 2013-09-23 0intro qsort(c->baddr, c->bw, sizeof(BAddr), baddrCmp);
1963 6f4d00ee 2013-09-23 0intro * This is not thread safe, i.e. it can't be called from multiple threads.
1965 6f4d00ee 2013-09-23 0intro * It's okay how we use it, because it only gets called in
1966 6f4d00ee 2013-09-23 0intro * the flushThread. And cacheFree, but only after
1967 6f4d00ee 2013-09-23 0intro * cacheFree has killed off the flushThread.
1969 6f4d00ee 2013-09-23 0intro static int
1970 6f4d00ee 2013-09-23 0intro cacheFlushBlock(Cache *c)
1972 6f4d00ee 2013-09-23 0intro Block *b;
1973 6f4d00ee 2013-09-23 0intro BAddr *p;
1974 6f4d00ee 2013-09-23 0intro int lockfail, nfail;
1976 6f4d00ee 2013-09-23 0intro nfail = 0;
1977 6f4d00ee 2013-09-23 0intro for(;;){
1978 6f4d00ee 2013-09-23 0intro if(c->br == c->be){
1979 6f4d00ee 2013-09-23 0intro if(c->bw == 0 || c->bw == c->be)
1980 6f4d00ee 2013-09-23 0intro flushFill(c);
1981 6f4d00ee 2013-09-23 0intro c->br = 0;
1982 6f4d00ee 2013-09-23 0intro c->be = c->bw;
1983 6f4d00ee 2013-09-23 0intro c->bw = 0;
1984 6f4d00ee 2013-09-23 0intro c->nflush = 0;
1987 6f4d00ee 2013-09-23 0intro if(c->br == c->be)
1988 6f4d00ee 2013-09-23 0intro return 0;
1989 6f4d00ee 2013-09-23 0intro p = c->baddr + c->br;
1990 6f4d00ee 2013-09-23 0intro c->br++;
1991 6f4d00ee 2013-09-23 0intro b = _cacheLocalLookup(c, p->part, p->addr, p->vers, Nowaitlock,
1992 6f4d00ee 2013-09-23 0intro &lockfail);
1994 6f4d00ee 2013-09-23 0intro if(b && blockWrite(b, Nowaitlock)){
1995 6f4d00ee 2013-09-23 0intro c->nflush++;
1996 6f4d00ee 2013-09-23 0intro blockPut(b);
1997 6f4d00ee 2013-09-23 0intro return 1;
2000 6f4d00ee 2013-09-23 0intro blockPut(b);
2003 6f4d00ee 2013-09-23 0intro * Why didn't we write the block?
2006 6f4d00ee 2013-09-23 0intro /* Block already written out */
2007 6f4d00ee 2013-09-23 0intro if(b == nil && !lockfail)
2008 6f4d00ee 2013-09-23 0intro continue;
2010 6f4d00ee 2013-09-23 0intro /* Failed to acquire lock; sleep if happens a lot. */
2011 6f4d00ee 2013-09-23 0intro if(lockfail && ++nfail > 100){
2012 6f4d00ee 2013-09-23 0intro sleep(500);
2013 6f4d00ee 2013-09-23 0intro nfail = 0;
2015 6f4d00ee 2013-09-23 0intro /* Requeue block. */
2016 6f4d00ee 2013-09-23 0intro if(c->bw < c->be)
2017 6f4d00ee 2013-09-23 0intro c->baddr[c->bw++] = *p;
2022 6f4d00ee 2013-09-23 0intro * Occasionally flush dirty blocks from memory to the disk.
2024 6f4d00ee 2013-09-23 0intro static void
2025 6f4d00ee 2013-09-23 0intro flushThread(void *a)
2027 6f4d00ee 2013-09-23 0intro Cache *c = a;
2030 4b576658 2013-09-23 0intro threadsetname("flush");
2031 4b576658 2013-09-23 0intro qlock(&c->lk);
2032 4b576658 2013-09-23 0intro while(c->die.l == nil){
2033 4b576658 2013-09-23 0intro rsleep(&c->flush);
2034 4b576658 2013-09-23 0intro qunlock(&c->lk);
2035 6f4d00ee 2013-09-23 0intro for(i=0; i<FlushSize; i++)
2036 6f4d00ee 2013-09-23 0intro if(!cacheFlushBlock(c)){
2038 6f4d00ee 2013-09-23 0intro * If i==0, could be someone is waking us repeatedly
2039 6f4d00ee 2013-09-23 0intro * to flush the cache but there's no work to do.
2040 6f4d00ee 2013-09-23 0intro * Pause a little.
2042 6f4d00ee 2013-09-23 0intro if(i==0){
2043 6f4d00ee 2013-09-23 0intro // fprint(2, "%s: flushthread found "
2044 6f4d00ee 2013-09-23 0intro // "nothing to flush - %d dirty\n",
2045 6f4d00ee 2013-09-23 0intro // argv0, c->ndirty);
2046 6f4d00ee 2013-09-23 0intro sleep(250);
2050 6f4d00ee 2013-09-23 0intro if(i==0 && c->ndirty){
2052 6f4d00ee 2013-09-23 0intro * All the blocks are being written right now -- there's nothing to do.
2053 6f4d00ee 2013-09-23 0intro * We might be spinning with cacheFlush though -- he'll just keep
2054 6f4d00ee 2013-09-23 0intro * kicking us until c->ndirty goes down. Probably we should sleep
2055 6f4d00ee 2013-09-23 0intro * on something that the diskThread can kick, but for now we'll
2056 6f4d00ee 2013-09-23 0intro * just pause for a little while waiting for disks to finish.
2058 6f4d00ee 2013-09-23 0intro sleep(100);
2060 4b576658 2013-09-23 0intro qlock(&c->lk);
2061 4b576658 2013-09-23 0intro rwakeupall(&c->flushwait);
2063 6f4d00ee 2013-09-23 0intro c->ref--;
2064 4b576658 2013-09-23 0intro rwakeup(&c->die);
2065 4b576658 2013-09-23 0intro qunlock(&c->lk);
2069 6f4d00ee 2013-09-23 0intro * Flush the cache.
2072 6f4d00ee 2013-09-23 0intro cacheFlush(Cache *c, int wait)
2074 4b576658 2013-09-23 0intro qlock(&c->lk);
2075 6f4d00ee 2013-09-23 0intro if(wait){
2076 6f4d00ee 2013-09-23 0intro while(c->ndirty){
2077 6f4d00ee 2013-09-23 0intro // consPrint("cacheFlush: %d dirty blocks, uhead %p\n",
2078 6f4d00ee 2013-09-23 0intro // c->ndirty, c->uhead);
2079 4b576658 2013-09-23 0intro rwakeup(&c->flush);
2080 4b576658 2013-09-23 0intro rsleep(&c->flushwait);
2082 6f4d00ee 2013-09-23 0intro // consPrint("cacheFlush: done (uhead %p)\n", c->ndirty, c->uhead);
2083 6f4d00ee 2013-09-23 0intro }else if(c->ndirty)
2084 4b576658 2013-09-23 0intro rwakeup(&c->flush);
2085 4b576658 2013-09-23 0intro qunlock(&c->lk);
2089 6f4d00ee 2013-09-23 0intro * Kick the flushThread every 30 seconds.
2091 6f4d00ee 2013-09-23 0intro static void
2092 6f4d00ee 2013-09-23 0intro cacheSync(void *v)
2094 6f4d00ee 2013-09-23 0intro Cache *c;
2097 6f4d00ee 2013-09-23 0intro cacheFlush(c, 0);