commit d3c116bf7268bde4ceb3311eb4ed995fc4776487 from: Stefan Sperling date: Fri Oct 15 09:10:14 2021 UTC cache raw objects in order to speed up gotadmin pack commit - cc7a354afc4ae05de465ac2cf97b558fec5b271f commit + d3c116bf7268bde4ceb3311eb4ed995fc4776487 blob - 3857f54da2b2d09d1775f659e036b7fa820b9b3b blob + 0379a4f0e0a95fc21aceea4e5fa496a9073e2043 --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -42,6 +42,7 @@ struct got_raw_object { size_t hdrlen; size_t blocksize; uint8_t *read_buf; + int refcnt; /* > 0 if open and/or cached */ }; struct got_commit_object { @@ -106,7 +107,7 @@ const struct got_error *got_object_read_header_privsep struct got_object_id *, struct got_repository *, int); const struct got_error *got_object_open(struct got_object **, struct got_repository *, struct got_object_id *); -const struct got_error *got_object_raw_open(struct got_raw_object **, int, +const struct got_error *got_object_raw_open(struct got_raw_object **, int *, struct got_repository *, struct got_object_id *, size_t); void got_object_raw_rewind(struct got_raw_object *); size_t got_object_raw_get_hdrlen(struct got_raw_object *); blob - 4ac0ad0c7b384591131160ddd363b928e57c7d5e blob + b3a2cb74d16eb1cea91ea7e995e998721d06f5fc --- lib/got_lib_object_cache.h +++ lib/got_lib_object_cache.h @@ -19,6 +19,7 @@ enum got_object_cache_type { GOT_OBJECT_CACHE_TYPE_TREE, GOT_OBJECT_CACHE_TYPE_COMMIT, GOT_OBJECT_CACHE_TYPE_TAG, + GOT_OBJECT_CACHE_TYPE_RAW, }; struct got_object_cache_entry { @@ -28,6 +29,7 @@ struct got_object_cache_entry { struct got_tree_object *tree; struct got_commit_object *commit; struct got_tag_object *tag; + struct got_raw_object *raw; } data; }; blob - b99fe9fe68c49923f58a4b5c45969f4bbcded942 blob + 831cb967da0e76447d97b70ff058416fbf492de4 --- lib/got_lib_repository.h +++ lib/got_lib_repository.h @@ -83,6 +83,7 @@ struct got_repository { struct got_object_cache treecache; struct got_object_cache commitcache; struct got_object_cache tagcache; + struct got_object_cache rawcache; /* Settings read from Git configuration files. */ int gitconfig_repository_format_version; @@ -116,6 +117,10 @@ const struct got_error*got_repo_cache_tag(struct got_r struct got_object_id *, struct got_tag_object *); struct got_tag_object *got_repo_get_cached_tag(struct got_repository *, struct got_object_id *); +const struct got_error*got_repo_cache_raw_object(struct got_repository *, + struct got_object_id *, struct got_raw_object *); +struct got_raw_object *got_repo_get_cached_raw_object(struct got_repository *, + struct got_object_id *); int got_repo_is_packidx_filename(const char *, size_t); const struct got_error *got_repo_search_packidx(struct got_packidx **, int *, struct got_repository *, struct got_object_id *); blob - 5c98d7f542c6cc60b245e230796391a272d5a5ad blob + f8bff2f3fbb380407255d04ff28571cbb1084c6d --- lib/object.c +++ lib/object.c @@ -528,8 +528,9 @@ got_object_open(struct got_object **obj, struct got_re return got_repo_cache_object(repo, id, *obj); } +/* *outfd must be initialized to -1 by caller */ const struct got_error * -got_object_raw_open(struct got_raw_object **obj, int outfd, +got_object_raw_open(struct got_raw_object **obj, int *outfd, struct got_repository *repo, struct got_object_id *id, size_t blocksize) { const struct got_error *err = NULL; @@ -540,8 +541,18 @@ got_object_raw_open(struct got_raw_object **obj, int o size_t hdrlen = 0; char *path_packfile = NULL; - *obj = NULL; + *obj = got_repo_get_cached_raw_object(repo, id); + if (*obj != NULL) { + (*obj)->refcnt++; + return NULL; + } + if (*outfd == -1) { + *outfd = got_opentempfd(); + if (*outfd == -1) + return got_error_from_errno("got_opentempfd"); + } + err = got_repo_search_packidx(&packidx, &idx, repo, id); if (err == NULL) { struct got_pack *pack = NULL; @@ -559,7 +570,7 @@ got_object_raw_open(struct got_raw_object **obj, int o goto done; } err = read_packed_object_raw_privsep(&outbuf, &size, &hdrlen, - outfd, pack, packidx, idx, id); + *outfd, pack, packidx, idx, id); if (err) goto done; } else if (err->code == GOT_ERR_NO_OBJ) { @@ -568,7 +579,7 @@ got_object_raw_open(struct got_raw_object **obj, int o err = got_object_open_loose_fd(&fd, id, repo); if (err) goto done; - err = read_object_raw_privsep(&outbuf, &size, &hdrlen, outfd, + err = read_object_raw_privsep(&outbuf, &size, &hdrlen, *outfd, id, repo, fd); if (err) goto done; @@ -595,7 +606,7 @@ got_object_raw_open(struct got_raw_object **obj, int o (*obj)->data = outbuf; } else { struct stat sb; - if (fstat(outfd, &sb) == -1) { + if (fstat(*outfd, &sb) == -1) { err = got_error_from_errno("fstat"); goto done; } @@ -605,16 +616,18 @@ got_object_raw_open(struct got_raw_object **obj, int o goto done; } - (*obj)->f = fdopen(outfd, "r"); + (*obj)->f = fdopen(*outfd, "r"); if ((*obj)->f == NULL) { err = got_error_from_errno("fdopen"); goto done; } (*obj)->data = NULL; + *outfd = -1; } (*obj)->hdrlen = hdrlen; (*obj)->size = size; (*obj)->blocksize = blocksize; + err = got_repo_cache_raw_object(repo, id, *obj); done: free(path_packfile); if (err) { @@ -623,7 +636,8 @@ done: *obj = NULL; } free(outbuf); - } + } else + (*obj)->refcnt++; return err; } @@ -656,19 +670,6 @@ got_object_raw_read_block(size_t *outlenp, struct got_ return got_ferror(obj->f, GOT_ERR_IO); *outlenp = n; return NULL; -} - -const struct got_error * -got_object_raw_close(struct got_raw_object *obj) -{ - const struct got_error *err = NULL; - - free(obj->read_buf); - if (obj->f != NULL && fclose(obj->f) == EOF && err == NULL) - err = got_error_from_errno("fclose"); - free(obj->data); - free(obj); - return err; } const struct got_error * blob - 4ef1c3c68734fcfaa46bc785e8d103a58b652d01 blob + f3cfb97dc2422b59557ad5f258c44b0578303031 --- lib/object_cache.c +++ lib/object_cache.c @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -42,12 +43,15 @@ #define GOT_OBJECT_CACHE_SIZE_TREE 256 #define GOT_OBJECT_CACHE_SIZE_COMMIT 64 #define GOT_OBJECT_CACHE_SIZE_TAG 2048 +#define GOT_OBJECT_CACHE_SIZE_RAW 64 #define GOT_OBJECT_CACHE_MAX_ELEM_SIZE 1048576 /* 1 MB */ const struct got_error * got_object_cache_init(struct got_object_cache *cache, enum got_object_cache_type type) { + struct rlimit rl; + memset(cache, 0, sizeof(*cache)); cache->idset = got_object_idset_alloc(); @@ -68,6 +72,13 @@ got_object_cache_init(struct got_object_cache *cache, case GOT_OBJECT_CACHE_TYPE_TAG: cache->size = GOT_OBJECT_CACHE_SIZE_TAG; break; + case GOT_OBJECT_CACHE_TYPE_RAW: + if (getrlimit(RLIMIT_NOFILE, &rl) == -1) + return got_error_from_errno("getrlimit"); + cache->size = GOT_OBJECT_CACHE_SIZE_RAW; + if (cache->size > rl.rlim_cur / 16) + cache->size = rl.rlim_cur / 16; + break; } return NULL; } @@ -128,6 +139,12 @@ get_size_tag(struct got_tag_object *tag) return size; } +size_t +get_size_raw(struct got_raw_object *raw) +{ + return sizeof(*raw); +} + const struct got_error * got_object_cache_add(struct got_object_cache *cache, struct got_object_id *id, void *item) { @@ -149,6 +166,9 @@ got_object_cache_add(struct got_object_cache *cache, s case GOT_OBJECT_CACHE_TYPE_TAG: size = get_size_tag((struct got_tag_object *)item); break; + case GOT_OBJECT_CACHE_TYPE_RAW: + size = get_size_raw((struct got_raw_object *)item); + break; default: return got_error(GOT_ERR_OBJ_TYPE); } @@ -171,6 +191,9 @@ got_object_cache_add(struct got_object_cache *cache, s break; case GOT_OBJECT_CACHE_TYPE_TAG: fprintf(stderr, "tag"); + break; + case GOT_OBJECT_CACHE_TYPE_RAW: + fprintf(stderr, "raw"); break; } fprintf(stderr, " %s (%zd bytes; %zd MB)\n", id_str, size, @@ -200,6 +223,9 @@ got_object_cache_add(struct got_object_cache *cache, s case GOT_OBJECT_CACHE_TYPE_TAG: got_object_tag_close(ce->data.tag); break; + case GOT_OBJECT_CACHE_TYPE_RAW: + got_object_raw_close(ce->data.raw); + break; } free(ce); cache->cache_evict++; @@ -222,6 +248,9 @@ got_object_cache_add(struct got_object_cache *cache, s case GOT_OBJECT_CACHE_TYPE_TAG: ce->data.tag = (struct got_tag_object *)item; break; + case GOT_OBJECT_CACHE_TYPE_RAW: + ce->data.raw = (struct got_raw_object *)item; + break; } err = got_object_idset_add(cache->idset, id, ce); @@ -248,6 +277,8 @@ got_object_cache_get(struct got_object_cache *cache, s return ce->data.commit; case GOT_OBJECT_CACHE_TYPE_TAG: return ce->data.tag; + case GOT_OBJECT_CACHE_TYPE_RAW: + return ce->data.raw; } } @@ -275,6 +306,7 @@ check_refcount(struct got_object_id *id, void *data, v struct got_tree_object *tree; struct got_commit_object *commit; struct got_tag_object *tag; + struct got_raw_object *raw; char *id_str; if (got_object_id_str(&id_str, id) != NULL) @@ -309,6 +341,13 @@ check_refcount(struct got_object_id *id, void *data, v fprintf(stderr, "tag %s has %d unclaimed references\n", id_str, tag->refcnt - 1); break; + case GOT_OBJECT_CACHE_TYPE_RAW: + raw = ce->data.raw; + if (raw->refcnt == 1) + break; + fprintf(stderr, "raw %s has %d unclaimed references\n", + id_str, raw->refcnt - 1); + break; } free(id_str); return NULL; @@ -332,6 +371,9 @@ got_object_cache_close(struct got_object_cache *cache) case GOT_OBJECT_CACHE_TYPE_TAG: print_cache_stats(cache, "tag"); break; + case GOT_OBJECT_CACHE_TYPE_RAW: + print_cache_stats(cache, "raw"); + break; } got_object_idset_for_each(cache->idset, check_refcount, cache); blob - 3a483955358cbec0e4265a879e78817317bfd347 blob + f5a7e8d96b41d3c4b2f5699a936fa8bd32bf8118 --- lib/object_parse.c +++ lib/object_parse.c @@ -133,6 +133,25 @@ got_object_close(struct got_object *obj) free(obj); } +const struct got_error * +got_object_raw_close(struct got_raw_object *obj) +{ + const struct got_error *err = NULL; + + if (obj->refcnt > 0) { + obj->refcnt--; + if (obj->refcnt > 0) + return NULL; + } + + free(obj->read_buf); + if (obj->f != NULL && fclose(obj->f) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + free(obj->data); + free(obj); + return err; +} + void got_object_qid_free(struct got_object_qid *qid) { blob - 8ab75df8dd3ab328895f4354591fe0d968fe613f blob + 88fa31a0498d4d53411e0111ccc521a5e0893dfb --- lib/pack_create.c +++ lib/pack_create.c @@ -195,18 +195,9 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in m->obj_type == GOT_OBJ_TYPE_TAG) continue; - if (outfd == -1) { - outfd = got_opentempfd(); - if (outfd == -1) { - err = got_error_from_errno("got_opentempfd"); - goto done; - } - } - err = got_object_raw_open(&raw, outfd, repo, &m->id, 8192); + err = got_object_raw_open(&raw, &outfd, repo, &m->id, 8192); if (err) goto done; - if (raw->data == NULL) - outfd = -1; /* outfd is now raw->f */ m->size = raw->size; err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen, @@ -234,20 +225,10 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in base->obj_type != m->obj_type) continue; - if (outfd == -1) { - outfd = got_opentempfd(); - if (outfd == -1) { - err = got_error_from_errno( - "got_opentempfd"); - goto done; - } - } - err = got_object_raw_open(&base_raw, outfd, repo, + err = got_object_raw_open(&base_raw, &outfd, repo, &base->id, 8192); if (err) goto done; - if (base_raw->data == NULL) - outfd = -1; /* outfd is now base_raw->f */ err = got_deltify(&deltas, &ndeltas, raw->f, raw->hdrlen, raw->size + raw->hdrlen, base->dtab, base_raw->f, base_raw->hdrlen, @@ -1178,18 +1159,9 @@ genpack(uint8_t *pack_sha1, FILE *packfile, } m = meta[i]; m->off = ftello(packfile); - if (outfd == -1) { - outfd = got_opentempfd(); - if (outfd == -1) { - err = got_error_from_errno("got_opentempfd"); - goto done; - } - } - err = got_object_raw_open(&raw, outfd, repo, &m->id, 8192); + err = got_object_raw_open(&raw, &outfd, repo, &m->id, 8192); if (err) goto done; - if (raw->data == NULL) - outfd = -1; /* outfd is now raw->f */ if (m->deltas == NULL) { err = packhdr(&nh, buf, sizeof(buf), m->obj_type, raw->size); blob - bd802443593987f376a69bc5909a3e7497bb7297 blob + 3c4b83d77f6d8a35b74a2aeaffe36f60f8f0a7c3 --- lib/repository.c +++ lib/repository.c @@ -345,8 +345,35 @@ got_repo_get_cached_tag(struct got_repository *repo, s { return (struct got_tag_object *)got_object_cache_get( &repo->tagcache, id); +} + +const struct got_error * +got_repo_cache_raw_object(struct got_repository *repo, struct got_object_id *id, + struct got_raw_object *raw) +{ +#ifndef GOT_NO_OBJ_CACHE + const struct got_error *err = NULL; + err = got_object_cache_add(&repo->rawcache, id, raw); + if (err) { + if (err->code == GOT_ERR_OBJ_EXISTS || + err->code == GOT_ERR_OBJ_TOO_LARGE) + err = NULL; + return err; + } + raw->refcnt++; +#endif + return NULL; } + +struct got_raw_object * +got_repo_get_cached_raw_object(struct got_repository *repo, + struct got_object_id *id) +{ + return (struct got_raw_object *)got_object_cache_get(&repo->rawcache, id); +} + + static const struct got_error * open_repo(struct got_repository *repo, const char *path) { @@ -665,6 +692,10 @@ got_repo_open(struct got_repository **repop, const cha GOT_OBJECT_CACHE_TYPE_TAG); if (err) goto done; + err = got_object_cache_init(&repo->rawcache, + GOT_OBJECT_CACHE_TYPE_RAW); + if (err) + goto done; repo->pack_cache_size = GOT_PACK_CACHE_SIZE; if (repo->pack_cache_size > rl.rlim_cur / 8) @@ -761,6 +792,7 @@ got_repo_close(struct got_repository *repo) got_object_cache_close(&repo->treecache); got_object_cache_close(&repo->commitcache); got_object_cache_close(&repo->tagcache); + got_object_cache_close(&repo->rawcache); for (i = 0; i < nitems(repo->privsep_children); i++) { if (repo->privsep_children[i].imsg_fd == -1)