commit 4277420a1f45ca37345733120cbe89d19c550f92 from: Stefan Sperling date: Sat Jun 29 10:58:30 2019 UTC speed up matching of abbreviated commit IDs in pack files commit - 45f2ef4ca88b7a468ccae02543aa4b0baa662dd2 commit + 4277420a1f45ca37345733120cbe89d19c550f92 blob - 7af741d237c7cd84aca6789c32dc4cdf3bb5237a blob + a5191eb2b7c3a8299b81d38016569cf8582cbfd5 --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -159,12 +159,10 @@ struct got_packfile_obj_data { const struct got_error *got_packidx_init_hdr(struct got_packidx *, int); const struct got_error *got_packidx_open(struct got_packidx **, const char *, int); -const struct got_error* got_packidx_close(struct got_packidx *); +const struct got_error *got_packidx_close(struct got_packidx *); int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *); -typedef const struct got_error *(*got_packidx_for_each_id_cb)(void *, - struct got_object_id *); -const struct got_error *got_packidx_for_each_id(struct got_packidx *, - got_packidx_for_each_id_cb cb, void *); +const struct got_error *got_packidx_match_id_str_prefix(struct got_object_id **, + struct got_packidx *, const char *); const struct got_error *got_packfile_open_object(struct got_object **, struct got_pack *, struct got_packidx *, int, struct got_object_id *); blob - d207ee390d6e7f6252e01034122dc9e112bfd66d blob + 141cb2590fa14834c63381929a0a7438ae3e4556 --- lib/got_lib_sha1.h +++ lib/got_lib_sha1.h @@ -16,5 +16,6 @@ #define GOT_SHA1_STRING_ZERO "0000000000000000000000000000000000000000" +int got_parse_xdigit(uint8_t *, const char *); int got_parse_sha1_digest(uint8_t *, const char *); char *got_sha1_digest_to_str(const uint8_t *, char *, size_t); blob - 269e6fa733cf1a2955a5e341a8d46504352492c9 blob + b2200e9c73c6c68c0b87f34944ee0ab24bf48ed6 --- lib/pack.c +++ lib/pack.c @@ -444,24 +444,57 @@ got_packidx_get_object_idx(struct got_packidx *packidx } const struct got_error * -got_packidx_for_each_id(struct got_packidx *packidx, - got_packidx_for_each_id_cb cb, void *cb_arg) +got_packidx_match_id_str_prefix(struct got_object_id **unique_id, + struct got_packidx *packidx, const char *id_str_prefix) { - const struct got_error *err = NULL; - uint32_t nobj = betoh32(packidx->hdr.fanout_table[0xff]); + u_int8_t id0; + uint32_t totobj = betoh32(packidx->hdr.fanout_table[0xff]); + char hex[3]; + size_t prefix_len = strlen(id_str_prefix); struct got_packidx_object_id *oid; - struct got_object_id id; int i; - for (i = 0; i < nobj; i++) { - oid = &packidx->hdr.sorted_ids[i]; - memcpy(&id, oid->sha1, SHA1_DIGEST_LENGTH); - err = cb(cb_arg, &id); - if (err) + *unique_id = NULL; + + if (prefix_len < 2) + return got_error(GOT_ERR_BAD_OBJ_ID_STR); + + hex[0] = id_str_prefix[0]; + hex[1] = id_str_prefix[1]; + hex[2] = '\0'; + if (!got_parse_xdigit(&id0, hex)) + return got_error(GOT_ERR_BAD_OBJ_ID_STR); + + i = betoh32(packidx->hdr.fanout_table[id0 - 1]); + if (i == 0) + return NULL; + + oid = &packidx->hdr.sorted_ids[i]; + while (i < totobj && oid->sha1[0] == id0) { + char id_str[SHA1_DIGEST_STRING_LENGTH]; + int cmp; + + if (!got_sha1_digest_to_str(oid->sha1, id_str, sizeof(id_str))) + return got_error(GOT_ERR_NO_SPACE); + + cmp = strncmp(id_str, id_str_prefix, prefix_len); + if (cmp < 0) { + oid = &packidx->hdr.sorted_ids[++i]; + continue; + } else if (cmp > 0) break; + + if (*unique_id != NULL) + return got_error(GOT_ERR_AMBIGUOUS_ID); + *unique_id = malloc(sizeof(**unique_id)); + if (*unique_id == NULL) + return got_error_from_errno("malloc"); + memcpy((*unique_id)->sha1, oid->sha1, SHA1_DIGEST_LENGTH); + + oid = &packidx->hdr.sorted_ids[++i]; } - return err; + return NULL; } const struct got_error * blob - f4cf79809502ced018e0ec153ae2240982059eb5 blob + 7eafcbdfc6475382a3c4e3036819fe4661d0da42 --- lib/repository.c +++ lib/repository.c @@ -878,47 +878,16 @@ got_repo_init(const char *repo_path) return NULL; } -struct match_packidx_id_args { - const char *id_str_prefix; - struct got_object_id *unique_id; -}; - static const struct got_error * -match_packidx_id(void *arg, struct got_object_id *id) -{ - const struct got_error *err; - struct match_packidx_id_args *a = arg; - char *id_str; - - err = got_object_id_str(&id_str, id); - if (err) - return err; - - if (strncmp(a->id_str_prefix, id_str, strlen(a->id_str_prefix)) == 0) { - if (a->unique_id == NULL) { - a->unique_id = got_object_id_dup(id); - if (a->unique_id == NULL) - err = got_error_from_errno("got_object_id_dup"); - } - } - - free(id_str); - return err; -} - -static const struct got_error * -match_packed_object_id_prefix(struct got_object_id **unique_id, +match_packed_object(struct got_object_id **unique_id, struct got_repository *repo, const char *id_str_prefix) { const struct got_error *err = NULL; - struct match_packidx_id_args args; char *path_packdir; DIR *packdir; struct dirent *dent; char *path_packidx; - *unique_id = NULL; - path_packdir = got_repo_get_path_objects_pack(repo); if (path_packdir == NULL) return got_error_from_errno("got_repo_get_path_objects_pack"); @@ -931,7 +900,7 @@ match_packed_object_id_prefix(struct got_object_id **u while ((dent = readdir(packdir)) != NULL) { struct got_packidx *packidx; - const struct got_error *cb_err; + struct got_object_id *unique_id_in_pack; if (!is_packidx_filename(dent->d_name, dent->d_namlen)) continue; @@ -939,30 +908,31 @@ match_packed_object_id_prefix(struct got_object_id **u if (asprintf(&path_packidx, "%s/%s", path_packdir, dent->d_name) == -1) { err = got_error_from_errno("asprintf"); - goto done; + break; } err = got_packidx_open(&packidx, path_packidx, 0); free(path_packidx); if (err) - goto done; + break; - args.id_str_prefix = id_str_prefix; - args.unique_id = NULL; - cb_err = got_packidx_for_each_id(packidx, match_packidx_id, - &args); + err = got_packidx_match_id_str_prefix(&unique_id_in_pack, + packidx, id_str_prefix); + if (err) { + got_packidx_close(packidx); + break; + } err = got_packidx_close(packidx); - if (err) + if (err) { + free(unique_id_in_pack); break; - if (cb_err) { - err = cb_err; - break; } - if (args.unique_id) { - if (*unique_id == NULL) - *unique_id = args.unique_id; - else { + if (unique_id_in_pack) { + if (*unique_id == NULL) { + *unique_id = unique_id_in_pack; + } else { + free(unique_id_in_pack); err = got_error(GOT_ERR_AMBIGUOUS_ID); break; } @@ -980,7 +950,7 @@ done: } static const struct got_error * -match_object_id(struct got_object_id **unique_id, const char *path_objects, +match_loose_object(struct got_object_id **unique_id, const char *path_objects, const char *object_dir, const char *id_str_prefix, struct got_repository *repo) { @@ -989,20 +959,7 @@ match_object_id(struct got_object_id **unique_id, cons DIR *dir = NULL; struct dirent *dent; struct got_object_id id; - int i; - for (i = 0; i < strlen(id_str_prefix); i++) { - if (isxdigit((unsigned char)id_str_prefix[i])) - continue; - return got_error(GOT_ERR_BAD_OBJ_ID_STR); - } - - /* Search pack index. */ - err = match_packed_object_id_prefix(unique_id, repo, id_str_prefix); - if (err) - return err; - - /* Search on-disk directories. */ if (asprintf(&path, "%s/%s", path_objects, object_dir) == -1) { err = got_error_from_errno("asprintf"); goto done; @@ -1010,6 +967,10 @@ match_object_id(struct got_object_id **unique_id, cons dir = opendir(path); if (dir == NULL) { + if (errno == ENOENT) { + err = NULL; + goto done; + } err = got_error_from_errno2("opendir", path); goto done; } @@ -1046,9 +1007,6 @@ match_object_id(struct got_object_id **unique_id, cons goto done; } } - - if (err == NULL && *unique_id == NULL) - err = got_error(GOT_ERR_NO_OBJ); done: if (err) { free(*unique_id); @@ -1061,20 +1019,32 @@ done: } const struct got_error * -got_repo_match_object_id_prefix(struct got_object_id **id, const char *id_str_prefix, - struct got_repository *repo) +got_repo_match_object_id_prefix(struct got_object_id **id, + const char *id_str_prefix, struct got_repository *repo) { const struct got_error *err = NULL; char *path_objects = got_repo_get_path_objects(repo); char *object_dir = NULL; size_t len; + int i; + *id = NULL; + + for (i = 0; i < strlen(id_str_prefix); i++) { + if (isxdigit((unsigned char)id_str_prefix[i])) + continue; + return got_error(GOT_ERR_BAD_OBJ_ID_STR); + } + len = strlen(id_str_prefix); if (len >= 2) { + err = match_packed_object(id, repo, id_str_prefix); + if (err) + return err; object_dir = strndup(id_str_prefix, 2); if (object_dir == NULL) return got_error_from_errno("strdup"); - err = match_object_id(id, path_objects, object_dir, + err = match_loose_object(id, path_objects, object_dir, id_str_prefix, repo); } else if (len == 1) { int i; @@ -1082,7 +1052,10 @@ got_repo_match_object_id_prefix(struct got_object_id * if (asprintf(&object_dir, "%s%.1x", id_str_prefix, i) == -1) return got_error_from_errno("asprintf"); - err = match_object_id(id, path_objects, object_dir, + err = match_packed_object(id, repo, object_dir); + if (err) + return err; + err = match_loose_object(id, path_objects, object_dir, id_str_prefix, repo); if (err) break; @@ -1091,5 +1064,11 @@ got_repo_match_object_id_prefix(struct got_object_id * return got_error(GOT_ERR_BAD_OBJ_ID_STR); free(object_dir); + if (err) { + free(*id); + *id = NULL; + } else if (*id == NULL) + err = got_error(GOT_ERR_NO_OBJ); + return err; } blob - d5e11f068c05988fcb1d445cfd83786de2c585d5 blob + 1f25f1bb9dff77000b14a85bc42e8bd04577d1a3 --- lib/sha1.c +++ lib/sha1.c @@ -23,8 +23,8 @@ #include "got_lib_sha1.h" -static int -parse_xdigit(uint8_t *val, const char *hex) +int +got_parse_xdigit(uint8_t *val, const char *hex) { char *ep; long lval; @@ -54,7 +54,7 @@ got_parse_sha1_digest(uint8_t *digest, const char *lin hex[j] = *line; line++; } - if (!parse_xdigit(&b, hex)) + if (!got_parse_xdigit(&b, hex)) return 0; digest[i] = b; }