commit 8da9e5f4a4b7b369d617fdceb6e9b715f8d6d121 from: Stefan Sperling date: Sat Jan 12 17:39:19 2019 UTC implement checkout+update as single-pass diff between file index and tree commit - c1669e2e5fb4f6007afe46318e82a4e0de689390 commit + 8da9e5f4a4b7b369d617fdceb6e9b715f8d6d121 blob - 155fa1f5fc5510b74867022977ae2fb902fba6a0 blob + dd1d1db9598a09e3df467644273c623ca483d674 --- lib/fileindex.c +++ lib/fileindex.c @@ -26,6 +26,7 @@ #include #include "got_error.h" +#include "got_object.h" #include "got_lib_path.h" #include "got_lib_fileindex.h" @@ -518,5 +519,171 @@ got_fileindex_read(struct got_fileindex *fileindex, FI return NULL; } + +static int +in_same_subdir(struct got_fileindex_entry *ie, const char *parent_path, + struct got_tree_entry *te) +{ + size_t parent_len = strlen(parent_path); + size_t te_name_len = strlen(te->name); + char *ie_name; + + if (!got_path_is_child(ie->path, parent_path, parent_len)) + return 0; + ie_name = ie->path + parent_len; + while (ie_name[0] == '/') + ie_name++; + if (strncmp(ie_name, te->name, te_name_len) != 0) + return 0; + if (ie_name[te_name_len] == '/') + return 0; + + return 1; +} + +static int +cmp_entries(struct got_fileindex_entry *ie, const char *parent_path, + struct got_tree_entry *te) +{ + size_t parent_len = strlen(parent_path); + char *ie_name; + + if (!in_same_subdir(ie, parent_path, te)) { + if (parent_path[0]) + return got_compare_paths(ie->path, parent_path); + return got_compare_paths(ie->path, te->name); + } + + ie_name = ie->path + parent_len; + while (ie_name[0] == '/') + ie_name++; + + return got_compare_paths(ie_name, te->name); +} + +static const struct got_error * +diff_fileindex_tree(struct got_fileindex *, struct got_fileindex_entry **, + struct got_tree_object *, const char *, struct got_repository *, + struct got_fileindex_diff_cb *, void *); + +static const struct got_error * +walk_tree(struct got_tree_entry **next, struct got_fileindex *fileindex, + struct got_fileindex_entry **ie, struct got_tree_entry *te, + const char *path, struct got_repository *repo, + struct got_fileindex_diff_cb *cb, void *cb_arg) +{ + const struct got_error *err = NULL; + + if (te && S_ISREG(te->mode)) { + *next = SIMPLEQ_NEXT(te, entry); + return NULL; + } + + while (te && S_ISDIR(te->mode)) { + char *subpath; + struct got_tree_object *subtree; + + if (asprintf(&subpath, "%s%s%s", path, + path[0] == '\0' ? "" : "/", te->name) == -1) + return got_error_from_errno(); + + err = got_object_open_as_tree(&subtree, repo, te->id); + if (err) { + free(subpath); + return err; + } + + if (*ie == NULL || !in_same_subdir(*ie, path, te)) { + err = cb->diff_new(cb_arg, te, path); + if (err) + return err; + } + + err = diff_fileindex_tree(fileindex, ie, subtree, + subpath, repo, cb, cb_arg); + free(subpath); + got_object_tree_close(subtree); + if (err) + return err; + te = SIMPLEQ_NEXT(te, entry); + } + + *next = te; + return NULL; +} + +static const struct got_error * +diff_fileindex_tree(struct got_fileindex *fileindex, + struct got_fileindex_entry **ie, struct got_tree_object *tree, + const char *path, struct got_repository *repo, + struct got_fileindex_diff_cb *cb, void *cb_arg) +{ + const struct got_error *err = NULL; + struct got_tree_entry *te = NULL; + size_t path_len = strlen(path); + const struct got_tree_entries *entries; + struct got_fileindex_entry *next; + + entries = got_object_tree_get_entries(tree); + te = SIMPLEQ_FIRST(&entries->head); + do { + if (te && *ie) { + int cmp = cmp_entries(*ie, path, te); + if (cmp == 0) { + err = cb->diff_old_new(cb_arg, *ie, te, + path); + if (err) + break; + *ie = RB_NEXT(got_fileindex_tree, + &fileindex->entries, *ie); + err = walk_tree(&te, fileindex, ie, te, + path, repo, cb, cb_arg); + } else if (cmp < 0) { + next = RB_NEXT(got_fileindex_tree, + &fileindex->entries, *ie); + err = cb->diff_old(cb_arg, *ie, path); + if (err) + break; + *ie = next; + } else { + err = cb->diff_new(cb_arg, te, path); + if (err) + break; + err = walk_tree(&te, fileindex, ie, te, + path, repo, cb, cb_arg); + } + if (err) + break; + } else if (*ie) { + next = RB_NEXT(got_fileindex_tree, + &fileindex->entries, *ie); + err = cb->diff_old(cb_arg, *ie, path); + if (err) + break; + *ie = next; + } else if (te) { + err = cb->diff_new(cb_arg, te, path); + if (err) + break; + err = walk_tree(&te, fileindex, ie, te, path, repo, cb, + cb_arg); + if (err) + break; + } + } while ((*ie && got_path_is_child((*ie)->path, path, path_len)) || te); + + return err; +} + +const struct got_error * +got_fileindex_diff_tree(struct got_fileindex *fileindex, + struct got_tree_object *tree, struct got_repository *repo, + struct got_fileindex_diff_cb *cb, void *cb_arg) +{ + struct got_fileindex_entry *min; + min = RB_MIN(got_fileindex_tree, &fileindex->entries); + return diff_fileindex_tree(fileindex, &min, tree, "", repo, cb, cb_arg); +} + RB_GENERATE(got_fileindex_tree, got_fileindex_entry, entry, got_fileindex_cmp); blob - 3291b6aed6248fb6f4ab7108567a32208b6bc876 blob + 791dba0cf4be882a105d7d91c47c94691d177dc5 --- lib/got_lib_fileindex.h +++ lib/got_lib_fileindex.h @@ -112,3 +112,19 @@ typedef const struct got_error *(*got_fileindex_cb)(vo struct got_fileindex_entry *); const struct got_error *got_fileindex_for_each_entry_safe( struct got_fileindex *, got_fileindex_cb cb, void *); + +typedef const struct got_error *(*got_fileindex_diff_old_new_cb)(void *, + struct got_fileindex_entry *, struct got_tree_entry *, const char *); +typedef const struct got_error *(*got_fileindex_diff_old_cb)(void *, + struct got_fileindex_entry *, const char *); +typedef const struct got_error *(*got_fileindex_diff_new_cb)(void *, + struct got_tree_entry *, const char *); +struct got_fileindex_diff_cb { + got_fileindex_diff_old_new_cb diff_old_new; + got_fileindex_diff_old_cb diff_old; + got_fileindex_diff_new_cb diff_new; +}; + +const struct got_error *got_fileindex_diff_tree(struct got_fileindex *, + struct got_tree_object *, struct got_repository *, + struct got_fileindex_diff_cb *, void *); blob - 6f0f5dc9245062ceb219c057933f229d2e7c6c0b blob + a4f547bfffca9cd4b6dd169e2f1553e18d3cd663 --- lib/got_lib_path.h +++ lib/got_lib_path.h @@ -52,6 +52,9 @@ const struct got_error *got_path_skip_common_ancestor( /* Determine whether a path points to the root directory "/" . */ int got_path_is_root_dir(const char *); +/* Determine whether a path is a path-wise child of another path. */ +int got_path_is_child(const char *, const char *, size_t); + /* * Like strcmp() but orders children in subdirectories directly after * their parents. blob - a57cfdeeba9be0ab1803758a4fadd15c64108606 blob + ee46c557ef42afca2357843d8405531b3af0a413 --- lib/path.c +++ lib/path.c @@ -145,6 +145,20 @@ got_path_is_root_dir(const char *path) } int +got_path_is_child(const char *child, const char *parent, size_t parent_len) +{ + if (parent_len == 0) + return 1; + + if (strncmp(parent, child, parent_len) != 0) + return 0; + if (child[parent_len] != '/') + return 0; + + return 1; +} + +int got_compare_paths(const char *path1, const char *path2) { size_t len1 = strlen(path1); blob - ff90b0e46ae236d0a572b6bf0c149df9bdfb55b4 blob + cedb4c631ede7a972c61fdd54ecbacaf59798356 --- lib/worktree.c +++ lib/worktree.c @@ -504,24 +504,14 @@ lock_worktree(struct got_worktree *worktree, int opera return (errno == EWOULDBLOCK ? got_error(GOT_ERR_WORKTREE_BUSY) : got_error_from_errno()); return NULL; -} - -static const char * -apply_path_prefix(struct got_worktree *worktree, const char *path) -{ - const char *p = path; - p += strlen(worktree->path_prefix); - if (*p == '/') - p++; - return p; } static const struct got_error * -blob_checkout(struct got_worktree *worktree, struct got_fileindex *fileindex, +install_blob(struct got_worktree *worktree, struct got_fileindex *fileindex, struct got_fileindex_entry *entry, const char *path, - struct got_blob_object *blob, struct got_repository *repo, - got_worktree_checkout_cb progress_cb, void *progress_arg, - const char *progress_path) + struct got_blob_object *blob, + struct got_repository *repo, got_worktree_checkout_cb progress_cb, + void *progress_arg) { const struct got_error *err = NULL; char *ondisk_path; @@ -530,8 +520,7 @@ blob_checkout(struct got_worktree *worktree, struct go int update = 0; char *tmppath = NULL; - if (asprintf(&ondisk_path, "%s/%s", worktree->root_path, - apply_path_prefix(worktree, path)) == -1) + if (asprintf(&ondisk_path, "%s/%s", worktree->root_path, path) == -1) return got_error_from_errno(); fd = open(ondisk_path, O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, @@ -559,7 +548,7 @@ blob_checkout(struct got_worktree *worktree, struct go } (*progress_cb)(progress_arg, - update ? GOT_STATUS_UPDATE : GOT_STATUS_ADD, progress_path); + update ? GOT_STATUS_UPDATE : GOT_STATUS_ADD, path); hdrlen = got_object_blob_get_hdrlen(blob); do { @@ -595,8 +584,7 @@ blob_checkout(struct got_worktree *worktree, struct go blob->id.sha1, worktree->base_commit_id->sha1); else { err = got_fileindex_entry_alloc(&entry, ondisk_path, - apply_path_prefix(worktree, path), blob->id.sha1, - worktree->base_commit_id->sha1); + path, blob->id.sha1, worktree->base_commit_id->sha1); if (err) goto done; err = got_fileindex_entry_add(fileindex, entry); @@ -617,8 +605,7 @@ add_dir_on_disk(struct got_worktree *worktree, const c const struct got_error *err = NULL; char *abspath; - if (asprintf(&abspath, "%s/%s", worktree->root_path, - apply_path_prefix(worktree, path)) == -1) + if (asprintf(&abspath, "%s/%s", worktree->root_path, path) == -1) return got_error_from_errno(); /* XXX queue work rather than editing disk directly? */ @@ -647,145 +634,35 @@ done: } static const struct got_error * -tree_checkout(struct got_worktree *, struct got_fileindex *, - struct got_tree_object *, const char *, struct got_repository *, - got_worktree_checkout_cb progress_cb, void *progress_arg, - got_worktree_cancel_cb cancel_cb, void *cancel_arg); - -static const struct got_error * -tree_checkout_entry(struct got_worktree *worktree, - struct got_fileindex *fileindex, struct got_tree_entry *te, - const char *parent, struct got_repository *repo, - got_worktree_checkout_cb progress_cb, void *progress_arg, - got_worktree_cancel_cb cancel_cb, void *cancel_arg) +update_blob(struct got_worktree *worktree, + struct got_fileindex *fileindex, struct got_fileindex_entry *ie, + struct got_tree_entry *te, const char *path, + struct got_repository *repo, got_worktree_checkout_cb progress_cb, + void *progress_arg, got_worktree_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err = NULL; - struct got_object *obj = NULL; struct got_blob_object *blob = NULL; - struct got_fileindex_entry *entry = NULL; - struct got_tree_object *tree = NULL; - char *path = NULL; - char *progress_path = NULL; - size_t len; - if (parent[0] == '/' && parent[1] == '\0') - parent = ""; - if (asprintf(&path, "%s/%s", parent, te->name) == -1) - return got_error_from_errno(); - - /* Skip this entry if it is outside of our path prefix. */ - len = MIN(strlen(worktree->path_prefix), strlen(path)); - if (strncmp(path, worktree->path_prefix, len) != 0) { - free(path); - return NULL; - } - - err = got_object_open(&obj, repo, te->id); - if (err) - goto done; - - progress_path = path; - if (strncmp(progress_path, worktree->path_prefix, len) == 0) - progress_path += len; - - switch (obj->type) { - case GOT_OBJ_TYPE_BLOB: - if (strlen(worktree->path_prefix) >= strlen(path)) - break; - entry = got_fileindex_entry_get(fileindex, - apply_path_prefix(worktree, path)); - if (entry && - memcmp(entry->commit_sha1, worktree->base_commit_id->sha1, + if (ie) { + if (memcmp(ie->commit_sha1, worktree->base_commit_id->sha1, SHA1_DIGEST_LENGTH) == 0) { (*progress_cb)(progress_arg, GOT_STATUS_EXISTS, - progress_path); - break; + path); + return NULL; } - if (entry && memcmp(entry->blob_sha1, obj->id.sha1, - SHA1_DIGEST_LENGTH) == 0) - break; - err = got_object_blob_open(&blob, repo, obj, 8192); - if (err) - goto done; - err = blob_checkout(worktree, fileindex, entry, path, blob, - repo, progress_cb, progress_arg, progress_path); - break; - case GOT_OBJ_TYPE_TREE: - if (strlen(worktree->path_prefix) < strlen(path)) { - err = add_dir_on_disk(worktree, path); - if (err) - break; - } - err = got_object_tree_open(&tree, repo, obj); - if (err) - goto done; - /* XXX infinite recursion possible */ - err = tree_checkout(worktree, fileindex, tree, path, repo, - progress_cb, progress_arg, cancel_cb, cancel_arg); - break; - default: - break; + if (memcmp(ie->blob_sha1, + te->id->sha1, SHA1_DIGEST_LENGTH) == 0) + return NULL; } -done: - if (blob) - got_object_blob_close(blob); - if (tree) - got_object_tree_close(tree); - if (obj) - got_object_close(obj); - free(path); - return err; -} - -struct collect_missing_entry_args { - struct got_fileindex *fileindex; - const struct got_tree_entries *entries; - struct got_fileindex_tree missing_entries; - const char *current_subdir; -}; - -static const struct got_error * -collect_missing_file(void *args, struct got_fileindex_entry *entry) -{ - struct collect_missing_entry_args *a = args; - char *start, *end; - ptrdiff_t len; - struct got_tree_entry *te; - int found = 0; - - if (a->current_subdir[0] != '\0' && - strncmp(a->current_subdir, entry->path, - strlen(a->current_subdir)) != 0) - return NULL; - - start = entry->path + strlen(a->current_subdir); - if (a->current_subdir[0] != '\0' && start[0] != '/') - return NULL; - while (start[0] == '/') - start++; - end = strchr(start, '/'); - if (end == NULL) { - end = strchr(start, '\0'); - if (end == NULL) - return got_error(GOT_ERR_BAD_PATH); - } - len = end - start; - - SIMPLEQ_FOREACH(te, &a->entries->head, entry) { - if (strncmp(start, te->name, len) == 0 && - te->name[len] == '\0') { - found = 1; - break; - } - } + err = got_object_open_as_blob(&blob, repo, te->id, 8192); + if (err) + return err; - if (!found) { - got_fileindex_entry_remove(a->fileindex, entry); - RB_INSERT(got_fileindex_tree, &a->missing_entries, entry); - } - - return NULL; + err = install_blob(worktree, fileindex, ie, path, blob, repo, + progress_cb, progress_arg); + got_object_blob_close(blob); + return err; } static const struct got_error * @@ -814,91 +691,62 @@ remove_ondisk_file(const char *root_path, const char * return err; } -/* Remove files which exist in the file index but not in the tree. */ +struct diff_cb_arg { + struct got_fileindex *fileindex; + struct got_worktree *worktree; + struct got_repository *repo; + got_worktree_checkout_cb progress_cb; + void *progress_arg; + got_worktree_cancel_cb cancel_cb; + void *cancel_arg; +}; + static const struct got_error * -remove_missing_files(struct got_worktree *worktree, const char *path, - struct got_fileindex *fileindex, const struct got_tree_entries *entries, - got_worktree_checkout_cb progress_cb, void *progress_arg, - got_worktree_cancel_cb cancel_cb, void *cancel_arg) +diff_old_new(void *arg, struct got_fileindex_entry *ie, + struct got_tree_entry *te, const char *parent_path) { - const struct got_error *err = NULL; - struct collect_missing_entry_args a; - struct got_fileindex_entry *entry, *tmp; + struct diff_cb_arg *a = arg; - a.fileindex = fileindex; - a.entries = entries; - RB_INIT(&a.missing_entries); - a.current_subdir = apply_path_prefix(worktree, path); - err = got_fileindex_for_each_entry_safe(fileindex, - collect_missing_file, &a); - if (err) - return err; + return update_blob(a->worktree, a->fileindex, ie, te, + ie->path, a->repo, a->progress_cb, a->progress_arg, + a->cancel_cb, a->cancel_arg); +} - RB_FOREACH_SAFE(entry, got_fileindex_tree, &a.missing_entries, tmp) { - if (cancel_cb) { - err = (*cancel_cb)(cancel_arg); - if (err) - break; - } +static const struct got_error * +diff_old(void *arg, struct got_fileindex_entry *ie, const char *parent_path) +{ + const struct got_error *err; + struct diff_cb_arg *a = arg; - (*progress_cb)(progress_arg, GOT_STATUS_DELETE, entry->path); - err = remove_ondisk_file(worktree->root_path, entry->path); - if (err) - break; + (*a->progress_cb)(a->progress_arg, GOT_STATUS_DELETE, ie->path); - RB_REMOVE(got_fileindex_tree, &a.missing_entries, entry); - got_fileindex_entry_free(entry); - } - - if (err) { - /* Add back any entries which weeren't deleted from disk. */ - RB_FOREACH(entry, got_fileindex_tree, &a.missing_entries) { - if (got_fileindex_entry_add(fileindex, entry) != NULL) - break; - } - } - - return err; + err = remove_ondisk_file(a->worktree->root_path, ie->path); + if (err) + return err; + got_fileindex_entry_remove(a->fileindex, ie); + return NULL; } static const struct got_error * -tree_checkout(struct got_worktree *worktree, - struct got_fileindex *fileindex, struct got_tree_object *tree, - const char *path, struct got_repository *repo, - got_worktree_checkout_cb progress_cb, void *progress_arg, - got_worktree_cancel_cb cancel_cb, void *cancel_arg) +diff_new(void *arg, struct got_tree_entry *te, const char *parent_path) { - const struct got_error *err = NULL; - const struct got_tree_entries *entries; - struct got_tree_entry *te; - size_t len; + struct diff_cb_arg *a = arg; + const struct got_error *err; + char *path; - /* Skip this tree if it shares no path components with the prefix. */ - len = MIN(strlen(worktree->path_prefix), strlen(path)); - if (strncmp(path, worktree->path_prefix, len) != 0) - return NULL; + if (asprintf(&path, "%s%s%s", parent_path, + parent_path[0] ? "/" : "", te->name) + == -1) + return got_error_from_errno(); - entries = got_object_tree_get_entries(tree); - SIMPLEQ_FOREACH(te, &entries->head, entry) { - if (cancel_cb) { - err = (*cancel_cb)(cancel_arg); - if (err) - return err; - } - err = tree_checkout_entry(worktree, fileindex, te, path, repo, - progress_cb, progress_arg, cancel_cb, cancel_arg); - if (err) - return err; - } + if (S_ISDIR(te->mode)) + err = add_dir_on_disk(a->worktree, path); + else + err = update_blob(a->worktree, a->fileindex, NULL, te, path, + a->repo, a->progress_cb, a->progress_arg, + a->cancel_cb, a->cancel_arg); - len = strlen(worktree->path_prefix); - if (strncmp(worktree->path_prefix, path, len) == 0) { - err = remove_missing_files(worktree, path, fileindex, entries, - progress_cb, progress_arg, cancel_cb, cancel_arg); - if (err) - return err; - } - + free(path); return err; } @@ -909,10 +757,13 @@ got_worktree_checkout_files(struct got_worktree *workt { const struct got_error *err = NULL, *unlockerr, *checkout_err = NULL; struct got_commit_object *commit = NULL; + struct got_object_id *tree_id = NULL; struct got_tree_object *tree = NULL; char *fileindex_path = NULL, *new_fileindex_path = NULL; struct got_fileindex *fileindex = NULL; FILE *index = NULL, *new_index = NULL; + struct got_fileindex_diff_cb diff_cb; + struct diff_cb_arg arg; err = lock_worktree(worktree, LOCK_EX); if (err) @@ -956,13 +807,28 @@ got_worktree_checkout_files(struct got_worktree *workt if (err) goto done; - err = got_object_open_as_tree(&tree, repo, commit->tree_id); + err = got_object_id_by_path(&tree_id, repo, + worktree->base_commit_id, worktree->path_prefix); if (err) goto done; - checkout_err = tree_checkout(worktree, fileindex, tree, "/", repo, - progress_cb, progress_arg, cancel_cb, cancel_arg); + err = got_object_open_as_tree(&tree, repo, tree_id); + if (err) + goto done; + diff_cb.diff_old_new = diff_old_new; + diff_cb.diff_old = diff_old; + diff_cb.diff_new = diff_new; + arg.fileindex = fileindex; + arg.worktree = worktree; + arg.repo = repo; + arg.progress_cb = progress_cb; + arg.progress_arg = progress_arg; + arg.cancel_cb = cancel_cb; + arg.cancel_arg = cancel_arg; + checkout_err = got_fileindex_diff_tree(fileindex, tree, repo, + &diff_cb, &arg); + /* Try to sync the fileindex back to disk in any case. */ err = got_fileindex_write(fileindex, new_index); if (err)