diff --git a/Documentation/RelNotes/2.53.0.adoc b/Documentation/RelNotes/2.53.0.adoc index 039f613f12a821..7d9ca507f1730b 100644 --- a/Documentation/RelNotes/2.53.0.adoc +++ b/Documentation/RelNotes/2.53.0.adoc @@ -38,6 +38,13 @@ UI, Workflows & Features `--onto` option of "git replay". Test coverage of "git replay" has been improved. + * The split command in "git subtree" (in contrib/) has been taught to + deal better with rebased history. + + * The iconv library on macOS fails to correctly handle stateful + ISO/IEC 2022 encoded strings. Work it around instead of replacing + it wholesale from homebrew. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -96,6 +103,13 @@ Performance, Internal Implementation, Development Support etc. * Import newer version of "clar", unit testing framework. (merge 84071a6dea ps/clar-integers later to maint). + * The packfile_store data structure is moved from object store to odb + source. + + * The object-info API has been cleaned up. + + * Further preparation to upstream symbolic link support on Windows. + Fixes since v2.52 ----------------- @@ -259,6 +273,13 @@ Fixes since v2.52 "git stash export/import" recently introduced. (merge 02fc44a989 bc/doc-stash-import-export later to maint). + * "git fsck" used an inconsistent set of refs to show a confused + warning, which has been corrected. + + * Some error messages from the http transport layer lacked the + terminating newline, which has been corrected. + (merge a8227ae8d5 kt/http-backend-errors later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 46207a54cc qj/doc-http-bad-want-response later to maint). (merge df90eccd93 kh/doc-commit-extra-references later to maint). @@ -288,3 +309,5 @@ Fixes since v2.52 (merge 6c5c7e7071 ac/t1420-use-more-direct-check later to maint). (merge 2ac93bfcbc ds/builtin-doc-update later to maint). (merge 3f051fc9c9 kh/doc-patch-id later to maint). + (merge 555c8464e5 je/doc-reset later to maint). + (merge 220f888d7e ps/t1410-cleanup later to maint). diff --git a/Documentation/fsck-msgids.adoc b/Documentation/fsck-msgids.adoc index acac9683af83f4..6a4db3a9916ea6 100644 --- a/Documentation/fsck-msgids.adoc +++ b/Documentation/fsck-msgids.adoc @@ -13,6 +13,9 @@ `badGpgsig`:: (ERROR) A tag contains a bad (truncated) signature (e.g., `gpgsig`) header. +`badHeadTarget`:: + (ERROR) The `HEAD` ref is a symref that does not refer to a branch. + `badHeaderContinuation`:: (ERROR) A continuation header (such as for `gpgsig`) is unexpectedly truncated. @@ -41,6 +44,9 @@ `badRefName`:: (ERROR) A ref has an invalid format. +`badRefOid`:: + (ERROR) A ref points to an invalid object ID. + `badReferentName`:: (ERROR) The referent name of a symref is invalid. diff --git a/Documentation/git-reset.adoc b/Documentation/git-reset.adoc index 3b9ba9aee95203..5023b5069972ca 100644 --- a/Documentation/git-reset.adoc +++ b/Documentation/git-reset.adoc @@ -3,86 +3,67 @@ git-reset(1) NAME ---- -git-reset - Reset current HEAD to the specified state +git-reset - Set `HEAD` or the index to a known state SYNOPSIS -------- [synopsis] +git reset [--soft | --mixed [-N] | --hard | --merge | --keep] [-q] [<commit>] git reset [-q] [<tree-ish>] [--] <pathspec>... git reset [-q] [--pathspec-from-file=<file> [--pathspec-file-nul]] [<tree-ish>] git reset (--patch | -p) [<tree-ish>] [--] [<pathspec>...] -git reset [--soft | --mixed [-N] | --hard | --merge | --keep] [-q] [<commit>] DESCRIPTION ----------- -In the first three forms, copy entries from _<tree-ish>_ to the index.
-In the last form, set the current branch head (`HEAD`) to _<commit>_, -optionally modifying index and working tree to match. -The _<tree-ish>_/_<commit>_ defaults to `HEAD` in all forms. - -`git reset [-q] [<tree-ish>] [--] <pathspec>...`:: -`git reset [-q] [--pathspec-from-file=<file> [--pathspec-file-nul]] [<tree-ish>]`:: - These forms reset the index entries for all paths that match the - _<pathspec>_ to their state at _<tree-ish>_. (It does not affect - the working tree or the current branch.) -+ -This means that `git reset <pathspec>` is the opposite of `git add -<pathspec>`. This command is equivalent to -`git restore [--source=<tree-ish>] --staged <pathspec>...`. -+ -After running `git reset <pathspec>` to update the index entry, you can -use linkgit:git-restore[1] to check the contents out of the index to -the working tree. Alternatively, using linkgit:git-restore[1] -and specifying a commit with `--source`, you -can copy the contents of a path out of a commit to the index and to the -working tree in one go. +`git reset` does either of the following: -`git reset (--patch | -p) [<tree-ish>] [--] [<pathspec>...]`:: - Interactively select hunks in the difference between the index - and _<tree-ish>_ (defaults to `HEAD`). The chosen hunks are applied - in reverse to the index. -+ -This means that `git reset -p` is the opposite of `git add -p`, i.e. -you can use it to selectively reset hunks. See the "Interactive Mode" -section of linkgit:git-add[1] to learn how to operate the `--patch` mode. +1. `git reset [<mode>] <commit>` changes which commit `HEAD` points to. This + makes it possible to undo various Git operations, for example commit, merge, + rebase, and pull. +2. When you specify files or directories or pass `--patch`, `git reset` updates + the staged version of the specified files. `git reset [<mode>] [<commit>]`:: - This form resets the current branch head to _<commit>_ and - possibly updates the index (resetting it to the tree of _<commit>_) and - the working tree depending on _<mode>_. Before the operation, `ORIG_HEAD` - is set to the tip of the current branch. If _<mode>_ is omitted, - defaults to `--mixed`. The _<mode>_ must be one of the following: + Set the current branch head (`HEAD`) to point at _<commit>_. + Depending on _<mode>_, also update the working directory and/or index + to match the contents of _<commit>_. + _<commit>_ defaults to `HEAD`. + Before the operation, `ORIG_HEAD` is set to the tip of the current branch. ++ +The _<mode>_ must be one of the following (default `--mixed`): + --- -`--soft`:: - Does not touch the index file or the working tree at all (but - resets the head to _<commit>_, just like all modes do). This leaves - all your changed files "Changes to be committed", as `git status` - would put it. +-- `--mixed`:: - Resets the index but not the working tree (i.e., the changed files - are preserved but not marked for commit) and reports what has not - been updated. This is the default action. + Leave your working directory unchanged. + Update the index to match the new `HEAD`, so nothing will be staged. + -If `-N` is specified, removed paths are marked as intent-to-add (see +If `-N` is specified, mark removed paths as intent-to-add (see linkgit:git-add[1]). +`--soft`:: + Leave your working tree files and the index unchanged. + For example, if you have no staged changes, you can use + `git reset --soft HEAD~5; git commit` + to combine the last 5 commits into 1 commit. This works even with + changes in the working tree, which are left untouched, but such usage + can lead to confusion. + `--hard`:: - Resets the index and working tree. Any changes to tracked files in the - working tree since _<commit>_ are discarded. Any untracked files or - directories in the way of writing any tracked files are simply deleted.
+ Overwrite all files and directories with the version from _<commit>_, + and may overwrite untracked files. Tracked files not in _<commit>_ are + removed so that the working tree matches _<commit>_. + Update the index to match the new `HEAD`, so nothing will be staged. `--merge`:: - Resets the index and updates the files in the working tree that are - different between _<commit>_ and `HEAD`, but keeps those which are + Reset the index and update the files in the working tree that are + different between _<commit>_ and `HEAD`, but keep those which are different between the index and working tree (i.e. which have changes which have not been added). + Mainly exists to reset unmerged index entries, like those left behind by + `git am -3` or `git switch -m` in certain situations. If a file that is different between _<commit>_ and the index has unstaged changes, reset is aborted. -+ -In other words, `--merge` does something like a `git read-tree -u -m <commit>`, -but carries forward unmerged index entries. `--keep`:: Resets index entries and updates files in the working tree that are @@ -98,6 +79,28 @@ but carries forward unmerged index entries. the submodules' `HEAD` to be detached at that commit. -- +`git reset [-q] [<tree-ish>] [--] <pathspec>...`:: +`git reset [-q] [--pathspec-from-file=<file> [--pathspec-file-nul]] [<tree-ish>]`:: + For all specified files or directories, set the staged version to + the version from the given commit or tree (which defaults to `HEAD`). ++ +This means that `git reset <pathspec>` is the opposite of `git add +<pathspec>`: it unstages all changes to the specified file(s) or +directories. This is equivalent to `git restore --staged <pathspec>...`. ++ +In this mode, `git reset` updates only the index (without updating the `HEAD` or +working tree files). If you want to update the files as well as the index +entries, use linkgit:git-restore[1]. + +`git reset (--patch | -p) [<tree-ish>] [--] [<pathspec>...]`:: + Interactively select changes from the difference between the index + and the specified commit or tree (which defaults to `HEAD`). + The index is modified using the chosen changes. ++ +This means that `git reset -p` is the opposite of `git add -p`, i.e. +you can use it to selectively unstage changes. See the "Interactive Mode" +section of linkgit:git-add[1] to learn how to use the `--patch` option. + See "Reset, restore and revert" in linkgit:git[1] for the differences between the three commands.
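As a quick illustration of the two modes of operation the rewritten page describes (a minimal sketch using only commands quoted in the text above; "file.c" stands in for any tracked path):

    # Mode 1: move HEAD.  Squash the last five commits into one; the working
    # tree is left untouched and their changes stay staged (see --soft above).
    git reset --soft HEAD~5
    git commit

    # Mode 2: operate on paths.  Unstage a change without touching HEAD or the
    # working tree; per the text above this is equivalent to
    # "git restore --staged file.c".
    git add file.c
    git reset file.c
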
diff --git a/Makefile b/Makefile index b7eba509c6a0ca..8aa489f3b6812f 100644 --- a/Makefile +++ b/Makefile @@ -1687,11 +1687,21 @@ ifeq ($(uname_S),Darwin) BASIC_CFLAGS += -I/sw/include BASIC_LDFLAGS += -L/sw/lib endif + ifeq ($(shell test -d /opt/sw/lib && echo y),y) + BASIC_CFLAGS += -I/opt/sw/include + BASIC_LDFLAGS += -L/opt/sw/lib + ifeq ($(shell test -e /opt/sw/lib/libiconv.dylib && echo y),y) + HAS_GOOD_LIBICONV = Yes + endif + endif endif ifndef NO_DARWIN_PORTS ifeq ($(shell test -d /opt/local/lib && echo y),y) BASIC_CFLAGS += -I/opt/local/include BASIC_LDFLAGS += -L/opt/local/lib + ifeq ($(shell test -e /opt/local/lib/libiconv.dylib && echo y),y) + HAS_GOOD_LIBICONV = Yes + endif endif endif ifndef NO_APPLE_COMMON_CRYPTO @@ -1714,6 +1724,7 @@ endif ifdef USE_HOMEBREW_LIBICONV ifeq ($(shell test -d $(HOMEBREW_PREFIX)/opt/libiconv && echo y),y) ICONVDIR ?= $(HOMEBREW_PREFIX)/opt/libiconv + HAS_GOOD_LIBICONV = Yes endif endif endif @@ -1859,6 +1870,11 @@ ifndef NO_ICONV endif EXTLIBS += $(ICONV_LINK) -liconv endif + ifdef NEEDS_GOOD_LIBICONV + ifndef HAS_GOOD_LIBICONV + BASIC_CFLAGS += -DICONV_RESTART_RESET + endif + endif endif ifdef ICONV_OMITS_BOM BASIC_CFLAGS += -DICONV_OMITS_BOM diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 3cb725940d9e42..df8e87a81f5eee 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -487,8 +487,7 @@ static void batch_object_write(const char *obj_name, data->info.sizep = &data->size; if (pack) - ret = packed_object_info(the_repository, pack, - offset, &data->info); + ret = packed_object_info(pack, offset, &data->info); else ret = odb_read_object_info_extended(the_repository->objects, &data->oid, &data->info, diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 7849005ccb15ff..b8a7757cfda943 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -900,7 +900,7 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + new_p = packfile_store_load_pack(pack_data->repo->objects->sources->packfiles, idx_name, 1); if (!new_p) die(_("core Git rejected index %s"), idx_name); @@ -955,7 +955,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct odb_source *source; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -979,7 +979,11 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + } + + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
*/ @@ -1096,10 +1100,10 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); + struct odb_source *source; struct object_entry *e; struct object_id oid; unsigned long hdrlen; @@ -1179,24 +1183,29 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); + goto out; + } - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - - } else { - e->depth = 0; - e->type = OBJ_BLOB; - e->pack_id = pack_id; - e->idx.offset = offset; - e->idx.crc32 = crc32_end(pack_file); - object_count++; - object_count_by_type[OBJ_BLOB]++; + goto out; } + e->depth = 0; + e->type = OBJ_BLOB; + e->pack_id = pack_id; + e->idx.offset = offset; + e->idx.crc32 = crc32_end(pack_file); + object_count++; + object_count_by_type[OBJ_BLOB]++; + +out: free(in_buf); free(out_buf); } diff --git a/builtin/fsck.c b/builtin/fsck.c index 4979bc795e5d61..0512f78a87fe1d 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -51,6 +51,7 @@ static int show_progress = -1; static int show_dangling = 1; static int name_objects; static int check_references = 1; +static timestamp_t now; #define ERROR_OBJECT 01 #define ERROR_REACHABLE 02 #define ERROR_PACK 04 @@ -510,6 +511,9 @@ static int fsck_handle_reflog_ent(const char *refname, timestamp_t timestamp, int tz UNUSED, const char *message UNUSED, void *cb_data UNUSED) { + if (now && timestamp > now) + return 0; + if (verbose) fprintf_ln(stderr, _("Checking reflog %s->%s"), oid_to_hex(ooid), oid_to_hex(noid)); @@ -531,8 +535,22 @@ static int fsck_handle_reflog(const char *logname, void *cb_data) return 0; } -static int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) +struct ref_snapshot { + char *refname; + struct object_id oid; + /* TODO: Maybe supplement with latest reflog entry info too? */ +}; + +struct snapshot { + size_t nr; + size_t alloc; + struct ref_snapshot *ref; + /* TODO: Consider also snapshotting the index of each worktree. 
*/ +}; + +static int snapshot_ref(const struct reference *ref, void *cb_data) { + struct snapshot *snap = cb_data; struct object *obj; obj = parse_object(the_repository, ref->oid); @@ -556,6 +574,20 @@ static int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) errors_found |= ERROR_REFS; } default_refs++; + + ALLOC_GROW(snap->ref, snap->nr + 1, snap->alloc); + snap->ref[snap->nr].refname = xstrdup(ref->name); + oidcpy(&snap->ref[snap->nr].oid, ref->oid); + snap->nr++; + + return 0; +} + +static int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) +{ + struct object *obj; + + obj = parse_object(the_repository, ref->oid); obj->flags |= USED; fsck_put_object_name(&fsck_walk_options, ref->oid, "%s", ref->name); @@ -564,18 +596,35 @@ static int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) return 0; } -static int fsck_head_link(const char *head_ref_name, - const char **head_points_at, - struct object_id *head_oid); - -static void get_default_heads(void) +static void snapshot_refs(struct snapshot *snap, int argc, const char **argv) { struct worktree **worktrees, **p; const char *head_points_at; struct object_id head_oid; + for (int i = 0; i < argc; i++) { + const char *arg = argv[i]; + struct object_id oid; + if (!repo_get_oid(the_repository, arg, &oid)) { + struct reference ref = { + .name = arg, + .oid = &oid, + }; + + snapshot_ref(&ref, snap); + continue; + } + error(_("invalid parameter: expected sha1, got '%s'"), arg); + errors_found |= ERROR_OBJECT; + } + + if (argc) { + include_reflogs = 0; + return; + } + refs_for_each_rawref(get_main_ref_store(the_repository), - fsck_handle_ref, NULL); + snapshot_ref, snap); worktrees = get_worktrees(); for (p = worktrees; *p; p++) { @@ -583,22 +632,62 @@ static void get_default_heads(void) struct strbuf refname = STRBUF_INIT; strbuf_worktree_ref(wt, &refname, "HEAD"); - fsck_head_link(refname.buf, &head_points_at, &head_oid); + + head_points_at = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), + refname.buf, 0, &head_oid, NULL); + if (head_points_at && !is_null_oid(&head_oid)) { struct reference ref = { .name = refname.buf, .oid = &head_oid, }; - fsck_handle_ref(&ref, NULL); + snapshot_ref(&ref, snap); } strbuf_release(&refname); - if (include_reflogs) + /* + * TODO: Could use refs_for_each_reflog(...) to find + * latest entry instead of using a global 'now' for that + * purpose. 
+ */ + } + free_worktrees(worktrees); + + /* Ignore reflogs newer than now */ + now = time(NULL); +} + + +static void free_snapshot_refs(struct snapshot *snap) +{ + for (size_t i = 0; i < snap->nr; i++) + free(snap->ref[i].refname); + free(snap->ref); +} + +static void process_refs(struct snapshot *snap) +{ + struct worktree **worktrees, **p; + + for (size_t i = 0; i < snap->nr; i++) { + struct reference ref = { + .name = snap->ref[i].refname, + .oid = &snap->ref[i].oid, + }; + fsck_handle_ref(&ref, NULL); + } + + if (include_reflogs) { + worktrees = get_worktrees(); + for (p = worktrees; *p; p++) { + struct worktree *wt = *p; + refs_for_each_reflog(get_worktree_ref_store(wt), fsck_handle_reflog, wt); + } + free_worktrees(worktrees); } - free_worktrees(worktrees); /* * Not having any default heads isn't really fatal, but @@ -713,43 +802,6 @@ static void fsck_source(struct odb_source *source) stop_progress(&progress); } -static int fsck_head_link(const char *head_ref_name, - const char **head_points_at, - struct object_id *head_oid) -{ - int null_is_error = 0; - - if (verbose) - fprintf_ln(stderr, _("Checking %s link"), head_ref_name); - - *head_points_at = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - head_ref_name, 0, head_oid, - NULL); - if (!*head_points_at) { - errors_found |= ERROR_REFS; - return error(_("invalid %s"), head_ref_name); - } - if (!strcmp(*head_points_at, head_ref_name)) - /* detached HEAD */ - null_is_error = 1; - else if (!starts_with(*head_points_at, "refs/heads/")) { - errors_found |= ERROR_REFS; - return error(_("%s points to something strange (%s)"), - head_ref_name, *head_points_at); - } - if (is_null_oid(head_oid)) { - if (null_is_error) { - errors_found |= ERROR_REFS; - return error(_("%s: detached HEAD points at nothing"), - head_ref_name); - } - fprintf_ln(stderr, - _("notice: %s points to an unborn branch (%s)"), - head_ref_name, *head_points_at + 11); - } - return 0; -} - static int fsck_cache_tree(struct cache_tree *it, const char *index_path) { int i; @@ -963,8 +1015,12 @@ int cmd_fsck(int argc, const char *prefix, struct repository *repo UNUSED) { - int i; struct odb_source *source; + struct snapshot snap = { + .nr = 0, + .alloc = 0, + .ref = NULL + }; /* fsck knows how to handle missing promisor objects */ fetch_if_missing = 0; @@ -1000,6 +1056,17 @@ int cmd_fsck(int argc, if (check_references) fsck_refs(the_repository); + /* + * Take a snapshot of the refs before walking objects to avoid looking + * at a set of refs that may be changed by the user while we are walking + * objects. We can still walk over new objects that are added during the + * execution of fsck but won't miss any objects that were reachable. 
+ */ + snapshot_refs(&snap, argc, argv); + + /* Ensure we get a "fresh" view of the odb */ + odb_reprepare(the_repository->objects); + if (connectivity_only) { for_each_loose_object(the_repository->objects, mark_loose_for_connectivity, NULL, 0); @@ -1041,42 +1108,18 @@ int cmd_fsck(int argc, errors_found |= ERROR_OBJECT; } - for (i = 0; i < argc; i++) { - const char *arg = argv[i]; - struct object_id oid; - if (!repo_get_oid(the_repository, arg, &oid)) { - struct object *obj = lookup_object(the_repository, - &oid); - - if (!obj || !(obj->flags & HAS_OBJ)) { - if (is_promisor_object(the_repository, &oid)) - continue; - error(_("%s: object missing"), oid_to_hex(&oid)); - errors_found |= ERROR_OBJECT; - continue; - } - - obj->flags |= USED; - fsck_put_object_name(&fsck_walk_options, &oid, - "%s", arg); - mark_object_reachable(obj); - continue; - } - error(_("invalid parameter: expected sha1, got '%s'"), arg); - errors_found |= ERROR_OBJECT; - } + /* Process the snapshotted refs and the reflogs. */ + process_refs(&snap); - /* - * If we've not been given any explicit head information, do the - * default ones from .git/refs. We also consider the index file - * in this case (ie this implies --cache). - */ - if (!argc) { - get_default_heads(); + /* If not given any explicit objects, process index files too. */ + if (!argc) keep_cache_objects = 1; - } - if (keep_cache_objects) { + /* + * TODO: Consider first walking these indexes in snapshot_refs, + * to snapshot where the index entries used to point, and then + * check those snapshotted locations here. + */ struct worktree **worktrees, **p; verify_index_checksum = 1; @@ -1149,5 +1192,6 @@ int cmd_fsck(int argc, } } + free_snapshot_refs(&snap); return errors_found; } diff --git a/builtin/grep.c b/builtin/grep.c index 53cccf2d25068c..5b8b87b1ac4d7a 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,8 +1213,14 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); - if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->packfiles); + + if (startup_info->have_repository) { + struct odb_source *source; + + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) + packfile_store_prepare(source->packfiles); + } start_threads(&opt); } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a7e901e49c06d4..b67fb0256cc831 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1638,7 +1638,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, hash, "idx", 1); if (do_fsck_object && startup_info->have_repository) - packfile_store_load_pack(the_repository->objects->packfiles, + packfile_store_load_pack(the_repository->objects->sources->packfiles, final_index_name, 0); if (!from_stdin) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ca44b7894fc064..6ee31d48c94748 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,49 +1529,53 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache; + struct odb_source *source; - for (cache = kept_pack_cache(r, flags); *cache; cache++) { - struct packed_git *p = *cache; - off_t ofs; - uint32_t candidate_mtime; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); - ofs = 
find_pack_entry_one(oid, p); - if (!ofs) - continue; + for (; *cache; cache++) { + struct packed_git *p = *cache; + off_t ofs; + uint32_t candidate_mtime; - /* - * We have a copy of the object 'oid' in a non-cruft - * pack. We can avoid packing an additional copy - * regardless of what the existing copy's mtime is since - * it is outside of a cruft pack. - */ - if (!p->is_cruft) - return 0; - - /* - * If we have a copy of the object 'oid' in a cruft - * pack, then either read the cruft pack's mtime for - * that object, or, if that can't be loaded, assume the - * pack's mtime itself. - */ - if (!load_pack_mtimes(p)) { - uint32_t pos; - if (offset_to_pack_pos(p, ofs, &pos) < 0) + ofs = find_pack_entry_one(oid, p); + if (!ofs) continue; - candidate_mtime = nth_packed_mtime(p, pos); - } else { - candidate_mtime = p->mtime; - } - /* - * We have a surviving copy of the object in a cruft - * pack whose mtime is greater than or equal to the one - * we are considering. We can thus avoid packing an - * additional copy of that object. - */ - if (mtime <= candidate_mtime) - return 0; + /* + * We have a copy of the object 'oid' in a non-cruft + * pack. We can avoid packing an additional copy + * regardless of what the existing copy's mtime is since + * it is outside of a cruft pack. + */ + if (!p->is_cruft) + return 0; + + /* + * If we have a copy of the object 'oid' in a cruft + * pack, then either read the cruft pack's mtime for + * that object, or, if that can't be loaded, assume the + * pack's mtime itself. + */ + if (!load_pack_mtimes(p)) { + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) + continue; + candidate_mtime = nth_packed_mtime(p, pos); + } else { + candidate_mtime = p->mtime; + } + + /* + * We have a surviving copy of the object in a cruft + * pack whose mtime is greater than or equal to the one + * we are considering. We can thus avoid packing an + * additional copy of that object. 
+ */ + if (mtime <= candidate_mtime) + return 0; + } } return -1; @@ -1624,9 +1628,9 @@ static int want_found_object(const struct object_id *oid, int exclude, */ unsigned flags = 0; if (ignore_packed_keep_on_disk) - flags |= ON_DISK_KEEP_PACKS; + flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) - flags |= IN_CORE_KEEP_PACKS; + flags |= KEPT_PACK_IN_CORE; /* * If the object is in a pack that we want to ignore, *and* we @@ -1749,13 +1753,15 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { - struct packed_git *p = e->pack; - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); - if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->packs, p); - if (want != -1) - return want; + for (source = the_repository->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + struct packed_git *p = e->pack; + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); + if (!exclude && want > 0) + packfile_list_prepend(&source->packfiles->packs, p); + if (want != -1) + return want; + } } if (uri_protocols.nr) { @@ -2411,7 +2417,7 @@ static void drop_reused_delta(struct object_entry *entry) oi.sizep = &size; oi.typep = &type; - if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { + if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { /* * We failed to get the info from this pack for some reason; * fall back to odb_read_object_info, which may find another copy. @@ -3748,7 +3754,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, struct object_info oi = OBJECT_INFO_INIT; oi.typep = &type; - if (packed_object_info(the_repository, p, ofs, &oi) < 0) { + if (packed_object_info(p, ofs, &oi) < 0) { die(_("could not get type of object %s in pack %s"), oid_to_hex(oid), p->pack_name); } else if (type == OBJ_COMMIT) { @@ -3931,7 +3937,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) * an optimization during delta selection. 
*/ revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.keep_pack_cache_flags |= KEPT_PACK_IN_CORE; revs.blob_objects = 1; revs.tree_objects = 1; revs.tag_objects = 1; @@ -4030,7 +4036,7 @@ static void show_cruft_commit(struct commit *commit, void *data) static int cruft_include_check_obj(struct object *obj, void *data UNUSED) { - return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS); + return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE); } static int cruft_include_check(struct commit *commit, void *data) diff --git a/commit-graph.c b/commit-graph.c index 00e8193adcab81..6b1f02e1792b64 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1493,7 +1493,7 @@ static int add_packed_commits(const struct object_id *oid, display_progress(ctx->progress, ++ctx->progress_done); oi.typep = &type; - if (packed_object_info(ctx->r, pack, offset, &oi) < 0) + if (packed_object_info(pack, offset, &oi) < 0) die(_("unable to get type of object %s"), oid_to_hex(oid)); if (type != OBJ_COMMIT) diff --git a/compat/mingw.c b/compat/mingw.c index f09b49ff21ddab..cf4f3c92e7a889 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1239,18 +1239,16 @@ char *mingw_getcwd(char *pointer, int len) { wchar_t cwd[MAX_PATH], wpointer[MAX_PATH]; DWORD ret = GetCurrentDirectoryW(ARRAY_SIZE(cwd), cwd); + HANDLE hnd; if (!ret || ret >= ARRAY_SIZE(cwd)) { errno = ret ? ENAMETOOLONG : err_win_to_posix(GetLastError()); return NULL; } - ret = GetLongPathNameW(cwd, wpointer, ARRAY_SIZE(wpointer)); - if (!ret && GetLastError() == ERROR_ACCESS_DENIED) { - HANDLE hnd = CreateFileW(cwd, 0, - FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, - OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); - if (hnd == INVALID_HANDLE_VALUE) - return NULL; + hnd = CreateFileW(cwd, 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + if (hnd != INVALID_HANDLE_VALUE) { ret = GetFinalPathNameByHandleW(hnd, wpointer, ARRAY_SIZE(wpointer), 0); CloseHandle(hnd); if (!ret || ret >= ARRAY_SIZE(wpointer)) @@ -1259,13 +1257,11 @@ char *mingw_getcwd(char *pointer, int len) return NULL; return pointer; } - if (!ret || ret >= ARRAY_SIZE(wpointer)) - return NULL; - if (GetFileAttributesW(wpointer) == INVALID_FILE_ATTRIBUTES) { + if (GetFileAttributesW(cwd) == INVALID_FILE_ATTRIBUTES) { errno = ENOENT; return NULL; } - if (xwcstoutf(pointer, wpointer, len) < 0) + if (xwcstoutf(pointer, cwd, len) < 0) return NULL; convert_slashes(pointer); return pointer; diff --git a/config.mak.uname b/config.mak.uname index 38b35af366d5fd..3c35ae33a3c0c0 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -157,6 +157,7 @@ ifeq ($(uname_S),Darwin) endif ifeq ($(shell test "$(DARWIN_MAJOR_VERSION)" -ge 24 && echo 1),1) USE_HOMEBREW_LIBICONV = UnfortunatelyYes + NEEDS_GOOD_LIBICONV = UnfortunatelyYes endif # The builtin FSMonitor on MacOS builds upon Simple-IPC. Both require diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 17106d1a721519..3ebe88cbeadb02 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -325,6 +325,12 @@ check_parents () { done } +# Usage: get_notree REV +get_notree () { + assert test $# = 1 + test -r "$cachedir/notree/$1" +} + # Usage: set_notree REV set_notree () { assert test $# = 1 @@ -511,6 +517,71 @@ find_existing_splits () { done || exit $? } +# Usage: find_other_splits DIR REV UNREVS... 
+# +# Scan history in REV UNREVS for other `git subtree split --rejoin` +# merge commits belonging to prefixes outside of DIR. These +# "other splits" don't contribute to DIR and can be ignored. +# +# If any such rejoins are found, +# +# * emit their second-parent as an UNREV, avoiding a +# potentially costly history traversal +# +# * mark the merge commit as "notree" to ignore it +find_other_splits () { + assert test $# -ge 2 + dir="${1%/}" + rev="$2" + shift 2 + debug "Looking for other splits with dir != $dir..." + + git log \ + --grep '^git-subtree-mainline:' \ + --no-patch \ + --no-show-signature \ + --format='hash: %H%nparents: %P%n%(trailers:key=git-subtree-dir,key=git-subtree-mainline,key=git-subtree-split)%nEND' \ + "$rev" ${@:+"$@"} | + while read -r key val + do + case "$key" in + hash:) + commit_hash="${val}" + commit_parents= + subtree_dir= + subtree_mainline= + subtree_split= + ;; + parents:) + commit_parents="${val}" ;; + git-subtree-dir:) + subtree_dir="${val%/}/" ;; + git-subtree-mainline:) + subtree_mainline="${val}" ;; + git-subtree-split:) + subtree_split="${val}" ;; + END) + # verify: + # * all git-subtree-* trailers are present + # * this subtree is outside of $dir + # * the first parent is the git-subtree-mainline: + # * the commit has at least two parents + if test -n "${subtree_dir}" && + test -n "${subtree_split}" && + test -n "${subtree_mainline}" && + test "${subtree_dir}" = "${subtree_dir#"${dir}/"}" && + test "${commit_parents}" != "${commit_parents#"$subtree_mainline "}" && + rev_exists "${commit_hash}^2" + then + debug "find_other_splits excluding dir=$subtree_dir merged in ${commit_hash}" + echo "^${commit_hash}^2" + set_notree "${commit_hash}" + fi + ;; + esac + done +} + # Usage: copy_commit REV TREE FLAGS_STR copy_commit () { assert test $# = 3 @@ -785,42 +856,6 @@ ensure_valid_ref_format () { die "fatal: '$1' does not look like a ref" } -# Usage: should_ignore_subtree_split_commit REV -# -# Check if REV is a commit from another subtree and should be -# ignored from processing for splits -should_ignore_subtree_split_commit () { - assert test $# = 1 - - git show \ - --no-patch \ - --no-show-signature \ - --format='%(trailers:key=git-subtree-dir,key=git-subtree-mainline)' \ - "$1" | - ( - have_mainline= - subtree_dir= - - while read -r trailer val - do - case "$trailer" in - git-subtree-dir:) - subtree_dir="${val%/}" ;; - git-subtree-mainline:) - have_mainline=y ;; - esac - done - - if test -n "${subtree_dir}" && - test -z "${have_mainline}" && - test "${subtree_dir}" != "$arg_prefix" - then - return 0 - fi - return 1 - ) -} - # Usage: process_split_commit REV PARENTS process_split_commit () { assert test $# = 2 @@ -994,31 +1029,39 @@ cmd_split () { fi unrevs="$(find_existing_splits "$dir" "$rev" "$repository")" || exit $? + (find_other_splits >"$cachedir/prune" "$dir" "$rev" $unrevs) || exit $? # We can't restrict rev-list to only $dir here, because some of our # parents have the $dir contents the root, and those won't match. 
# (and rev-list --follow doesn't seem to solve this) - grl='git rev-list --topo-order --reverse --parents $rev $unrevs' - revmax=$(eval "$grl" | wc -l) + revmax="$(git rev-list \ + <"$cachedir/prune" \ + --topo-order \ + --reverse \ + --parents \ + --stdin \ + --count \ + "$rev" \ + $unrevs + )" revcount=0 createcount=0 extracount=0 - eval "$grl" | + git rev-list \ + <"$cachedir/prune" \ + --topo-order \ + --reverse \ + --parents \ + --stdin \ + "$rev" \ + $unrevs | while read rev parents do - if should_ignore_subtree_split_commit "$rev" + if get_notree "$rev" then continue fi - parsedparents='' - for parent in $parents - do - if ! should_ignore_subtree_split_commit "$parent" - then - parsedparents="$parsedparents$parent " - fi - done - process_split_commit "$rev" "$parsedparents" + process_split_commit "$rev" "$parents" done || exit $? latest_new=$(cache_get latest_new) || exit $? diff --git a/contrib/subtree/t/t7900-subtree.sh b/contrib/subtree/t/t7900-subtree.sh index 316dc5269e2b6f..4db3a6eff37c4d 100755 --- a/contrib/subtree/t/t7900-subtree.sh +++ b/contrib/subtree/t/t7900-subtree.sh @@ -411,8 +411,9 @@ test_expect_success 'split sub dir/ with --rejoin' ' git fetch ./"sub proj" HEAD && git subtree merge --prefix="sub dir" FETCH_HEAD && split_hash=$(git subtree split --prefix="sub dir" --annotate="*") && - git subtree split --prefix="sub dir" --annotate="*" --rejoin && - test "$(last_commit_subject)" = "Split '\''sub dir/'\'' into commit '\''$split_hash'\''" + git subtree split --prefix="sub dir" --annotate="*" -b spl --rejoin && + test "$(last_commit_subject)" = "Split '\''sub dir/'\'' into commit '\''$split_hash'\''" && + test "$(git rev-list --count spl)" -eq 5 ) ' @@ -442,18 +443,25 @@ test_expect_success 'split with multiple subtrees' ' git -C "$test_count" subtree add --prefix=subADir FETCH_HEAD && git -C "$test_count" fetch ./subB HEAD && git -C "$test_count" subtree add --prefix=subBDir FETCH_HEAD && + test "$(git -C "$test_count" rev-list --count main)" -eq 7 && test_create_commit "$test_count" subADir/main-subA1 && test_create_commit "$test_count" subBDir/main-subB1 && git -C "$test_count" subtree split --prefix=subADir \ - --squash --rejoin -m "Sub A Split 1" && + --squash --rejoin -m "Sub A Split 1" -b a1 && + test "$(git -C "$test_count" rev-list --count main..a1)" -eq 1 && git -C "$test_count" subtree split --prefix=subBDir \ - --squash --rejoin -m "Sub B Split 1" && + --squash --rejoin -m "Sub B Split 1" -b b1 && + test "$(git -C "$test_count" rev-list --count main..b1)" -eq 1 && test_create_commit "$test_count" subADir/main-subA2 && test_create_commit "$test_count" subBDir/main-subB2 && git -C "$test_count" subtree split --prefix=subADir \ - --squash --rejoin -m "Sub A Split 2" && + --squash --rejoin -m "Sub A Split 2" -b a2 && + test "$(git -C "$test_count" rev-list --count main..a2)" -eq 2 && + test "$(git -C "$test_count" rev-list --count a1..a2)" -eq 1 && test "$(git -C "$test_count" subtree split --prefix=subBDir \ - --squash --rejoin -d -m "Sub B Split 1" 2>&1 | grep -w "\[1\]")" = "" + --squash --rejoin -d -m "Sub B Split 1" -b b2 2>&1 | grep -w "\[1\]")" = "" && + test "$(git -C "$test_count" rev-list --count main..b2)" -eq 2 && + test "$(git -C "$test_count" rev-list --count b1..b2)" -eq 1 ' # When subtree split-ing a directory that has other subtree @@ -477,6 +485,7 @@ do test_path_is_file subA/file1.t && test_path_is_file subA/subB/file2.t && git subtree split --prefix=subA --branch=bsplit && + test "$(git rev-list --count bsplit)" -eq 2 && git checkout 
bsplit && test_path_is_file file1.t && test_path_is_file subB/file2.t && @@ -489,6 +498,7 @@ do --prefix=subA/subB mksubtree && test_path_is_file subA/subB/file3.t && git subtree split --prefix=subA --branch=bsplit && + test "$(git rev-list --count bsplit)" -eq 3 && git checkout bsplit && test_path_is_file file1.t && test_path_is_file subB/file2.t && @@ -497,6 +507,67 @@ do ' done +# Usually, +# +# git subtree merge -P subA --squash f00... +# +# makes two commits, in this order: +# +# 1. Squashed 'subA/' content from commit f00... +# 2. Merge commit (1) as 'subA' +# +# Commit 1 updates the subtree but does *not* rewrite paths. +# Commit 2 rewrites all trees to start with `subA/` +# +# Commit 1 either has no parents or depends only on other +# "Squashed 'subA/' content" commits. +# +# For merge without --squash, subtree produces just one commit: +# a merge commit with git-subtree trailers. +# +# In either case, if the user rebases these commits, they will +# still have the git-subtree-* trailers… but will NOT have +# the layout described above. +# +# Test that subsequent `git subtree split` are not confused by this. +test_expect_success 'split with rebased subtree commit' ' + subtree_test_create_repo "$test_count" && + ( + cd "$test_count" && + test_commit file0 && + test_create_subtree_add \ + . mksubtree subA file1 --squash && + test_path_is_file subA/file1.t && + mkdir subB && + test_commit subB/bfile && + git commit --amend -F - <<'EOF' && +Squashed '\''subB/'\'' content from commit '\''badf00da911bbe895347b4b236f5461d55dc9877'\'' + +Simulate a cherry-picked or rebased subtree commit. + +git-subtree-dir: subB +git-subtree-split: badf00da911bbe895347b4b236f5461d55dc9877 +EOF + test_commit subA/file2 && + test_commit subB/bfile2 && + git commit --amend -F - <<'EOF' && +Split '\''subB/'\'' into commit '\''badf00da911bbe895347b4b236f5461d55dc9877'\'' + +Simulate a cherry-picked or rebased subtree commit. 
+ +git-subtree-dir: subB +git-subtree-mainline: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +git-subtree-split: badf00da911bbe895347b4b236f5461d55dc9877 +EOF + git subtree split --prefix=subA --branch=bsplit && + git checkout bsplit && + test_path_is_file file1.t && + test_path_is_file file2.t && + test "$(last_commit_subject)" = "subA/file2" && + test "$(git rev-list --count bsplit)" -eq 2 + ) +' + test_expect_success 'split sub dir/ with --rejoin from scratch' ' subtree_test_create_repo "$test_count" && test_create_commit "$test_count" main1 && diff --git a/environment.c b/environment.c index a770b5921d9546..b65b85a01f18cf 100644 --- a/environment.c +++ b/environment.c @@ -324,8 +324,8 @@ static enum fsync_component parse_fsync_components(const char *var, const char * return (current & ~negative) | positive; } -static int git_default_core_config(const char *var, const char *value, - const struct config_context *ctx, void *cb) +int git_default_core_config(const char *var, const char *value, + const struct config_context *ctx, void *cb) { /* This needs a better name */ if (!strcmp(var, "core.filemode")) { diff --git a/environment.h b/environment.h index 51898c99cd1e45..e61f843fdbb637 100644 --- a/environment.h +++ b/environment.h @@ -106,6 +106,8 @@ const char *strip_namespace(const char *namespaced_ref); int git_default_config(const char *, const char *, const struct config_context *, void *); +int git_default_core_config(const char *var, const char *value, + const struct config_context *ctx, void *cb); /* * TODO: All the below state either explicitly or implicitly relies on diff --git a/fsck.c b/fsck.c index fae18d8561e067..813d927d57d4b0 100644 --- a/fsck.c +++ b/fsck.c @@ -1310,11 +1310,6 @@ int fsck_refs_error_function(struct fsck_options *options UNUSED, strbuf_addstr(&sb, report->path); - if (report->oid) - strbuf_addf(&sb, " -> (%s)", oid_to_hex(report->oid)); - else if (report->referent) - strbuf_addf(&sb, " -> (%s)", report->referent); - if (msg_type == FSCK_WARN) warning("%s: %s", sb.buf, message); else diff --git a/fsck.h b/fsck.h index 336917c0451aac..65ecbb7fe194ab 100644 --- a/fsck.h +++ b/fsck.h @@ -30,6 +30,7 @@ enum fsck_msg_type { FUNC(BAD_DATE_OVERFLOW, ERROR) \ FUNC(BAD_EMAIL, ERROR) \ FUNC(BAD_GPGSIG, ERROR) \ + FUNC(BAD_HEAD_TARGET, ERROR) \ FUNC(BAD_NAME, ERROR) \ FUNC(BAD_OBJECT_SHA1, ERROR) \ FUNC(BAD_PACKED_REF_ENTRY, ERROR) \ @@ -39,6 +40,7 @@ enum fsck_msg_type { FUNC(BAD_REF_CONTENT, ERROR) \ FUNC(BAD_REF_FILETYPE, ERROR) \ FUNC(BAD_REF_NAME, ERROR) \ + FUNC(BAD_REF_OID, ERROR) \ FUNC(BAD_TIMEZONE, ERROR) \ FUNC(BAD_TREE, ERROR) \ FUNC(BAD_TREE_SHA1, ERROR) \ @@ -162,8 +164,6 @@ struct fsck_object_report { struct fsck_ref_report { const char *path; - const struct object_id *oid; - const char *referent; }; struct fsck_options { diff --git a/http-backend.c b/http-backend.c index 24f0dc119ab8e6..0122146df607b2 100644 --- a/http-backend.c +++ b/http-backend.c @@ -144,8 +144,10 @@ static NORETURN void not_found(struct strbuf *hdr, const char *err, ...) end_headers(hdr); va_start(params, err); - if (err && *err) + if (err && *err) { vfprintf(stderr, err, params); + putc('\n', stderr); + } va_end(params); exit(0); } @@ -160,8 +162,10 @@ static NORETURN void forbidden(struct strbuf *hdr, const char *err, ...) 
end_headers(hdr); va_start(params, err); - if (err && *err) + if (err && *err) { vfprintf(stderr, err, params); + putc('\n', stderr); + } va_end(params); exit(0); } diff --git a/http.c b/http.c index 41f850db16d19f..7815f144de3d69 100644 --- a/http.c +++ b/http.c @@ -2544,7 +2544,7 @@ void http_install_packfile(struct packed_git *p, struct packfile_list *list_to_remove_from) { packfile_list_remove(list_to_remove_from, p); - packfile_store_add_pack(the_repository->objects->packfiles, p); + packfile_store_add_pack(the_repository->objects->sources->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/lockfile.c b/lockfile.c index 1d5ed016828746..67082a9caaeb18 100644 --- a/lockfile.c +++ b/lockfile.c @@ -19,14 +19,14 @@ static void trim_last_path_component(struct strbuf *path) int i = path->len; /* back up past trailing slashes, if any */ - while (i && path->buf[i - 1] == '/') + while (i && is_dir_sep(path->buf[i - 1])) i--; /* * then go backwards until a slash, or the beginning of the * string */ - while (i && path->buf[i - 1] != '/') + while (i && !is_dir_sep(path->buf[i - 1])) i--; strbuf_setlen(path, i); diff --git a/midx.c b/midx.c index b681b18fc19a40..a75ea99a0d4bb0 100644 --- a/midx.c +++ b/midx.c @@ -95,8 +95,8 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - packfile_store_prepare(source->odb->packfiles); - return source->midx; + packfile_store_prepare(source->packfiles); + return source->packfiles->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, @@ -447,7 +447,6 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { - struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; struct packed_git *p; @@ -460,7 +459,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - p = packfile_store_load_pack(r->objects->packfiles, + p = packfile_store_load_pack(m->source->packfiles, pack_name.buf, m->source->local); strbuf_release(&pack_name); @@ -710,12 +709,12 @@ int prepare_multi_pack_index_one(struct odb_source *source) if (!r->settings.core_multi_pack_index) return 0; - if (source->midx) + if (source->packfiles->midx) return 1; - source->midx = load_multi_pack_index(source); + source->packfiles->midx = load_multi_pack_index(source); - return !!source->midx; + return !!source->packfiles->midx; } int midx_checksum_valid(struct multi_pack_index *m) @@ -804,9 +803,9 @@ void clear_midx_file(struct repository *r) struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; + if (source->packfiles->midx) + close_midx(source->packfiles->midx); + source->packfiles->midx = NULL; } } diff --git a/object-file.c b/object-file.c index 6280e42f3412c3..e7e4c3348f9c1b 100644 --- a/object-file.c +++ b/object-file.c @@ -416,19 +416,16 @@ int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, int flags) { - int status = 0; + int ret; int fd; unsigned long mapsize; const char *path; - void *map; - git_zstream stream; + void *map = NULL; + git_zstream stream, *stream_to_end = NULL; char hdr[MAX_HEADER_LEN]; unsigned long size_scratch; enum object_type type_scratch; - if (oi && 
oi->delta_base_oid) - oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); - /* * If we don't care about type or size, then we don't * need to look inside the object at all. Note that we @@ -439,71 +436,101 @@ int odb_source_loose_read_object_info(struct odb_source *source, */ if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) { struct stat st; - if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) - return quick_has_loose(source->loose, oid) ? 0 : -1; - if (stat_loose_object(source->loose, oid, &st, &path) < 0) - return -1; + + if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) { + ret = quick_has_loose(source->loose, oid) ? 0 : -1; + goto out; + } + + if (stat_loose_object(source->loose, oid, &st, &path) < 0) { + ret = -1; + goto out; + } + if (oi && oi->disk_sizep) *oi->disk_sizep = st.st_size; - return 0; + + ret = 0; + goto out; } fd = open_loose_object(source->loose, oid, &path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); - return -1; + ret = -1; + goto out; } - map = map_fd(fd, path, &mapsize); - if (!map) - return -1; - if (!oi->sizep) - oi->sizep = &size_scratch; - if (!oi->typep) - oi->typep = &type_scratch; + map = map_fd(fd, path, &mapsize); + if (!map) { + ret = -1; + goto out; + } if (oi->disk_sizep) *oi->disk_sizep = mapsize; + stream_to_end = &stream; + switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) { case ULHR_OK: - if (parse_loose_header(hdr, oi) < 0) - status = error(_("unable to parse %s header"), oid_to_hex(oid)); - else if (*oi->typep < 0) + if (!oi->sizep) + oi->sizep = &size_scratch; + if (!oi->typep) + oi->typep = &type_scratch; + + if (parse_loose_header(hdr, oi) < 0) { + ret = error(_("unable to parse %s header"), oid_to_hex(oid)); + goto corrupt; + } + + if (*oi->typep < 0) die(_("invalid object type")); - if (!oi->contentp) - break; - *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); - if (*oi->contentp) - goto cleanup; + if (oi->contentp) { + *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); + if (!*oi->contentp) { + ret = -1; + goto corrupt; + } + } - status = -1; break; case ULHR_BAD: - status = error(_("unable to unpack %s header"), - oid_to_hex(oid)); - break; + ret = error(_("unable to unpack %s header"), + oid_to_hex(oid)); + goto corrupt; case ULHR_TOO_LONG: - status = error(_("header for %s too long, exceeds %d bytes"), - oid_to_hex(oid), MAX_HEADER_LEN); - break; + ret = error(_("header for %s too long, exceeds %d bytes"), + oid_to_hex(oid), MAX_HEADER_LEN); + goto corrupt; } - if (status && (flags & OBJECT_INFO_DIE_IF_CORRUPT)) + ret = 0; + +corrupt: + if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT)) die(_("loose object %s (stored in %s) is corrupt"), oid_to_hex(oid), path); -cleanup: - git_inflate_end(&stream); - munmap(map, mapsize); - if (oi->sizep == &size_scratch) - oi->sizep = NULL; - if (oi->typep == &type_scratch) - oi->typep = NULL; - oi->whence = OI_LOOSE; - return status; +out: + if (stream_to_end) + git_inflate_end(stream_to_end); + if (map) + munmap(map, mapsize); + if (oi) { + if (oi->sizep == &size_scratch) + oi->sizep = NULL; + if (oi->typep == &type_scratch) + oi->typep = NULL; + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); + if (!ret) + oi->whence = OI_LOOSE; + } + + return ret; } static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, diff --git a/odb.c b/odb.c index 838aa2e53a08d7..ac70b6a099f588 100644 --- a/odb.c +++ 
b/odb.c @@ -229,6 +229,7 @@ static struct odb_source *odb_source_new(struct object_database *odb, source->local = local; source->path = xstrdup(path); source->loose = odb_source_loose_new(source); + source->packfiles = packfile_store_new(source); return source; } @@ -376,6 +377,7 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); + packfile_store_free(source->packfiles); free(source); } @@ -710,19 +712,19 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; - /* Most likely it's a loose object. */ - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_info(source, real, oi, flags)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags) || + !odb_source_loose_read_object_info(source, real, oi, flags)) return 0; + } /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; + for (source = odb->sources; source; source = source->next) + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags)) + return 0; } /* @@ -981,13 +983,14 @@ int odb_freshen_object(struct object_database *odb, { struct odb_source *source; - if (packfile_store_freshen_object(odb->packfiles, oid)) - return 1; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) + for (source = odb->sources; source; source = source->next) { + if (packfile_store_freshen_object(source->packfiles, oid)) + return 1; + if (odb_source_loose_freshen_object(source, oid)) return 1; + } return 0; } @@ -1062,7 +1065,6 @@ struct object_database *odb_new(struct repository *repo, memset(o, 0, sizeof(*o)); o->repo = repo; - o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); @@ -1082,15 +1084,8 @@ struct object_database *odb_new(struct repository *repo, void odb_close(struct object_database *o) { struct odb_source *source; - - packfile_store_close(o->packfiles); - - for (source = o->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - + for (source = o->sources; source; source = source->next) + packfile_store_close(source->packfiles); close_commit_graph(o); } @@ -1124,7 +1119,6 @@ void odb_free(struct object_database *o) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); chdir_notify_unregister(NULL, odb_update_commondir, o); @@ -1147,13 +1141,13 @@ void odb_reprepare(struct object_database *o) o->loaded_alternates = 0; odb_prepare_alternates(o); - for (source = o->sources; source; source = source->next) + for (source = o->sources; source; source = source->next) { odb_source_loose_reprepare(source); + packfile_store_reprepare(source->packfiles); + } o->approximate_object_count_valid = 0; - packfile_store_reprepare(o->packfiles); - obj_read_unlock(); } diff --git a/odb.h b/odb.h index 014cd9585a2f6e..bab07755f4ec95 100644 --- a/odb.h +++ b/odb.h @@ -51,12 +51,8 @@ struct odb_source { /* Private state for loose objects. 
*/ struct odb_source_loose *loose; - /* - * private data - * - * should only be accessed directly by packfile.c and midx.c - */ - struct multi_pack_index *midx; + /* Should only be accessed directly by packfile.c and midx.c. */ + struct packfile_store *packfiles; /* * Figure out whether this is the local source of the owning @@ -128,9 +124,6 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* Should only be accessed directly by packfile.c and midx.c. */ - struct packfile_store *packfiles; - /* * This is meant to hold a *small* number of objects that you would * want odb_read_object() to be able to return, but yet you do not want @@ -330,7 +323,6 @@ struct object_info { OI_CACHED, OI_LOOSE, OI_PACKED, - OI_DBCACHED } whence; union { /* @@ -344,7 +336,12 @@ struct object_info { struct { struct packed_git *pack; off_t offset; - unsigned int is_delta; + enum packed_object_type { + PACKED_OBJECT_TYPE_UNKNOWN, + PACKED_OBJECT_TYPE_FULL, + PACKED_OBJECT_TYPE_OFS_DELTA, + PACKED_OBJECT_TYPE_REF_DELTA, + } type; } packed; } u; }; diff --git a/odb/streaming.c b/odb/streaming.c index 745cd486fbb33d..4a4474f891a07f 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -185,13 +185,12 @@ static int istream_source(struct odb_read_stream **out, { struct odb_source *source; - if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) - return 0; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_stream(out, source, oid)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_stream(out, source->packfiles, oid) || + !odb_source_loose_read_object_stream(out, source, oid)) return 0; + } return open_istream_incore(out, odb, oid); } diff --git a/pack-bitmap.c b/pack-bitmap.c index 8ca79725b1d438..972203f12b6d9b 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1876,8 +1876,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, ofs = pack_pos_to_offset(pack, pos); } - if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs, - &oi) < 0) { + if (packed_object_info(pack, ofs, &oi) < 0) { struct object_id oid; nth_bitmap_object_oid(bitmap_git, &oid, pack_pos_to_index(pack, pos)); diff --git a/packfile.c b/packfile.c index 23a7f8a1917619..402c3b5dc73131 100644 --- a/packfile.c +++ b/packfile.c @@ -355,16 +355,17 @@ static void scan_windows(struct packed_git *p, } } -static int unuse_one_window(struct packed_git *current) +static int unuse_one_window(struct object_database *odb) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - if (current) - scan_windows(current, &lru_p, &lru_w, &lru_l); - for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) - scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + for (source = odb->sources; source; source = source->next) + for (e = source->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -529,15 +530,18 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (e = 
r->objects->packfiles->packs.head; e; e = e->next) { - if (e->pack->pack_fd == -1) - continue; - find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + for (source = r->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) + continue; + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + } } if (lru_p) @@ -740,8 +744,8 @@ unsigned char *use_pack(struct packed_git *p, win->len = (size_t)len; pack_mapped += win->len; - while (settings->packed_git_limit < pack_mapped - && unuse_one_window(p)) + while (settings->packed_git_limit < pack_mapped && + unuse_one_window(p->repo->objects)) ; /* nothing */ win->base = xmmap_gently(NULL, win->len, PROT_READ, MAP_PRIVATE, @@ -876,7 +880,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, p = strmap_get(&store->packs_by_path, key.buf); if (!p) { - p = add_packed_git(store->odb->repo, idx_path, + p = add_packed_git(store->source->odb->repo, idx_path, strlen(idx_path), local); if (p) packfile_store_add_pack(store, p); @@ -975,10 +979,8 @@ void for_each_file_in_pack_dir(const char *objdir, } struct prepare_pack_data { - struct repository *r; + struct odb_source *source; struct string_list *garbage; - int local; - struct multi_pack_index *m; }; static void prepare_pack(const char *full_name, size_t full_name_len, @@ -988,10 +990,11 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->m && midx_contains_pack(data->m, file_name))) { + !(data->source->packfiles->midx && + midx_contains_pack(data->source->packfiles->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->r->objects->packfiles, - trimmed_path, data->local); + packfile_store_load_pack(data->source->packfiles, + trimmed_path, data->source->local); free(trimmed_path); } @@ -1020,10 +1023,8 @@ static void prepare_packed_git_one(struct odb_source *source) { struct string_list garbage = STRING_LIST_INIT_DUP; struct prepare_pack_data data = { - .m = source->midx, - .r = source->odb->repo, + .source = source, .garbage = &garbage, - .local = source->local, }; for_each_file_in_pack_dir(source->path, prepare_pack, &data); @@ -1063,16 +1064,11 @@ static int sort_pack(const struct packfile_list_entry *a, void packfile_store_prepare(struct packfile_store *store) { - struct odb_source *source; - if (store->initialized) return; - odb_prepare_alternates(store->odb); - for (source = store->odb->sources; source; source = source->next) { - prepare_multi_pack_index_one(source); - prepare_packed_git_one(source); - } + prepare_multi_pack_index_one(store->source); + prepare_packed_git_one(store->source); sort_packs(&store->packs.head, sort_pack); for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) @@ -1092,10 +1088,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->odb->sources; source; source = source->next) { - struct multi_pack_index *m = source->midx; - if (!m) - continue; + if (store->midx) { + struct multi_pack_index *m = store->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } @@ -1250,11 +1244,15 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct 
repository *r, const struct object_id *oid) { - struct packfile_list_entry *e; + struct odb_source *source; + + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + for (e = source->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; + } - for (e = r->objects->packfiles->packs.head; e; e = e->next) - if (oidset_contains(&e->pack->bad_objects, oid)) - return e->pack; return NULL; } @@ -1580,24 +1578,25 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, &ent->ent); } -int packed_object_info(struct repository *r, struct packed_git *p, +int packed_object_info(struct packed_git *p, off_t obj_offset, struct object_info *oi) { struct pack_window *w_curs = NULL; unsigned long size; off_t curpos = obj_offset; - enum object_type type; + enum object_type type = OBJ_NONE; + int ret; /* * We always get the representation type, but only convert it to * a "real" type later if the caller is interested. */ if (oi->contentp) { - *oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep, + *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, oi->sizep, &type); if (!*oi->contentp) type = OBJ_BAD; - } else { + } else if (oi->sizep || oi->typep || oi->delta_base_oid) { type = unpack_object_header(p, &w_curs, &curpos, &size); } @@ -1607,12 +1606,12 @@ int packed_object_info(struct repository *r, struct packed_git *p, off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos, type, obj_offset); if (!base_offset) { - type = OBJ_BAD; + ret = -1; goto out; } *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos); if (*oi->sizep == 0) { - type = OBJ_BAD; + ret = -1; goto out; } } else { @@ -1625,7 +1624,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { error("could not find object at offset %"PRIuMAX" " "in pack %s", (uintmax_t)obj_offset, p->pack_name); - type = OBJ_BAD; + ret = -1; goto out; } @@ -1634,12 +1633,12 @@ int packed_object_info(struct repository *r, struct packed_git *p, if (oi->typep) { enum object_type ptot; - ptot = packed_to_object_type(r, p, obj_offset, + ptot = packed_to_object_type(p->repo, p, obj_offset, type, &w_curs, curpos); if (oi->typep) *oi->typep = ptot; if (ptot < 0) { - type = OBJ_BAD; + ret = -1; goto out; } } @@ -1649,19 +1648,37 @@ int packed_object_info(struct repository *r, struct packed_git *p, if (get_delta_base_oid(p, &w_curs, curpos, oi->delta_base_oid, type, obj_offset) < 0) { - type = OBJ_BAD; + ret = -1; goto out; } } else oidclr(oi->delta_base_oid, p->repo->hash_algo); } - oi->whence = in_delta_base_cache(p, obj_offset) ? 
OI_DBCACHED : - OI_PACKED; + oi->whence = OI_PACKED; + oi->u.packed.offset = obj_offset; + oi->u.packed.pack = p; + + switch (type) { + case OBJ_NONE: + oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN; + break; + case OBJ_REF_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; + break; + case OBJ_OFS_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; + break; + default: + oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; + break; + } + + ret = 0; out: unuse_pack(&w_curs); - return type; + return ret; } static void *unpack_compressed_entry(struct packed_git *p, @@ -2090,30 +2107,26 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -static int find_pack_entry(struct repository *r, +static int find_pack_entry(struct packfile_store *store, const struct object_id *oid, struct pack_entry *e) { struct packfile_list_entry *l; - packfile_store_prepare(r->objects->packfiles); - - for (struct odb_source *source = r->objects->sources; source; source = source->next) - if (source->midx && fill_midx_entry(source->midx, oid, e)) - return 1; - - if (!r->objects->packfiles->packs.head) - return 0; + packfile_store_prepare(store); + if (store->midx && fill_midx_entry(store->midx, oid, e)) + return 1; - for (l = r->objects->packfiles->packs.head; l; l = l->next) { + for (l = store->packs.head; l; l = l->next) { struct packed_git *p = l->pack; if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!r->objects->packfiles->skip_mru_updates) - packfile_list_prepend(&r->objects->packfiles->packs, p); + if (!store->skip_mru_updates) + packfile_list_prepend(&store->packs, p); return 1; } } + return 0; } @@ -2121,7 +2134,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 0; if (e.p->is_cruft) return 0; @@ -2139,9 +2152,9 @@ int packfile_store_read_object_info(struct packfile_store *store, unsigned flags UNUSED) { struct pack_entry e; - int rtype; + int ret; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 1; /* @@ -2151,41 +2164,35 @@ int packfile_store_read_object_info(struct packfile_store *store, if (!oi) return 0; - rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); - if (rtype < 0) { + ret = packed_object_info(e.p, e.offset, oi); + if (ret < 0) { mark_bad_packed_object(e.p, oid); return -1; } - if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); - } - return 0; } -static void maybe_invalidate_kept_pack_cache(struct repository *r, +static void maybe_invalidate_kept_pack_cache(struct packfile_store *store, unsigned flags) { - if (!r->objects->packfiles->kept_cache.packs) + if (!store->kept_cache.packs) return; - if (r->objects->packfiles->kept_cache.flags == flags) + if (store->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); - r->objects->packfiles->kept_cache.flags = 0; + FREE_AND_NULL(store->kept_cache.packs); + store->kept_cache.flags = 0; } -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags) { - maybe_invalidate_kept_pack_cache(r, flags); + maybe_invalidate_kept_pack_cache(store, flags); - if (!r->objects->packfiles->kept_cache.packs) { + if 
(!store->kept_cache.packs) { struct packed_git **packs = NULL; + struct packfile_list_entry *e; size_t nr = 0, alloc = 0; - struct packed_git *p; /* * We want "all" packs here, because we need to cover ones that @@ -2195,9 +2202,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - repo_for_each_pack(r, p) { - if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || - (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } @@ -2205,40 +2214,47 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->packfiles->kept_cache.packs = packs; - r->objects->packfiles->kept_cache.flags = flags; + store->kept_cache.packs = packs; + store->kept_cache.flags = flags; } - return r->objects->packfiles->kept_cache.packs; + return store->kept_cache.packs; } -int find_kept_pack_entry(struct repository *r, - const struct object_id *oid, - unsigned flags, - struct pack_entry *e) +int has_object_pack(struct repository *r, const struct object_id *oid) { - struct packed_git **cache; + struct odb_source *source; + struct pack_entry e; - for (cache = kept_pack_cache(r, flags); *cache; cache++) { - struct packed_git *p = *cache; - if (fill_pack_entry(oid, e, p)) - return 1; + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + int ret = find_pack_entry(source->packfiles, oid, &e); + if (ret) + return ret; } return 0; } -int has_object_pack(struct repository *r, const struct object_id *oid) -{ - struct pack_entry e; - return find_pack_entry(r, oid, &e); -} - int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags) { + struct odb_source *source; struct pack_entry e; - return find_kept_pack_entry(r, oid, flags, &e); + + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache; + + cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); + + for (; *cache; cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, &e, p)) + return 1; + } + } + + return 0; } int for_each_object_in_pack(struct packed_git *p, @@ -2288,32 +2304,46 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, enum for_each_object_flags flags) { - struct packed_git *p; + struct odb_source *source; int r = 0; int pack_errors = 0; - repo->objects->packfiles->skip_mru_updates = true; - repo_for_each_pack(repo, p) { - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; + odb_prepare_alternates(repo->objects); + + for (source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + + source->packfiles->skip_mru_updates = true; + + for (e = packfile_store_get_packs(source->packfiles); 
e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + pack_errors = 1; + continue; + } + + r = for_each_object_in_pack(p, cb, data, flags); + if (r) + break; } - r = for_each_object_in_pack(p, cb, data, flags); + + source->packfiles->skip_mru_updates = false; + if (r) break; } - repo->objects->packfiles->skip_mru_updates = false; return r ? r : pack_errors; } @@ -2411,11 +2441,11 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -struct packfile_store *packfile_store_new(struct object_database *odb) +struct packfile_store *packfile_store_new(struct odb_source *source) { struct packfile_store *store; CALLOC_ARRAY(store, 1); - store->odb = odb; + store->source = source; strmap_init(&store->packs_by_path); return store; } @@ -2437,6 +2467,9 @@ void packfile_store_close(struct packfile_store *store) BUG("want to close pack marked 'do-not-close'"); close_pack(e->pack); } + if (store->midx) + close_midx(store->midx); + store->midx = NULL; } struct odb_packed_read_stream { @@ -2533,8 +2566,9 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, oi.sizep = &size; if (packfile_store_read_object_info(store, oid, &oi, 0) || - oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(store->odb->repo) >= size) + oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA || + oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA || + repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, diff --git a/packfile.h b/packfile.h index 59d162a3f415e5..acc5c55ad57754 100644 --- a/packfile.h +++ b/packfile.h @@ -5,6 +5,7 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "repository.h" #include "strmap.h" /* in odb.h */ @@ -77,7 +78,7 @@ struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, * A store that manages packfiles for a given object database. */ struct packfile_store { - struct object_database *odb; + struct odb_source *source; /* * The list of packfiles in the order in which they have been most @@ -90,15 +91,19 @@ struct packfile_store { * is an on-disk ".keep" file or because they are marked as "kept" in * memory. * - * Should not be accessed directly, but via `kept_pack_cache()`. The - * list of packs gets invalidated when the stored flags and the flags - * passed to `kept_pack_cache()` mismatch. + * Should not be accessed directly, but via + * `packfile_store_get_kept_pack_cache()`. The list of packs gets + * invalidated when the stored flags and the flags passed to + * `packfile_store_get_kept_pack_cache()` mismatch. */ struct { struct packed_git **packs; unsigned flags; } kept_cache; + /* The multi-pack index that belongs to this specific packfile store. */ + struct multi_pack_index *midx; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. @@ -129,9 +134,9 @@ struct packfile_store { /* * Allocate and initialize a new empty packfile store for the given object - * database. + * database source. 
  */
-struct packfile_store *packfile_store_new(struct object_database *odb);
+struct packfile_store *packfile_store_new(struct odb_source *source);
 
 /*
  * Free the packfile store and all its associated state. All packfiles
@@ -169,14 +174,65 @@ void packfile_store_reprepare(struct packfile_store *store);
 void packfile_store_add_pack(struct packfile_store *store,
 			     struct packed_git *pack);
 
+/*
+ * Get all packs managed by the given store, including packfiles that are
+ * referenced by multi-pack indices.
+ */
+struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store);
+
+struct repo_for_each_pack_data {
+	struct odb_source *source;
+	struct packfile_list_entry *entry;
+};
+
+static inline struct repo_for_each_pack_data repo_for_each_pack_data_init(struct repository *repo)
+{
+	struct repo_for_each_pack_data data = { 0 };
+
+	odb_prepare_alternates(repo->objects);
+
+	for (struct odb_source *source = repo->objects->sources; source; source = source->next) {
+		struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles);
+		if (!entry)
+			continue;
+		data.source = source;
+		data.entry = entry;
+		break;
+	}
+
+	return data;
+}
+
+static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data *data)
+{
+	struct odb_source *source;
+
+	data->entry = data->entry->next;
+	if (data->entry)
+		return;
+
+	for (source = data->source->next; source; source = source->next) {
+		struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles);
+		if (!entry)
+			continue;
+		data->source = source;
+		data->entry = entry;
+		return;
+	}
+
+	data->source = NULL;
+	data->entry = NULL;
+}
+
 /*
  * Load and iterate through all packs of the given repository. This helper
  * function will yield packfiles from all object sources connected to the
  * repository.
  */
 #define repo_for_each_pack(repo, p) \
-	for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \
-	     ((p) = (e ? e->pack : NULL)); e = e->next)
+	for (struct repo_for_each_pack_data each_pack_data = repo_for_each_pack_data_init(repo); \
+	     ((p) = (each_pack_data.entry ? each_pack_data.entry->pack : NULL)); \
+	     repo_for_each_pack_data_next(&each_pack_data))
 
 int packfile_store_read_object_stream(struct odb_read_stream **out,
 				      struct packfile_store *store,
@@ -193,12 +249,6 @@ int packfile_store_read_object_info(struct packfile_store *store,
 				    struct object_info *oi,
 				    unsigned flags);
 
-/*
- * Get all packs managed by the given store, including packfiles that are
- * referenced by multi-pack indices.
- */
-struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store);
-
 /*
  * Open the packfile and add it to the store if it isn't yet known. Returns
  * either the newly opened packfile or the preexisting packfile. Returns a
@@ -210,6 +260,19 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store,
 int packfile_store_freshen_object(struct packfile_store *store,
 				  const struct object_id *oid);
 
+enum kept_pack_type {
+	KEPT_PACK_ON_DISK = (1 << 0),
+	KEPT_PACK_IN_CORE = (1 << 1),
+};
+
+/*
+ * Retrieve the cache of kept packs from the given packfile store. Accepts a
+ * combination of `kept_pack_type` flags. The cache is computed on demand and
+ * will be recomputed whenever the flags change.
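To make the intent of the reworked iterator concrete, here is a minimal usage sketch. It is not part of the patch: report_packs() is a made-up caller, and it relies only on repo_for_each_pack(), the KEPT_PACK_* flags above, and packfile_store_get_kept_pack_cache() as declared just below.

	#include "git-compat-util.h"
	#include "odb.h"
	#include "packfile.h"

	/* Walk every pack of every object source attached to the repository. */
	static void report_packs(struct repository *repo)
	{
		struct packed_git *p;
		struct odb_source *source;

		repo_for_each_pack(repo, p)
			fprintf(stderr, "pack %s (local: %u)\n",
				p->pack_name, (unsigned)p->pack_local);

		/* Kept packs are now cached per source instead of per repository. */
		odb_prepare_alternates(repo->objects);
		for (source = repo->objects->sources; source; source = source->next) {
			struct packed_git **kept =
				packfile_store_get_kept_pack_cache(source->packfiles,
								   KEPT_PACK_ON_DISK);
			for (; *kept; kept++)
				fprintf(stderr, "kept pack %s\n", (*kept)->pack_name);
		}
	}

The NULL terminator that packfile_store_get_kept_pack_cache() appends to its array is what the inner loop above relies on.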
+ */ +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -378,28 +441,20 @@ void release_pack_memory(size_t); /* global flag to enable extra checks when accessing packed objects */ extern int do_check_packed_object_crc; -int packed_object_info(struct repository *r, - struct packed_git *pack, +/* + * Look up the object info for a specific offset in the packfile. + * Returns zero on success, a negative error code otherwise. + */ +int packed_object_info(struct packed_git *pack, off_t offset, struct object_info *); void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -#define ON_DISK_KEEP_PACKS 1 -#define IN_CORE_KEEP_PACKS 2 - -/* - * Iff a pack file in the given repository contains the object named by sha1, - * return true and store its location to e. - */ -int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); - int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags); - /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. diff --git a/reachable.c b/reachable.c index b753c395530b6d..4b532039d5f84f 100644 --- a/reachable.c +++ b/reachable.c @@ -242,7 +242,7 @@ static int want_recent_object(struct recent_data *data, const struct object_id *oid) { if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS)) + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) return 0; return 1; } diff --git a/refs.c b/refs.c index 046b695bb20f54..627b7f8698d044 100644 --- a/refs.c +++ b/refs.c @@ -320,6 +320,49 @@ int check_refname_format(const char *refname, int flags) return check_or_sanitize_refname(refname, flags, NULL); } +int refs_fsck_ref(struct ref_store *refs UNUSED, struct fsck_options *o, + struct fsck_ref_report *report, + const char *refname UNUSED, const struct object_id *oid) +{ + if (is_null_oid(oid)) + return fsck_report_ref(o, report, FSCK_MSG_BAD_REF_OID, + "points to invalid object ID '%s'", + oid_to_hex(oid)); + + return 0; +} + +int refs_fsck_symref(struct ref_store *refs UNUSED, struct fsck_options *o, + struct fsck_ref_report *report, + const char *refname, const char *target) +{ + const char *stripped_refname; + + parse_worktree_ref(refname, NULL, NULL, &stripped_refname); + + if (!strcmp(stripped_refname, "HEAD") && + !starts_with(target, "refs/heads/") && + fsck_report_ref(o, report, FSCK_MSG_BAD_HEAD_TARGET, + "HEAD points to non-branch '%s'", target)) + return -1; + + if (is_root_ref(target)) + return 0; + + if (check_refname_format(target, 0) && + fsck_report_ref(o, report, FSCK_MSG_BAD_REFERENT_NAME, + "points to invalid refname '%s'", target)) + return -1; + + if (!starts_with(target, "refs/") && + !starts_with(target, "worktrees/") && + fsck_report_ref(o, report, FSCK_MSG_SYMREF_TARGET_IS_NOT_A_REF, + "points to non-ref target '%s'", target)) + return -1; + + return 0; +} + int refs_fsck(struct ref_store *refs, struct fsck_options *o, struct worktree *wt) { diff --git a/refs.h b/refs.h index d9051bbb0414c2..f0abfa1d93633e 100644 --- a/refs.h +++ b/refs.h @@ -653,6 +653,24 @@ int 
refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat */ int check_refname_format(const char *refname, int flags); +struct fsck_ref_report; + +/* + * Perform generic checks for a specific direct ref. This function is + * expected to be called by the ref backends for every symbolic ref. + */ +int refs_fsck_ref(struct ref_store *refs, struct fsck_options *o, + struct fsck_ref_report *report, + const char *refname, const struct object_id *oid); + +/* + * Perform generic checks for a specific symref target. This function is + * expected to be called by the ref backends for every symbolic ref. + */ +int refs_fsck_symref(struct ref_store *refs, struct fsck_options *o, + struct fsck_ref_report *report, + const char *refname, const char *target); + /* * Check the reference database for consistency. Return 0 if refs and * reflogs are consistent, and non-zero otherwise. The errors will be diff --git a/refs/files-backend.c b/refs/files-backend.c index 6f6f76a8d86dc4..240d3c3b26e0b5 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -354,13 +354,11 @@ static int for_each_root_ref(struct files_ref_store *refs, void *cb_data) { struct strbuf path = STRBUF_INIT, refname = STRBUF_INIT; - const char *dirname = refs->loose->root->name; struct dirent *de; - size_t dirnamelen; int ret; DIR *d; - files_ref_path(refs, &path, dirname); + files_ref_path(refs, &path, ""); d = opendir(path.buf); if (!d) { @@ -368,9 +366,6 @@ static int for_each_root_ref(struct files_ref_store *refs, return -1; } - strbuf_addstr(&refname, dirname); - dirnamelen = refname.len; - while ((de = readdir(d)) != NULL) { unsigned char dtype; @@ -378,6 +373,8 @@ static int for_each_root_ref(struct files_ref_store *refs, continue; if (ends_with(de->d_name, ".lock")) continue; + + strbuf_reset(&refname); strbuf_addstr(&refname, de->d_name); dtype = get_dtype(de, &path, 1); @@ -386,8 +383,6 @@ static int for_each_root_ref(struct files_ref_store *refs, if (ret) goto done; } - - strbuf_setlen(&refname, dirnamelen); } ret = 0; @@ -3720,64 +3715,50 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store, typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store, struct fsck_options *o, const char *refname, - struct dir_iterator *iter); + const char *path, + int mode); -static int files_fsck_symref_target(struct fsck_options *o, +static int files_fsck_symref_target(struct ref_store *ref_store, + struct fsck_options *o, struct fsck_ref_report *report, + const char *refname, struct strbuf *referent, unsigned int symbolic_link) { - int is_referent_root; char orig_last_byte; size_t orig_len; int ret = 0; orig_len = referent->len; orig_last_byte = referent->buf[orig_len - 1]; - if (!symbolic_link) - strbuf_rtrim(referent); - - is_referent_root = is_root_ref(referent->buf); - if (!is_referent_root && - !starts_with(referent->buf, "refs/") && - !starts_with(referent->buf, "worktrees/")) { - ret = fsck_report_ref(o, report, - FSCK_MSG_SYMREF_TARGET_IS_NOT_A_REF, - "points to non-ref target '%s'", referent->buf); - - } - if (!is_referent_root && check_refname_format(referent->buf, 0)) { - ret = fsck_report_ref(o, report, - FSCK_MSG_BAD_REFERENT_NAME, - "points to invalid refname '%s'", referent->buf); - goto out; - } + if (!symbolic_link) { + strbuf_rtrim(referent); - if (symbolic_link) - goto out; + if (referent->len == orig_len || + (referent->len < orig_len && orig_last_byte != '\n')) { + ret |= fsck_report_ref(o, report, + FSCK_MSG_REF_MISSING_NEWLINE, + "misses LF at the end"); + } - if 
(referent->len == orig_len || - (referent->len < orig_len && orig_last_byte != '\n')) { - ret = fsck_report_ref(o, report, - FSCK_MSG_REF_MISSING_NEWLINE, - "misses LF at the end"); + if (referent->len != orig_len && referent->len != orig_len - 1) { + ret |= fsck_report_ref(o, report, + FSCK_MSG_TRAILING_REF_CONTENT, + "has trailing whitespaces or newlines"); + } } - if (referent->len != orig_len && referent->len != orig_len - 1) { - ret = fsck_report_ref(o, report, - FSCK_MSG_TRAILING_REF_CONTENT, - "has trailing whitespaces or newlines"); - } + ret |= refs_fsck_symref(ref_store, o, report, refname, referent->buf); -out: - return ret; + return ret ? -1 : 0; } static int files_fsck_refs_content(struct ref_store *ref_store, struct fsck_options *o, const char *target_name, - struct dir_iterator *iter) + const char *path, + int mode) { struct strbuf ref_content = STRBUF_INIT; struct strbuf abs_gitdir = STRBUF_INIT; @@ -3791,7 +3772,7 @@ static int files_fsck_refs_content(struct ref_store *ref_store, report.path = target_name; - if (S_ISLNK(iter->st.st_mode)) { + if (S_ISLNK(mode)) { const char *relative_referent_path = NULL; ret = fsck_report_ref(o, &report, @@ -3803,7 +3784,7 @@ static int files_fsck_refs_content(struct ref_store *ref_store, if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1])) strbuf_addch(&abs_gitdir, '/'); - strbuf_add_real_path(&ref_content, iter->path.buf); + strbuf_add_real_path(&ref_content, path); skip_prefix(ref_content.buf, abs_gitdir.buf, &relative_referent_path); @@ -3812,11 +3793,12 @@ static int files_fsck_refs_content(struct ref_store *ref_store, else strbuf_addbuf(&referent, &ref_content); - ret |= files_fsck_symref_target(o, &report, &referent, 1); + ret |= files_fsck_symref_target(ref_store, o, &report, + target_name, &referent, 1); goto cleanup; } - if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) { + if (strbuf_read_file(&ref_content, path, 0) < 0) { /* * Ref file could be removed by another concurrent process. We should * ignore this error and continue to the next ref. @@ -3824,7 +3806,7 @@ static int files_fsck_refs_content(struct ref_store *ref_store, if (errno == ENOENT) goto cleanup; - ret = error_errno(_("cannot read ref file '%s'"), iter->path.buf); + ret = error_errno(_("cannot read ref file '%s'"), path); goto cleanup; } @@ -3851,8 +3833,11 @@ static int files_fsck_refs_content(struct ref_store *ref_store, "has trailing garbage: '%s'", trailing); goto cleanup; } + + ret = refs_fsck_ref(ref_store, o, &report, target_name, &oid); } else { - ret = files_fsck_symref_target(o, &report, &referent, 0); + ret = files_fsck_symref_target(ref_store, o, &report, + target_name, &referent, 0); goto cleanup; } @@ -3866,21 +3851,25 @@ static int files_fsck_refs_content(struct ref_store *ref_store, static int files_fsck_refs_name(struct ref_store *ref_store UNUSED, struct fsck_options *o, const char *refname, - struct dir_iterator *iter) + const char *path, + int mode UNUSED) { struct strbuf sb = STRBUF_INIT; + const char *filename; int ret = 0; + filename = basename((char *) path); + /* * Ignore the files ending with ".lock" as they may be lock files * However, do not allow bare ".lock" files. */ - if (iter->basename[0] != '.' && ends_with(iter->basename, ".lock")) + if (filename[0] != '.' && ends_with(filename, ".lock")) + goto cleanup; + + if (is_root_ref(refname)) goto cleanup; - /* - * This works right now because we never check the root refs. 
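As a sketch of how the new generic helpers are meant to be driven by a ref backend, the fragment below shows the expected call shape. It is illustrative only: check_one_ref() is hypothetical, and just the refs_fsck_ref()/refs_fsck_symref() signatures and the fsck_ref_report initialization come from this patch.

	/* Hypothetical per-ref callback in a ref backend's fsck walk. */
	static int check_one_ref(struct ref_store *refs, struct fsck_options *o,
				 const char *refname, const struct object_id *oid,
				 const char *symref_target)
	{
		struct fsck_ref_report report = { .path = refname };

		if (symref_target)
			/* symref checks: badHeadTarget, badReferentName, symrefTargetIsNotARef */
			return refs_fsck_symref(refs, o, &report, refname, symref_target);

		/* direct-ref checks: badRefOid */
		return refs_fsck_ref(refs, o, &report, refname, oid);
	}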
- */ if (check_refname_format(refname, 0)) { struct fsck_ref_report report = { 0 }; @@ -3895,11 +3884,44 @@ static int files_fsck_refs_name(struct ref_store *ref_store UNUSED, return ret; } +static const files_fsck_refs_fn fsck_refs_fn[]= { + files_fsck_refs_name, + files_fsck_refs_content, + NULL, +}; + +static int files_fsck_ref(struct ref_store *ref_store, + struct fsck_options *o, + const char *refname, + const char *path, + int mode) +{ + int ret = 0; + + if (o->verbose) + fprintf_ln(stderr, "Checking %s", refname); + + if (!S_ISREG(mode) && !S_ISLNK(mode)) { + struct fsck_ref_report report = { .path = refname }; + + if (fsck_report_ref(o, &report, + FSCK_MSG_BAD_REF_FILETYPE, + "unexpected file type")) + ret = -1; + goto out; + } + + for (size_t i = 0; fsck_refs_fn[i]; i++) + if (fsck_refs_fn[i](ref_store, o, refname, path, mode)) + ret = -1; + +out: + return ret; +} + static int files_fsck_refs_dir(struct ref_store *ref_store, struct fsck_options *o, - const char *refs_check_dir, - struct worktree *wt, - files_fsck_refs_fn *fsck_refs_fn) + struct worktree *wt) { struct strbuf refname = STRBUF_INIT; struct strbuf sb = STRBUF_INIT; @@ -3907,7 +3929,7 @@ static int files_fsck_refs_dir(struct ref_store *ref_store, int iter_status; int ret = 0; - strbuf_addf(&sb, "%s/%s", ref_store->gitdir, refs_check_dir); + strbuf_addf(&sb, "%s/refs", ref_store->gitdir); iter = dir_iterator_begin(sb.buf, 0); if (!iter) { @@ -3919,31 +3941,17 @@ static int files_fsck_refs_dir(struct ref_store *ref_store, } while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) { - if (S_ISDIR(iter->st.st_mode)) { + if (S_ISDIR(iter->st.st_mode)) continue; - } else if (S_ISREG(iter->st.st_mode) || - S_ISLNK(iter->st.st_mode)) { - strbuf_reset(&refname); - if (!is_main_worktree(wt)) - strbuf_addf(&refname, "worktrees/%s/", wt->id); - strbuf_addf(&refname, "%s/%s", refs_check_dir, - iter->relative_path); + strbuf_reset(&refname); + if (!is_main_worktree(wt)) + strbuf_addf(&refname, "worktrees/%s/", wt->id); + strbuf_addf(&refname, "refs/%s", iter->relative_path); - if (o->verbose) - fprintf_ln(stderr, "Checking %s", refname.buf); - - for (size_t i = 0; fsck_refs_fn[i]; i++) { - if (fsck_refs_fn[i](ref_store, o, refname.buf, iter)) - ret = -1; - } - } else { - struct fsck_ref_report report = { .path = iter->basename }; - if (fsck_report_ref(o, &report, - FSCK_MSG_BAD_REF_FILETYPE, - "unexpected file type")) - ret = -1; - } + if (files_fsck_ref(ref_store, o, refname.buf, + iter->path.buf, iter->st.st_mode) < 0) + ret = -1; } if (iter_status != ITER_DONE) @@ -3956,17 +3964,35 @@ static int files_fsck_refs_dir(struct ref_store *ref_store, return ret; } -static int files_fsck_refs(struct ref_store *ref_store, - struct fsck_options *o, - struct worktree *wt) +struct files_fsck_root_ref_data { + struct files_ref_store *refs; + struct fsck_options *o; + struct worktree *wt; + struct strbuf refname; + struct strbuf path; +}; + +static int files_fsck_root_ref(const char *refname, void *cb_data) { - files_fsck_refs_fn fsck_refs_fn[]= { - files_fsck_refs_name, - files_fsck_refs_content, - NULL, - }; + struct files_fsck_root_ref_data *data = cb_data; + struct stat st; - return files_fsck_refs_dir(ref_store, o, "refs", wt, fsck_refs_fn); + strbuf_reset(&data->refname); + if (!is_main_worktree(data->wt)) + strbuf_addf(&data->refname, "worktrees/%s/", data->wt->id); + strbuf_addstr(&data->refname, refname); + + strbuf_reset(&data->path); + strbuf_addf(&data->path, "%s/%s", data->refs->gitcommondir, data->refname.buf); + + if 
(stat(data->path.buf, &st)) { + if (errno == ENOENT) + return 0; + return error_errno("failed to read ref: '%s'", data->path.buf); + } + + return files_fsck_ref(&data->refs->base, data->o, data->refname.buf, + data->path.buf, st.st_mode); } static int files_fsck(struct ref_store *ref_store, @@ -3975,9 +4001,27 @@ static int files_fsck(struct ref_store *ref_store, { struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_READ, "fsck"); + struct files_fsck_root_ref_data data = { + .refs = refs, + .o = o, + .wt = wt, + .refname = STRBUF_INIT, + .path = STRBUF_INIT, + }; + int ret = 0; + + if (files_fsck_refs_dir(ref_store, o, wt) < 0) + ret = -1; + + if (for_each_root_ref(refs, files_fsck_root_ref, &data) < 0) + ret = -1; + + if (refs->packed_ref_store->be->fsck(refs->packed_ref_store, o, wt) < 0) + ret = -1; - return files_fsck_refs(ref_store, o, wt) | - refs->packed_ref_store->be->fsck(refs->packed_ref_store, o, wt); + strbuf_release(&data.refname); + strbuf_release(&data.path); + return ret; } struct ref_storage_be refs_be_files = { diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 4319a4eacbafc4..fe74af73afdb7a 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -10,9 +10,10 @@ #include "../gettext.h" #include "../hash.h" #include "../hex.h" -#include "../iterator.h" #include "../ident.h" +#include "../iterator.h" #include "../object.h" +#include "../parse.h" #include "../path.h" #include "../refs.h" #include "../reftable/reftable-basics.h" @@ -25,8 +26,8 @@ #include "../setup.h" #include "../strmap.h" #include "../trace2.h" +#include "../worktree.h" #include "../write-or-die.h" -#include "parse.h" #include "refs-internal.h" /* @@ -172,6 +173,37 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto return refs; } +static int backend_for_worktree(struct reftable_backend **out, + struct reftable_ref_store *store, + const char *worktree_name) +{ + struct strbuf worktree_dir = STRBUF_INIT; + int ret; + + *out = strmap_get(&store->worktree_backends, worktree_name); + if (*out) { + ret = 0; + goto out; + } + + strbuf_addf(&worktree_dir, "%s/worktrees/%s/reftable", + store->base.repo->commondir, worktree_name); + + CALLOC_ARRAY(*out, 1); + store->err = ret = reftable_backend_init(*out, worktree_dir.buf, + &store->write_options); + if (ret < 0) { + free(*out); + goto out; + } + + strmap_put(&store->worktree_backends, worktree_name, *out); + +out: + strbuf_release(&worktree_dir); + return ret; +} + /* * Some refs are global to the repository (refs/heads/{*}), while others are * local to the worktree (eg. HEAD, refs/bisect/{*}). We solve this by having @@ -191,19 +223,19 @@ static int backend_for(struct reftable_backend **out, const char **rewritten_ref, int reload) { - struct reftable_backend *be; const char *wtname; int wtname_len; + int ret; if (!refname) { - be = &store->main_backend; + *out = &store->main_backend; + ret = 0; goto out; } switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) { case REF_WORKTREE_OTHER: { static struct strbuf wtname_buf = STRBUF_INIT; - struct strbuf wt_dir = STRBUF_INIT; /* * We're using a static buffer here so that we don't need to @@ -223,20 +255,8 @@ static int backend_for(struct reftable_backend **out, * already and error out when trying to write a reference via * both stacks. 
*/ - be = strmap_get(&store->worktree_backends, wtname_buf.buf); - if (!be) { - strbuf_addf(&wt_dir, "%s/worktrees/%s/reftable", - store->base.repo->commondir, wtname_buf.buf); - - CALLOC_ARRAY(be, 1); - store->err = reftable_backend_init(be, wt_dir.buf, - &store->write_options); - assert(store->err != REFTABLE_API_ERROR); - - strmap_put(&store->worktree_backends, wtname_buf.buf, be); - } + ret = backend_for_worktree(out, store, wtname_buf.buf); - strbuf_release(&wt_dir); goto out; } case REF_WORKTREE_CURRENT: @@ -245,27 +265,24 @@ static int backend_for(struct reftable_backend **out, * main worktree. We thus return the main stack in that case. */ if (!store->worktree_backend.stack) - be = &store->main_backend; + *out = &store->main_backend; else - be = &store->worktree_backend; + *out = &store->worktree_backend; + ret = 0; goto out; case REF_WORKTREE_MAIN: case REF_WORKTREE_SHARED: - be = &store->main_backend; + *out = &store->main_backend; + ret = 0; goto out; default: BUG("unhandled worktree reference type"); } out: - if (reload) { - int ret = reftable_stack_reload(be->stack); - if (ret) - return ret; - } - *out = be; - - return 0; + if (reload && !ret) + ret = reftable_stack_reload((*out)->stack); + return ret; } static int should_write_log(struct reftable_ref_store *refs, const char *refname) @@ -2746,24 +2763,92 @@ static int reftable_fsck_error_handler(struct reftable_fsck_info *info, } static int reftable_be_fsck(struct ref_store *ref_store, struct fsck_options *o, - struct worktree *wt UNUSED) + struct worktree *wt) { - struct reftable_ref_store *refs; - struct strmap_entry *entry; - struct hashmap_iter iter; - int ret = 0; + struct reftable_ref_store *refs = + reftable_be_downcast(ref_store, REF_STORE_READ, "fsck"); + struct reftable_ref_iterator *iter = NULL; + struct reftable_ref_record ref = { 0 }; + struct fsck_ref_report report = { 0 }; + struct strbuf refname = STRBUF_INIT; + struct reftable_backend *backend; + int ret, errors = 0; + + if (is_main_worktree(wt)) { + backend = &refs->main_backend; + } else { + ret = backend_for_worktree(&backend, refs, wt->id); + if (ret < 0) { + ret = error(_("reftable stack for worktree '%s' is broken"), + wt->id); + goto out; + } + } - refs = reftable_be_downcast(ref_store, REF_STORE_READ, "fsck"); + errors |= reftable_fsck_check(backend->stack, reftable_fsck_error_handler, + reftable_fsck_verbose_handler, o); - ret |= reftable_fsck_check(refs->main_backend.stack, reftable_fsck_error_handler, - reftable_fsck_verbose_handler, o); + iter = ref_iterator_for_stack(refs, backend->stack, "", NULL, 0); + if (!iter) { + ret = error(_("could not create iterator for worktree '%s'"), wt->id); + goto out; + } - strmap_for_each_entry(&refs->worktree_backends, &iter, entry) { - struct reftable_backend *b = (struct reftable_backend *)entry->value; - ret |= reftable_fsck_check(b->stack, reftable_fsck_error_handler, - reftable_fsck_verbose_handler, o); + while (1) { + ret = reftable_iterator_next_ref(&iter->iter, &ref); + if (ret > 0) + break; + if (ret < 0) { + ret = error(_("could not read record for worktree '%s'"), wt->id); + goto out; + } + + strbuf_reset(&refname); + if (!is_main_worktree(wt)) + strbuf_addf(&refname, "worktrees/%s/", wt->id); + strbuf_addstr(&refname, ref.refname); + report.path = refname.buf; + + switch (ref.value_type) { + case REFTABLE_REF_VAL1: + case REFTABLE_REF_VAL2: { + struct object_id oid; + unsigned hash_id; + + switch (reftable_stack_hash_id(backend->stack)) { + case REFTABLE_HASH_SHA1: + hash_id = GIT_HASH_SHA1; + break; 
+ case REFTABLE_HASH_SHA256: + hash_id = GIT_HASH_SHA256; + break; + default: + BUG("unhandled hash ID %d", + reftable_stack_hash_id(backend->stack)); + } + + oidread(&oid, reftable_ref_record_val1(&ref), + &hash_algos[hash_id]); + + errors |= refs_fsck_ref(ref_store, o, &report, ref.refname, &oid); + break; + } + case REFTABLE_REF_SYMREF: + errors |= refs_fsck_symref(ref_store, o, &report, ref.refname, + ref.value.symref); + break; + default: + BUG("unhandled reference value type %d", ref.value_type); + } } + ret = errors ? -1 : 0; + +out: + if (iter) + ref_iterator_free(&iter->base); + reftable_ref_record_release(&ref); + strbuf_release(&refname); return ret; } diff --git a/revision.c b/revision.c index 1858e093eeeb89..b65a76377062cd 100644 --- a/revision.c +++ b/revision.c @@ -2518,14 +2518,14 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg die(_("--unpacked= no longer supported")); } else if (!strcmp(arg, "--no-kept-objects")) { revs->no_kept_objects = 1; - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) { revs->no_kept_objects = 1; if (!strcmp(optarg, "in-core")) - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; if (!strcmp(optarg, "on-disk")) - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (!strcmp(arg, "-r")) { revs->diff = 1; revs->diffopt.flags.recursive = 1; diff --git a/setup.c b/setup.c index 3a6a048620dd7d..b723f8b33931bd 100644 --- a/setup.c +++ b/setup.c @@ -2693,7 +2693,7 @@ int init_db(const char *git_dir, const char *real_git_dir, * have set up the repository format such that we can evaluate * includeIf conditions correctly in the case of re-initialization. */ - repo_config(the_repository, platform_core_config, NULL); + repo_config(the_repository, git_default_core_config, NULL); safe_create_dir(the_repository, git_dir, 0); diff --git a/strbuf.c b/strbuf.c index 7fb7d12ac0cb9e..59678bf5b03e0b 100644 --- a/strbuf.c +++ b/strbuf.c @@ -566,7 +566,7 @@ ssize_t strbuf_write(struct strbuf *sb, FILE *f) return sb->len ? 
fwrite(sb->buf, 1, sb->len, f) : 0; } -#define STRBUF_MAXLINK (2*PATH_MAX) +#define STRBUF_MAXLINK (32767) int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) { @@ -578,12 +578,12 @@ int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) while (hint < STRBUF_MAXLINK) { ssize_t len; - strbuf_grow(sb, hint); - len = readlink(path, sb->buf, hint); + strbuf_grow(sb, hint + 1); + len = readlink(path, sb->buf, hint + 1); if (len < 0) { if (errno != ERANGE) break; - } else if (len < hint) { + } else if (len <= hint) { strbuf_setlen(sb, len); return 0; } diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index 0ef483659d561f..3c1f553b8120ec 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -905,4 +905,34 @@ test_expect_success '--[no-]references option should apply to fsck' ' ) ' +test_expect_success 'complains about broken root ref' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + echo "ref: refs/heads/../HEAD" >.git/HEAD && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: HEAD: badReferentName: points to invalid refname ${SQ}refs/heads/../HEAD${SQ} + EOF + test_cmp expect err + ) +' + +test_expect_success 'complains about broken root ref in worktree' ' + test_when_finished "rm -rf repo worktree" && + git init repo && + ( + cd repo && + test_commit initial && + git worktree add ../worktree && + echo "ref: refs/heads/../HEAD" >.git/worktrees/worktree/HEAD && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: worktrees/worktree/HEAD: badReferentName: points to invalid refname ${SQ}refs/heads/../HEAD${SQ} + EOF + test_cmp expect err + ) +' + test_done diff --git a/t/t0614-reftable-fsck.sh b/t/t0614-reftable-fsck.sh index 677eb9143c9de4..d24b87f9611975 100755 --- a/t/t0614-reftable-fsck.sh +++ b/t/t0614-reftable-fsck.sh @@ -55,4 +55,48 @@ for TABLE_NAME in "foo-bar-e4d12d59.ref" \ ' done +test_expect_success 'worktree stacks can be verified' ' + test_when_finished "rm -rf repo worktree" && + git init repo && + test_commit -C repo initial && + git -C repo worktree add ../worktree && + + git -C worktree refs verify 2>err && + test_must_be_empty err && + + REFTABLE_DIR=$(git -C worktree rev-parse --git-dir)/reftable && + EXISTING_TABLE=$(head -n1 "$REFTABLE_DIR/tables.list") && + mv "$REFTABLE_DIR/$EXISTING_TABLE" "$REFTABLE_DIR/broken.ref" && + + for d in repo worktree + do + echo "broken.ref" >"$REFTABLE_DIR/tables.list" && + git -C "$d" refs verify 2>err && + cat >expect <<-EOF && + warning: broken.ref: badReftableTableName: invalid reftable table name + EOF + test_cmp expect err && + + echo garbage >"$REFTABLE_DIR/tables.list" && + test_must_fail git -C "$d" refs verify 2>err && + cat >expect <<-EOF && + error: reftable stack for worktree ${SQ}worktree${SQ} is broken + EOF + test_cmp expect err || return 1 + + done +' + +test_expect_success 'invalid symref gets reported' ' + test_when_finished "rm -rf repo" && + git init repo && + test_commit -C repo initial && + git -C repo symbolic-ref refs/heads/symref garbage && + test_must_fail git -C repo refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/symref: badReferentName: points to invalid refname ${SQ}garbage${SQ} + EOF + test_cmp expect err +' + test_done diff --git a/t/t1410-reflog.sh b/t/t1410-reflog.sh index e30f87a35812b8..ce71f9a30ae1ee 100755 --- a/t/t1410-reflog.sh +++ b/t/t1410-reflog.sh @@ -130,10 +130,10 @@ test_expect_success 'pass through -- to sub-command' ' test_expect_success 
rewind ' test_tick && git reset --hard HEAD~2 && - test -f C && - test -f A/B/E && - ! test -f F && - ! test -f A/G && + test_path_is_file C && + test_path_is_file A/B/E && + test_path_is_missing F && + test_path_is_missing A/G && check_have A B C D E F G H I J K L && diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index c4b651c2dc7938..3fae05f9d9f805 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -105,7 +105,7 @@ test_expect_success REFFILES 'HEAD link pointing at a funny object' ' echo $ZERO_OID >.git/HEAD && # avoid corrupt/broken HEAD from interfering with repo discovery test_must_fail env GIT_DIR=.git git fsck 2>out && - test_grep "detached HEAD points" out + test_grep "HEAD: badRefOid: points to invalid object ID ${SQ}$ZERO_OID${SQ}" out ' test_expect_success 'HEAD link pointing at a funny place' ' @@ -113,7 +113,7 @@ test_expect_success 'HEAD link pointing at a funny place' ' test-tool ref-store main create-symref HEAD refs/funny/place && # avoid corrupt/broken HEAD from interfering with repo discovery test_must_fail env GIT_DIR=.git git fsck 2>out && - test_grep "HEAD points to something strange" out + test_grep "HEAD: badHeadTarget: HEAD points to non-branch ${SQ}refs/funny/place${SQ}" out ' test_expect_success REFFILES 'HEAD link pointing at a funny object (from different wt)' ' @@ -123,7 +123,7 @@ test_expect_success REFFILES 'HEAD link pointing at a funny object (from differe echo $ZERO_OID >.git/HEAD && # avoid corrupt/broken HEAD from interfering with repo discovery test_must_fail git -C wt fsck 2>out && - test_grep "main-worktree/HEAD: detached HEAD points" out + test_grep "HEAD: badRefOid: points to invalid object ID ${SQ}$ZERO_OID${SQ}" out ' test_expect_success REFFILES 'other worktree HEAD link pointing at a funny object' ' @@ -131,7 +131,7 @@ test_expect_success REFFILES 'other worktree HEAD link pointing at a funny objec git worktree add other && echo $ZERO_OID >.git/worktrees/other/HEAD && test_must_fail git fsck 2>out && - test_grep "worktrees/other/HEAD: detached HEAD points" out + test_grep "worktrees/other/HEAD: badRefOid: points to invalid object ID ${SQ}$ZERO_OID${SQ}" out ' test_expect_success 'other worktree HEAD link pointing at missing object' ' @@ -148,7 +148,7 @@ test_expect_success 'other worktree HEAD link pointing at a funny place' ' git worktree add other && git -C other symbolic-ref HEAD refs/funny/place && test_must_fail git fsck 2>out && - test_grep "worktrees/other/HEAD points to something strange" out + test_grep "worktrees/other/HEAD: badHeadTarget: HEAD points to non-branch ${SQ}refs/funny/place${SQ}" out ' test_expect_success 'commit with multiple signatures is okay' ' diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh index 961c6aac256135..c8c1c5c06b6037 100755 --- a/t/t5003-archive-zip.sh +++ b/t/t5003-archive-zip.sh @@ -239,6 +239,40 @@ check_zip with_untracked2 check_added with_untracked2 untracked one/untracked check_added with_untracked2 untracked two/untracked +test_expect_success 'git-archive --format=zip with bigFile delta chains' ' + test_when_finished rm -rf repo && + git init repo && + ( + cd repo && + test-tool genrandom foo 100000 >base && + { + cat base && + echo "trailing data" + } >delta-1 && + { + cat delta-1 && + echo "trailing data" + } >delta-2 && + git add . 
&& + git commit -m "blobs" && + git repack -Ad && + git verify-pack -v .git/objects/pack/pack-*.idx >stats && + test_grep "chain length = 1: 1 object" stats && + test_grep "chain length = 2: 1 object" stats && + + git -c core.bigFileThreshold=1k archive --format=zip HEAD >archive.zip && + if test_have_prereq UNZIP + then + mkdir unpack && + cd unpack && + "$GIT_UNZIP" ../archive.zip && + test_cmp base ../base && + test_cmp delta-1 ../delta-1 && + test_cmp delta-2 ../delta-2 + fi + ) +' + # Test remote archive over HTTP protocol. # # Note: this should be the last part of this test suite, because diff --git a/utf8.c b/utf8.c index 35a02519392e65..96460cc414348b 100644 --- a/utf8.c +++ b/utf8.c @@ -515,6 +515,19 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, out = xrealloc(out, outalloc); outpos = out + sofar; outsz = outalloc - sofar - 1; +#ifdef ICONV_RESTART_RESET + /* + * If iconv(3) messes up piecemeal conversions + * then restore the original pointers, sizes, + * and converter state, then retry converting + * the full string using the reallocated buffer. + */ + insz += cp - (iconv_ibp)in; /* Restore insz */ + cp = (iconv_ibp)in; /* original start value */ + outpos = out + bom_len; /* original start value */ + outsz = outalloc - bom_len - 1; /* new len */ + iconv(conv, NULL, NULL, NULL, NULL); /* reset iconv machinery */ +#endif } else { *outpos = '\0';
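For context, the workaround relies on a documented piece of iconv(3) behaviour: calling iconv() with all-NULL buffer arguments resets the conversion descriptor to its initial shift state, after which the whole input can be converted again from scratch. Below is a small standalone sketch of that pattern. It is not part of the patch; the encoding name is only an example, and the exact constness of iconv()'s input pointer varies by platform, which is what the iconv_ibp typedef in utf8.c papers over.

	#include <iconv.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const char *msg = "stateful encodings need care";
		iconv_t cd = iconv_open("UTF-8", "ISO-2022-JP");
		char out[64];
		char *in, *outp;
		size_t insz, outsz;

		if (cd == (iconv_t)-1)
			return 1;

		/* A first attempt with a too-small output buffer stops part way. */
		in = (char *)msg;
		insz = strlen(msg);
		outp = out;
		outsz = 4;
		iconv(cd, &in, &insz, &outp, &outsz);

		/* Reset the converter to its initial shift state ... */
		iconv(cd, NULL, NULL, NULL, NULL);

		/* ... and convert the whole input again into the full buffer. */
		in = (char *)msg;
		insz = strlen(msg);
		outp = out;
		outsz = sizeof(out) - 1;
		if (iconv(cd, &in, &insz, &outp, &outsz) == (size_t)-1)
			return 1;
		*outp = '\0';

		puts(out);
		return iconv_close(cd);
	}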