From 2d79aa9095577bdc2049cfa938d18d3ebed5349f Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:24:13 +0800 Subject: [PATCH 1/9] fsck: rename "skiplist" to "skip_oids" The "skiplist" field in "fsck_options" is related to objects. Because we are going to introduce ref consistency check, the "skiplist" name is too general which will make the caller think "skiplist" is related to both the refs and objects. It may seem that for both refs and objects, we should provide a general "skiplist" here. However, the type for "skiplist" is `struct oidset` which is totally unsuitable for refs. To avoid above ambiguity, rename "skiplist" to "skip_oids". Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- fsck.c | 4 ++-- fsck.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fsck.c b/fsck.c index eea71454701c6d..3f324414924d08 100644 --- a/fsck.c +++ b/fsck.c @@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values) if (!strcmp(buf, "skiplist")) { if (equal == len) die("skiplist requires a path"); - oidset_parse_file(&options->skiplist, buf + equal + 1, + oidset_parse_file(&options->skip_oids, buf + equal + 1, the_repository->hash_algo); buf += len + 1; continue; @@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values) static int object_on_skiplist(struct fsck_options *opts, const struct object_id *oid) { - return opts && oid && oidset_contains(&opts->skiplist, oid); + return opts && oid && oidset_contains(&opts->skip_oids, oid); } __attribute__((format (printf, 5, 6))) diff --git a/fsck.h b/fsck.h index 6085a384f624f3..bcfb2e34cd6d1d 100644 --- a/fsck.h +++ b/fsck.h @@ -136,7 +136,7 @@ struct fsck_options { fsck_error error_func; unsigned strict:1; enum fsck_msg_type *msg_type; - struct oidset skiplist; + struct oidset skip_oids; struct oidset gitmodules_found; struct oidset gitmodules_done; struct oidset gitattributes_found; @@ -145,7 +145,7 @@ struct fsck_options { }; #define FSCK_OPTIONS_DEFAULT { \ - .skiplist = OIDSET_INIT, \ + .skip_oids = OIDSET_INIT, \ .gitmodules_found = OIDSET_INIT, \ .gitmodules_done = OIDSET_INIT, \ .gitattributes_found = OIDSET_INIT, \ From 8cd4a447b8b022a25e05653eb5f2dd80b9009bbe Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:24:25 +0800 Subject: [PATCH 2/9] fsck: rename objects-related fsck error functions The names of objects-related fsck error functions are generic. It's OK when there is only object database check. However, we are going to introduce refs database check report function. To avoid ambiguity, rename object-related fsck error functions to explicitly indicate these functions are used to report objects-related messages. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- builtin/fsck.c | 14 +++++++------- fsck.c | 17 +++++++++-------- fsck.h | 26 +++++++++++++------------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index d13a226c2ed86b..6d86bbe1e98e48 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -89,12 +89,12 @@ static int objerror(struct object *obj, const char *err) return -1; } -static int fsck_error_func(struct fsck_options *o UNUSED, - const struct object_id *oid, - enum object_type object_type, - enum fsck_msg_type msg_type, - enum fsck_msg_id msg_id UNUSED, - const char *message) +static int fsck_objects_error_func(struct fsck_options *o UNUSED, + const struct object_id *oid, + enum object_type object_type, + enum fsck_msg_type msg_type, + enum fsck_msg_id msg_id UNUSED, + const char *message) { switch (msg_type) { case FSCK_WARN: @@ -938,7 +938,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) fsck_walk_options.walk = mark_object; fsck_obj_options.walk = mark_used; - fsck_obj_options.error_func = fsck_error_func; + fsck_obj_options.error_func = fsck_objects_error_func; if (check_strict) fsck_obj_options.strict = 1; diff --git a/fsck.c b/fsck.c index 3f324414924d08..8347842cfb2c59 100644 --- a/fsck.c +++ b/fsck.c @@ -1200,7 +1200,7 @@ int fsck_buffer(const struct object_id *oid, enum object_type type, type); } -int fsck_error_function(struct fsck_options *o, +int fsck_objects_error_function(struct fsck_options *o, const struct object_id *oid, enum object_type object_type UNUSED, enum fsck_msg_type msg_type, @@ -1303,16 +1303,17 @@ int git_fsck_config(const char *var, const char *value, * Custom error callbacks that are used in more than one place. */ -int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o, - const struct object_id *oid, - enum object_type object_type, - enum fsck_msg_type msg_type, - enum fsck_msg_id msg_id, - const char *message) +int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o, + const struct object_id *oid, + enum object_type object_type, + enum fsck_msg_type msg_type, + enum fsck_msg_id msg_id, + const char *message) { if (msg_id == FSCK_MSG_GITMODULES_MISSING) { puts(oid_to_hex(oid)); return 0; } - return fsck_error_function(o, oid, object_type, msg_type, msg_id, message); + return fsck_objects_error_function(o, oid, object_type, + msg_type, msg_id, message); } diff --git a/fsck.h b/fsck.h index bcfb2e34cd6d1d..41ebebbb5949b6 100644 --- a/fsck.h +++ b/fsck.h @@ -120,16 +120,16 @@ typedef int (*fsck_error)(struct fsck_options *o, enum fsck_msg_type msg_type, enum fsck_msg_id msg_id, const char *message); -int fsck_error_function(struct fsck_options *o, - const struct object_id *oid, enum object_type object_type, - enum fsck_msg_type msg_type, enum fsck_msg_id msg_id, - const char *message); -int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o, - const struct object_id *oid, - enum object_type object_type, - enum fsck_msg_type msg_type, - enum fsck_msg_id msg_id, - const char *message); +int fsck_objects_error_function(struct fsck_options *o, + const struct object_id *oid, enum object_type object_type, + enum fsck_msg_type msg_type, enum fsck_msg_id msg_id, + const char *message); +int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o, + const struct object_id *oid, + enum object_type object_type, + enum fsck_msg_type msg_type, + enum fsck_msg_id msg_id, + const char *message); struct fsck_options { fsck_walk_func walk; @@ -150,7 +150,7 @@ struct fsck_options { .gitmodules_done = OIDSET_INIT, \ .gitattributes_found = OIDSET_INIT, \ .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_error_function \ + .error_func = fsck_objects_error_function \ } #define FSCK_OPTIONS_STRICT { \ .strict = 1, \ @@ -158,7 +158,7 @@ struct fsck_options { .gitmodules_done = OIDSET_INIT, \ .gitattributes_found = OIDSET_INIT, \ .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_error_function, \ + .error_func = fsck_objects_error_function, \ } #define FSCK_OPTIONS_MISSING_GITMODULES { \ .strict = 1, \ @@ -166,7 +166,7 @@ struct fsck_options { .gitmodules_done = OIDSET_INIT, \ .gitattributes_found = OIDSET_INIT, \ .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_error_cb_print_missing_gitmodules, \ + .error_func = fsck_objects_error_cb_print_missing_gitmodules, \ } /* descend in all linked child objects From 0ec5dfe8c45be2efd6350b3a1a3885c795a85578 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:26:47 +0800 Subject: [PATCH 3/9] fsck: make "fsck_error" callback generic The "fsck_error" callback is designed to report the objects-related error messages. It accepts two parameter "oid" and "object_type" which is not generic. In order to provide a unified callback which can report either objects or refs, remove the objects-related parameters and add the generic parameter "void *fsck_report". Create a new "fsck_object_report" structure which incorporates the removed parameters "oid" and "object_type". Then change the corresponding references to adapt to new "fsck_error" callback. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- builtin/fsck.c | 7 +++++-- builtin/mktag.c | 3 +-- fsck.c | 26 ++++++++++++++++---------- fsck.h | 17 ++++++++++++----- object-file.c | 9 ++++----- 5 files changed, 38 insertions(+), 24 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 6d86bbe1e98e48..766bbd014da2be 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -90,12 +90,15 @@ static int objerror(struct object *obj, const char *err) } static int fsck_objects_error_func(struct fsck_options *o UNUSED, - const struct object_id *oid, - enum object_type object_type, + void *fsck_report, enum fsck_msg_type msg_type, enum fsck_msg_id msg_id UNUSED, const char *message) { + struct fsck_object_report *report = fsck_report; + const struct object_id *oid = report->oid; + enum object_type object_type = report->object_type; + switch (msg_type) { case FSCK_WARN: /* TRANSLATORS: e.g. warning in tree 01bfda: */ diff --git a/builtin/mktag.c b/builtin/mktag.c index 4767f1a97e6df2..c6b644219fb19e 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -18,8 +18,7 @@ static int option_strict = 1; static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT; static int mktag_fsck_error_func(struct fsck_options *o UNUSED, - const struct object_id *oid UNUSED, - enum object_type object_type UNUSED, + void *fsck_report UNUSED, enum fsck_msg_type msg_type, enum fsck_msg_id msg_id UNUSED, const char *message) diff --git a/fsck.c b/fsck.c index 8347842cfb2c59..cca6ae144f1e23 100644 --- a/fsck.c +++ b/fsck.c @@ -232,6 +232,10 @@ static int report(struct fsck_options *options, enum fsck_msg_id msg_id, const char *fmt, ...) { va_list ap; + struct fsck_object_report report = { + .oid = oid, + .object_type = object_type + }; struct strbuf sb = STRBUF_INIT; enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options); int result; @@ -252,7 +256,7 @@ static int report(struct fsck_options *options, va_start(ap, fmt); strbuf_vaddf(&sb, fmt, ap); - result = options->error_func(options, oid, object_type, + result = options->error_func(options, &report, msg_type, msg_id, sb.buf); strbuf_release(&sb); va_end(ap); @@ -1201,12 +1205,14 @@ int fsck_buffer(const struct object_id *oid, enum object_type type, } int fsck_objects_error_function(struct fsck_options *o, - const struct object_id *oid, - enum object_type object_type UNUSED, - enum fsck_msg_type msg_type, - enum fsck_msg_id msg_id UNUSED, - const char *message) + void *fsck_report, + enum fsck_msg_type msg_type, + enum fsck_msg_id msg_id UNUSED, + const char *message) { + struct fsck_object_report *report = fsck_report; + const struct object_id *oid = report->oid; + if (msg_type == FSCK_WARN) { warning("object %s: %s", fsck_describe_object(o, oid), message); return 0; @@ -1304,16 +1310,16 @@ int git_fsck_config(const char *var, const char *value, */ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o, - const struct object_id *oid, - enum object_type object_type, + void *fsck_report, enum fsck_msg_type msg_type, enum fsck_msg_id msg_id, const char *message) { if (msg_id == FSCK_MSG_GITMODULES_MISSING) { - puts(oid_to_hex(oid)); + struct fsck_object_report *report = fsck_report; + puts(oid_to_hex(report->oid)); return 0; } - return fsck_objects_error_function(o, oid, object_type, + return fsck_objects_error_function(o, fsck_report, msg_type, msg_id, message); } diff --git a/fsck.h b/fsck.h index 41ebebbb5949b6..3b80d02506f34a 100644 --- a/fsck.h +++ b/fsck.h @@ -114,23 +114,30 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type); typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type, void *data, struct fsck_options *options); -/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */ +/* + * Callback for reporting errors either for objects or refs. The "fsck_report" + * is a generic pointer that can be used to pass any information. + */ typedef int (*fsck_error)(struct fsck_options *o, - const struct object_id *oid, enum object_type object_type, + void *fsck_report, enum fsck_msg_type msg_type, enum fsck_msg_id msg_id, const char *message); int fsck_objects_error_function(struct fsck_options *o, - const struct object_id *oid, enum object_type object_type, + void *fsck_report, enum fsck_msg_type msg_type, enum fsck_msg_id msg_id, const char *message); int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o, - const struct object_id *oid, - enum object_type object_type, + void *fsck_report, enum fsck_msg_type msg_type, enum fsck_msg_id msg_id, const char *message); +struct fsck_object_report { + const struct object_id *oid; + enum object_type object_type; +}; + struct fsck_options { fsck_walk_func walk; fsck_error error_func; diff --git a/object-file.c b/object-file.c index 065103be3ea923..05ac6ebed65bbf 100644 --- a/object-file.c +++ b/object-file.c @@ -2470,11 +2470,10 @@ int repo_has_object_file(struct repository *r, * give more context. */ static int hash_format_check_report(struct fsck_options *opts UNUSED, - const struct object_id *oid UNUSED, - enum object_type object_type UNUSED, - enum fsck_msg_type msg_type UNUSED, - enum fsck_msg_id msg_id UNUSED, - const char *message) + void *fsck_report UNUSED, + enum fsck_msg_type msg_type UNUSED, + enum fsck_msg_id msg_id UNUSED, + const char *message) { error(_("object fails fsck: %s"), message); return 1; From 3473d18fad56b40ac3bce457b3779866461cd921 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:26:57 +0800 Subject: [PATCH 4/9] fsck: add a unified interface for reporting fsck messages The static function "report" provided by "fsck.c" aims at checking error type and calling the callback "error_func" to report the message. Both refs and objects need to check the error type of the current fsck message. In order to extract this common behavior, create a new function "fsck_vreport". Instead of using "...", provide "va_list" to allow more flexibility. Instead of changing "report" prototype to be align with the "fsck_vreport" function, we leave the "report" prototype unchanged due to the reason that there are nearly 62 references about "report" function. Simply change "report" function to use "fsck_vreport" to report objects related messages. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- fsck.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/fsck.c b/fsck.c index cca6ae144f1e23..3614aa56a34f32 100644 --- a/fsck.c +++ b/fsck.c @@ -226,16 +226,15 @@ static int object_on_skiplist(struct fsck_options *opts, return opts && oid && oidset_contains(&opts->skip_oids, oid); } -__attribute__((format (printf, 5, 6))) -static int report(struct fsck_options *options, - const struct object_id *oid, enum object_type object_type, - enum fsck_msg_id msg_id, const char *fmt, ...) +/* + * Provide the common functionality for either fscking refs or objects. + * It will get the current msg error type and call the error_func callback + * which is registered in the "fsck_options" struct. + */ +static int fsck_vreport(struct fsck_options *options, + void *fsck_report, + enum fsck_msg_id msg_id, const char *fmt, va_list ap) { - va_list ap; - struct fsck_object_report report = { - .oid = oid, - .object_type = object_type - }; struct strbuf sb = STRBUF_INIT; enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options); int result; @@ -243,9 +242,6 @@ static int report(struct fsck_options *options, if (msg_type == FSCK_IGNORE) return 0; - if (object_on_skiplist(options, oid)) - return 0; - if (msg_type == FSCK_FATAL) msg_type = FSCK_ERROR; else if (msg_type == FSCK_INFO) @@ -254,11 +250,31 @@ static int report(struct fsck_options *options, prepare_msg_ids(); strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased); - va_start(ap, fmt); strbuf_vaddf(&sb, fmt, ap); - result = options->error_func(options, &report, + result = options->error_func(options, fsck_report, msg_type, msg_id, sb.buf); strbuf_release(&sb); + + return result; +} + +__attribute__((format (printf, 5, 6))) +static int report(struct fsck_options *options, + const struct object_id *oid, enum object_type object_type, + enum fsck_msg_id msg_id, const char *fmt, ...) +{ + va_list ap; + struct fsck_object_report report = { + .oid = oid, + .object_type = object_type + }; + int result; + + if (object_on_skiplist(options, oid)) + return 0; + + va_start(ap, fmt); + result = fsck_vreport(options, &report, msg_id, fmt, ap); va_end(ap); return result; From 2de307cdb2e80840b1fe6741561fed7c99fd8f08 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:27:08 +0800 Subject: [PATCH 5/9] fsck: add refs report function Introduce a new struct "fsck_ref_report" to contain the information we need when reporting refs-related messages. With the new "fsck_vreport" function, add a new function "fsck_report_ref" to report refs-related fsck error message. Unlike "report" function uses the exact parameters, we simply pass "struct fsck_ref_report *report" as the parameter. This is because at current we don't know exactly how many fields we need. By passing this parameter, we don't need to change this function prototype when we want to add more information into "fsck_ref_report". We have introduced "fsck_report_ref" function to report the error message for refs. We still need to add the corresponding callback function. Create refs-specific "error_func" callback "fsck_refs_error_function". Last, add "FSCK_REFS_OPTIONS_DEFAULT" macro to create default options when checking ref consistency. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- fsck.c | 39 +++++++++++++++++++++++++++++++++++++++ fsck.h | 25 +++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/fsck.c b/fsck.c index 3614aa56a34f32..e16c892f6ac1fb 100644 --- a/fsck.c +++ b/fsck.c @@ -280,6 +280,19 @@ static int report(struct fsck_options *options, return result; } +int fsck_report_ref(struct fsck_options *options, + struct fsck_ref_report *report, + enum fsck_msg_id msg_id, + const char *fmt, ...) +{ + va_list ap; + int result; + va_start(ap, fmt); + result = fsck_vreport(options, report, msg_id, fmt, ap); + va_end(ap); + return result; +} + void fsck_enable_object_names(struct fsck_options *options) { if (!options->object_names) @@ -1237,6 +1250,32 @@ int fsck_objects_error_function(struct fsck_options *o, return 1; } +int fsck_refs_error_function(struct fsck_options *options UNUSED, + void *fsck_report, + enum fsck_msg_type msg_type, + enum fsck_msg_id msg_id UNUSED, + const char *message) +{ + struct fsck_ref_report *report = fsck_report; + struct strbuf sb = STRBUF_INIT; + int ret = 0; + + strbuf_addstr(&sb, report->path); + + if (report->oid) + strbuf_addf(&sb, " -> (%s)", oid_to_hex(report->oid)); + else if (report->referent) + strbuf_addf(&sb, " -> (%s)", report->referent); + + if (msg_type == FSCK_WARN) + warning("%s: %s", sb.buf, message); + else + ret = error("%s: %s", sb.buf, message); + + strbuf_release(&sb); + return ret; +} + static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done, enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type, struct fsck_options *options, const char *blob_type) diff --git a/fsck.h b/fsck.h index 3b80d02506f34a..2002590f605796 100644 --- a/fsck.h +++ b/fsck.h @@ -133,11 +133,23 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o, enum fsck_msg_id msg_id, const char *message); +int fsck_refs_error_function(struct fsck_options *options, + void *fsck_report, + enum fsck_msg_type msg_type, + enum fsck_msg_id msg_id, + const char *message); + struct fsck_object_report { const struct object_id *oid; enum object_type object_type; }; +struct fsck_ref_report { + const char *path; + const struct object_id *oid; + const char *referent; +}; + struct fsck_options { fsck_walk_func walk; fsck_error error_func; @@ -175,6 +187,9 @@ struct fsck_options { .gitattributes_done = OIDSET_INIT, \ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \ } +#define FSCK_REFS_OPTIONS_DEFAULT { \ + .error_func = fsck_refs_error_function, \ +} /* descend in all linked child objects * the return value is: @@ -216,6 +231,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, */ int fsck_finish(struct fsck_options *options); +/* + * Report an error or warning for refs. + */ +__attribute__((format (printf, 4, 5))) +int fsck_report_ref(struct fsck_options *options, + struct fsck_ref_report *report, + enum fsck_msg_id msg_id, + const char *fmt, ...); + + /* * Subsystem for storing human-readable names for each object. * From ab6f79d8df7c7799ed38229eb56811fda36853ae Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:27:17 +0800 Subject: [PATCH 6/9] refs: set up ref consistency check infrastructure The "struct ref_store" is the base class which contains the "be" pointer which provides backend-specific functions whose interfaces are defined in the "ref_storage_be". We could reuse this polymorphism to define only one interface. For every backend, we need to provide its own function pointer. The interfaces defined in the `ref_storage_be` are carefully structured in semantic. It's organized as the five parts: 1. The name and the initialization interfaces. 2. The ref transaction interfaces. 3. The ref internal interfaces (pack, rename and copy). 4. The ref filesystem interfaces. 5. The reflog related interfaces. To keep consistent with the git-fsck(1), add a new interface named "fsck_refs_fn" to the end of "ref_storage_be". This semantic cannot be grouped into any above five categories. Explicitly add blank line to make it different from others. Last, implement placeholder functions for each ref backends. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- refs.c | 5 +++++ refs.h | 8 ++++++++ refs/debug.c | 11 +++++++++++ refs/files-backend.c | 13 ++++++++++++- refs/packed-backend.c | 8 ++++++++ refs/refs-internal.h | 6 ++++++ refs/reftable-backend.c | 8 ++++++++ 7 files changed, 58 insertions(+), 1 deletion(-) diff --git a/refs.c b/refs.c index 915aeb4d1dbb62..6f642dc681785b 100644 --- a/refs.c +++ b/refs.c @@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags) return check_or_sanitize_refname(refname, flags, NULL); } +int refs_fsck(struct ref_store *refs, struct fsck_options *o) +{ + return refs->be->fsck(refs, o); +} + void sanitize_refname_component(const char *refname, struct strbuf *out) { if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out)) diff --git a/refs.h b/refs.h index b3e39bc257046d..405073621a7179 100644 --- a/refs.h +++ b/refs.h @@ -4,6 +4,7 @@ #include "commit.h" #include "repository.h" +struct fsck_options; struct object_id; struct ref_store; struct strbuf; @@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat */ int check_refname_format(const char *refname, int flags); +/* + * Check the reference database for consistency. Return 0 if refs and + * reflogs are consistent, and non-zero otherwise. The errors will be + * written to stderr. + */ +int refs_fsck(struct ref_store *refs, struct fsck_options *o); + /* * Apply the rules from check_refname_format, but mutate the result until it * is acceptable, and place the result in "out". diff --git a/refs/debug.c b/refs/debug.c index 547d9245b98af9..45e2e784a0f8c4 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname, return res; } +static int debug_fsck(struct ref_store *ref_store, + struct fsck_options *o) +{ + struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store; + int res = drefs->refs->be->fsck(drefs->refs, o); + trace_printf_key(&trace_refs, "fsck: %d\n", res); + return res; +} + struct ref_storage_be refs_be_debug = { .name = "debug", .init = NULL, @@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = { .create_reflog = debug_create_reflog, .delete_reflog = debug_delete_reflog, .reflog_expire = debug_reflog_expire, + + .fsck = debug_fsck, }; diff --git a/refs/files-backend.c b/refs/files-backend.c index aa52d9be7c77ad..4630eb1f802d50 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3408,6 +3408,15 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store, return ret; } +static int files_fsck(struct ref_store *ref_store, + struct fsck_options *o) +{ + struct files_ref_store *refs = + files_downcast(ref_store, REF_STORE_READ, "fsck"); + + return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o); +} + struct ref_storage_be refs_be_files = { .name = "files", .init = files_ref_store_init, @@ -3434,5 +3443,7 @@ struct ref_storage_be refs_be_files = { .reflog_exists = files_reflog_exists, .create_reflog = files_create_reflog, .delete_reflog = files_delete_reflog, - .reflog_expire = files_reflog_expire + .reflog_expire = files_reflog_expire, + + .fsck = files_fsck, }; diff --git a/refs/packed-backend.c b/refs/packed-backend.c index a0666407cdff3c..5209b0b21226b0 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s return empty_ref_iterator_begin(); } +static int packed_fsck(struct ref_store *ref_store, + struct fsck_options *o) +{ + return 0; +} + struct ref_storage_be refs_be_packed = { .name = "packed", .init = packed_ref_store_init, @@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = { .create_reflog = NULL, .delete_reflog = NULL, .reflog_expire = NULL, + + .fsck = packed_fsck, }; diff --git a/refs/refs-internal.h b/refs/refs-internal.h index fa975d69aaabad..a905e187cd770a 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -4,6 +4,7 @@ #include "refs.h" #include "iterator.h" +struct fsck_options; struct ref_transaction; /* @@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname, typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname, struct strbuf *referent); +typedef int fsck_fn(struct ref_store *ref_store, + struct fsck_options *o); + struct ref_storage_be { const char *name; ref_store_init_fn *init; @@ -677,6 +681,8 @@ struct ref_storage_be { create_reflog_fn *create_reflog; delete_reflog_fn *delete_reflog; reflog_expire_fn *reflog_expire; + + fsck_fn *fsck; }; extern struct ref_storage_be refs_be_files; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index fbe74c239d336b..b5a1a526dfda80 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, return ret; } +static int reftable_be_fsck(struct ref_store *ref_store, + struct fsck_options *o) +{ + return 0; +} + struct ref_storage_be refs_be_reftable = { .name = "reftable", .init = reftable_be_init, @@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = { .create_reflog = reftable_be_create_reflog, .delete_reflog = reftable_be_delete_reflog, .reflog_expire = reftable_be_reflog_expire, + + .fsck = reftable_be_fsck, }; From bf061d26c7a595f1f46510aa584d50be1b1edb93 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:27:28 +0800 Subject: [PATCH 7/9] builtin/refs: add verify subcommand Introduce a new subcommand "verify" in git-refs(1) to allow the user to check the reference database consistency and also this subcommand will be used as the entry point of checking refs for "git-fsck(1)". Add "verbose" field into "fsck_options" to indicate whether we should print verbose messages when checking refs and objects consistency. Remove bit-field for "strict" field, this is because we cannot take address of a bit-field which makes it unhandy to set member variables when parsing the command line options. The "git-fsck(1)" declares "fsck_options" variable with "static" identifier which avoids complaint by the leak-checker. However, in "git-refs verify", we need to do memory clean manually. Thus add "fsck_options_clear" function in "fsck.c" to provide memory clean operation. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/git-refs.txt | 13 +++++++++++++ builtin/refs.c | 34 ++++++++++++++++++++++++++++++++++ fsck.c | 11 +++++++++++ fsck.h | 8 +++++++- 4 files changed, 65 insertions(+), 1 deletion(-) diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt index 5b99e043859216..ce31f93061db5e 100644 --- a/Documentation/git-refs.txt +++ b/Documentation/git-refs.txt @@ -10,6 +10,7 @@ SYNOPSIS -------- [verse] 'git refs migrate' --ref-format= [--dry-run] +'git refs verify' [--strict] [--verbose] DESCRIPTION ----------- @@ -22,6 +23,9 @@ COMMANDS migrate:: Migrate ref store between different formats. +verify:: + Verify reference database consistency. + OPTIONS ------- @@ -39,6 +43,15 @@ include::ref-storage-format.txt[] can be used to double check that the migration works as expected before performing the actual migration. +The following options are specific to 'git refs verify': + +--strict:: + Enable stricter error checking. This will cause warnings to be + reported as errors. See linkgit:git-fsck[1]. + +--verbose:: + When verifying the reference database consistency, be chatty. + KNOWN LIMITATIONS ----------------- diff --git a/builtin/refs.c b/builtin/refs.c index 46dcd150d4e279..131f98be986d3f 100644 --- a/builtin/refs.c +++ b/builtin/refs.c @@ -1,4 +1,6 @@ #include "builtin.h" +#include "config.h" +#include "fsck.h" #include "parse-options.h" #include "refs.h" #include "repository.h" @@ -7,6 +9,9 @@ #define REFS_MIGRATE_USAGE \ N_("git refs migrate --ref-format= [--dry-run]") +#define REFS_VERIFY_USAGE \ + N_("git refs verify [--strict] [--verbose]") + static int cmd_refs_migrate(int argc, const char **argv, const char *prefix) { const char * const migrate_usage[] = { @@ -58,15 +63,44 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix) return err; } +static int cmd_refs_verify(int argc, const char **argv, const char *prefix) +{ + struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT; + const char * const verify_usage[] = { + REFS_VERIFY_USAGE, + NULL, + }; + struct option options[] = { + OPT_BOOL(0, "verbose", &fsck_refs_options.verbose, N_("be verbose")), + OPT_BOOL(0, "strict", &fsck_refs_options.strict, N_("enable strict checking")), + OPT_END(), + }; + int ret; + + argc = parse_options(argc, argv, prefix, options, verify_usage, 0); + if (argc) + usage(_("'git refs verify' takes no arguments")); + + git_config(git_fsck_config, &fsck_refs_options); + prepare_repo_settings(the_repository); + + ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options); + + fsck_options_clear(&fsck_refs_options); + return ret; +} + int cmd_refs(int argc, const char **argv, const char *prefix) { const char * const refs_usage[] = { REFS_MIGRATE_USAGE, + REFS_VERIFY_USAGE, NULL, }; parse_opt_subcommand_fn *fn = NULL; struct option opts[] = { OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate), + OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify), OPT_END(), }; diff --git a/fsck.c b/fsck.c index e16c892f6ac1fb..3756f52459e771 100644 --- a/fsck.c +++ b/fsck.c @@ -1331,6 +1331,17 @@ int fsck_finish(struct fsck_options *options) return ret; } +void fsck_options_clear(struct fsck_options *options) +{ + free(options->msg_type); + oidset_clear(&options->skip_oids); + oidset_clear(&options->gitmodules_found); + oidset_clear(&options->gitmodules_done); + oidset_clear(&options->gitattributes_found); + oidset_clear(&options->gitattributes_done); + kh_clear_oid_map(options->object_names); +} + int git_fsck_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { diff --git a/fsck.h b/fsck.h index 2002590f605796..d551a9fe86efe2 100644 --- a/fsck.h +++ b/fsck.h @@ -153,7 +153,8 @@ struct fsck_ref_report { struct fsck_options { fsck_walk_func walk; fsck_error error_func; - unsigned strict:1; + unsigned strict; + unsigned verbose; enum fsck_msg_type *msg_type; struct oidset skip_oids; struct oidset gitmodules_found; @@ -231,6 +232,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, */ int fsck_finish(struct fsck_options *options); +/* + * Clear the fsck_options struct, freeing any allocated memory. + */ +void fsck_options_clear(struct fsck_options *options); + /* * Report an error or warning for refs. */ From a7600b8481b533d77a2cc5f03acdbed12b996fcd Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:31:31 +0800 Subject: [PATCH 8/9] files-backend: add unified interface for refs scanning For refs and reflogs, we need to scan its corresponding directories to check every regular file or symbolic link which shares the same pattern. Introduce a unified interface for scanning directories for files-backend. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/fsck-msgids.txt | 3 ++ fsck.h | 1 + refs/files-backend.c | 73 ++++++++++++++++++++++++++++++++++- 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt index f643585a34e761..7c809fddf1aa0c 100644 --- a/Documentation/fsck-msgids.txt +++ b/Documentation/fsck-msgids.txt @@ -19,6 +19,9 @@ `badParentSha1`:: (ERROR) A commit object has a bad parent sha1. +`badRefFiletype`:: + (ERROR) A ref has a bad file type. + `badTagName`:: (INFO) A tag has an invalid format. diff --git a/fsck.h b/fsck.h index d551a9fe86efe2..af02174973b054 100644 --- a/fsck.h +++ b/fsck.h @@ -31,6 +31,7 @@ enum fsck_msg_type { FUNC(BAD_NAME, ERROR) \ FUNC(BAD_OBJECT_SHA1, ERROR) \ FUNC(BAD_PARENT_SHA1, ERROR) \ + FUNC(BAD_REF_FILETYPE, ERROR) \ FUNC(BAD_TIMEZONE, ERROR) \ FUNC(BAD_TREE, ERROR) \ FUNC(BAD_TREE_SHA1, ERROR) \ diff --git a/refs/files-backend.c b/refs/files-backend.c index 4630eb1f802d50..e511e1dcce3ef5 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -6,6 +6,7 @@ #include "../gettext.h" #include "../hash.h" #include "../hex.h" +#include "../fsck.h" #include "../refs.h" #include "refs-internal.h" #include "ref-cache.h" @@ -3408,13 +3409,83 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store, return ret; } +/* + * For refs and reflogs, they share a unified interface when scanning + * the whole directory. This function is used as the callback for each + * regular file or symlink in the directory. + */ +typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store, + struct fsck_options *o, + const char *refs_check_dir, + struct dir_iterator *iter); + +static int files_fsck_refs_dir(struct ref_store *ref_store, + struct fsck_options *o, + const char *refs_check_dir, + files_fsck_refs_fn *fsck_refs_fn) +{ + struct strbuf sb = STRBUF_INIT; + struct dir_iterator *iter; + int iter_status; + int ret = 0; + + strbuf_addf(&sb, "%s/%s", ref_store->gitdir, refs_check_dir); + + iter = dir_iterator_begin(sb.buf, 0); + if (!iter) { + ret = error_errno(_("cannot open directory %s"), sb.buf); + goto out; + } + + while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) { + if (S_ISDIR(iter->st.st_mode)) { + continue; + } else if (S_ISREG(iter->st.st_mode) || + S_ISLNK(iter->st.st_mode)) { + if (o->verbose) + fprintf_ln(stderr, "Checking %s/%s", + refs_check_dir, iter->relative_path); + for (size_t i = 0; fsck_refs_fn[i]; i++) { + if (fsck_refs_fn[i](ref_store, o, refs_check_dir, iter)) + ret = -1; + } + } else { + struct fsck_ref_report report = { .path = iter->basename }; + if (fsck_report_ref(o, &report, + FSCK_MSG_BAD_REF_FILETYPE, + "unexpected file type")) + ret = -1; + } + } + + if (iter_status != ITER_DONE) + ret = error(_("failed to iterate over '%s'"), sb.buf); + +out: + strbuf_release(&sb); + return ret; +} + +static int files_fsck_refs(struct ref_store *ref_store, + struct fsck_options *o) +{ + files_fsck_refs_fn fsck_refs_fn[]= { + NULL, + }; + + if (o->verbose) + fprintf_ln(stderr, _("Checking references consistency")); + return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn); +} + static int files_fsck(struct ref_store *ref_store, struct fsck_options *o) { struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_READ, "fsck"); - return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o); + return files_fsck_refs(ref_store, o) | + refs->packed_ref_store->be->fsck(refs->packed_ref_store, o); } struct ref_storage_be refs_be_files = { From 1c31be45b3b263670c7d2a91c27cc119b77dd2e2 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:31:42 +0800 Subject: [PATCH 9/9] fsck: add ref name check for files backend The git-fsck(1) only implicitly checks the reference, it does not fully check refs with bad format name such as standalone "@". However, a file ending with ".lock" should not be marked as having a bad ref name. It is expected that concurrent writers may have such lock files. We currently ignore this situation. But for bare ".lock" file, we will report it as error. In order to provide such checks, add a new fsck message id "badRefName" with default ERROR type. Use existing "check_refname_format" to explicit check the ref name. And add a new unit test to verify the functionality. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/fsck-msgids.txt | 3 ++ fsck.h | 1 + refs/files-backend.c | 31 ++++++++++++ t/t0602-reffiles-fsck.sh | 92 +++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+) create mode 100755 t/t0602-reffiles-fsck.sh diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt index 7c809fddf1aa0c..68a2801f1529df 100644 --- a/Documentation/fsck-msgids.txt +++ b/Documentation/fsck-msgids.txt @@ -22,6 +22,9 @@ `badRefFiletype`:: (ERROR) A ref has a bad file type. +`badRefName`:: + (ERROR) A ref has an invalid format. + `badTagName`:: (INFO) A tag has an invalid format. diff --git a/fsck.h b/fsck.h index af02174973b054..500b4c04d2cbf2 100644 --- a/fsck.h +++ b/fsck.h @@ -32,6 +32,7 @@ enum fsck_msg_type { FUNC(BAD_OBJECT_SHA1, ERROR) \ FUNC(BAD_PARENT_SHA1, ERROR) \ FUNC(BAD_REF_FILETYPE, ERROR) \ + FUNC(BAD_REF_NAME, ERROR) \ FUNC(BAD_TIMEZONE, ERROR) \ FUNC(BAD_TREE, ERROR) \ FUNC(BAD_TREE_SHA1, ERROR) \ diff --git a/refs/files-backend.c b/refs/files-backend.c index e511e1dcce3ef5..7f6eefa960b147 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3419,6 +3419,36 @@ typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store, const char *refs_check_dir, struct dir_iterator *iter); +static int files_fsck_refs_name(struct ref_store *ref_store UNUSED, + struct fsck_options *o, + const char *refs_check_dir, + struct dir_iterator *iter) +{ + struct strbuf sb = STRBUF_INIT; + int ret = 0; + + /* + * Ignore the files ending with ".lock" as they may be lock files + * However, do not allow bare ".lock" files. + */ + if (iter->basename[0] != '.' && ends_with(iter->basename, ".lock")) + goto cleanup; + + if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) { + struct fsck_ref_report report = { .path = NULL }; + + strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path); + report.path = sb.buf; + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_REF_NAME, + "invalid refname format"); + } + +cleanup: + strbuf_release(&sb); + return ret; +} + static int files_fsck_refs_dir(struct ref_store *ref_store, struct fsck_options *o, const char *refs_check_dir, @@ -3470,6 +3500,7 @@ static int files_fsck_refs(struct ref_store *ref_store, struct fsck_options *o) { files_fsck_refs_fn fsck_refs_fn[]= { + files_fsck_refs_name, NULL, }; diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh new file mode 100755 index 00000000000000..71a4d1a5ae4bab --- /dev/null +++ b/t/t0602-reffiles-fsck.sh @@ -0,0 +1,92 @@ +#!/bin/sh + +test_description='Test reffiles backend consistency check' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME +GIT_TEST_DEFAULT_REF_FORMAT=files +export GIT_TEST_DEFAULT_REF_FORMAT +TEST_PASSES_SANITIZE_LEAK=true + +. ./test-lib.sh + +test_expect_success 'ref name should be checked' ' + test_when_finished "rm -rf repo" && + git init repo && + branch_dir_prefix=.git/refs/heads && + tag_dir_prefix=.git/refs/tags && + cd repo && + + git commit --allow-empty -m initial && + git checkout -b branch-1 && + git tag tag-1 && + git commit --allow-empty -m second && + git checkout -b branch-2 && + git tag tag-2 && + git tag multi_hierarchy/tag-2 && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/.branch-1: badRefName: invalid refname format + EOF + rm $branch_dir_prefix/.branch-1 && + test_cmp expect err && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/@: badRefName: invalid refname format + EOF + rm $branch_dir_prefix/@ && + test_cmp expect err && + + cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format + EOF + rm $tag_dir_prefix/multi_hierarchy/@ && + test_cmp expect err && + + cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock && + git refs verify 2>err && + rm $tag_dir_prefix/tag-1.lock && + test_must_be_empty err && + + cp $tag_dir_prefix/tag-1 $tag_dir_prefix/.lock && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/tags/.lock: badRefName: invalid refname format + EOF + rm $tag_dir_prefix/.lock && + test_cmp expect err +' + +test_expect_success 'ref name check should be adapted into fsck messages' ' + test_when_finished "rm -rf repo" && + git init repo && + branch_dir_prefix=.git/refs/heads && + tag_dir_prefix=.git/refs/tags && + cd repo && + git commit --allow-empty -m initial && + git checkout -b branch-1 && + git tag tag-1 && + git commit --allow-empty -m second && + git checkout -b branch-2 && + git tag tag-2 && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 && + git -c fsck.badRefName=warn refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/.branch-1: badRefName: invalid refname format + EOF + rm $branch_dir_prefix/.branch-1 && + test_cmp expect err && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ && + git -c fsck.badRefName=ignore refs verify 2>err && + test_must_be_empty err +' + +test_done