Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

maintenance: prefetch config for remotes/refs #1778

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Documentation/config/maintenance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,21 @@ maintenance.incremental-repack.auto::
Otherwise, a positive value implies the command should run when the
number of pack-files not in the multi-pack-index is at least the value
of `maintenance.incremental-repack.auto`. The default value is 10.

maintenance.prefetch.<remote>.refs::
This multi-valued config option specifies which refs to prefetch
for each remote during the prefetch maintenance task. Each value
of this option is a refspec source that will be used when fetching from
the specified remote. This is useful for large active repositories where
fetching all refs and remotes might not be very efficient.
+
For example, to prefetch only the master branch from the origin remote,
and all branches from the upstream remote, you would use:
+
----
$ git config maintenance.prefetch.origin.refs refs/heads/master
$ git config maintenance.prefetch.upstream.refs 'refs/heads/*'
----
+
If this option is not set for any remote, the prefetch task uses
the default behavior of fetching all refs from all remotes. Once it
is set for at least one remote, remotes without this option are not
prefetched.
112 changes: 104 additions & 8 deletions builtin/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "hex.h"
#include "repository.h"
#include "config.h"
#include "string-list.h"
#include "tempfile.h"
#include "lockfile.h"
#include "parse-options.h"
Expand Down Expand Up @@ -246,6 +247,7 @@ struct maintenance_run_opts {
int quiet;
enum schedule_priority schedule;
};

#define MAINTENANCE_RUN_OPTS_INIT { \
.detach = -1, \
}
Expand Down Expand Up @@ -880,6 +882,22 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
return 0;
}

/*
 * One "maintenance.prefetch.<remote>.refs" entry: the remote's name
 * and the ref patterns configured for it.
 */
struct prefetch_config {
	char *remote;            /* heap-allocated remote name */
	struct string_list refs; /* configured ref patterns */
};

/*
 * Array of prefetch_config entries. `nr` counts the entries in use.
 * NOTE(review): `alloc` is never updated by the code visible here.
 */
struct prefetch_config_list {
	struct prefetch_config *items;
	int nr, alloc;
};

/* Settings collected from the "maintenance.*" configuration. */
struct maintenance_config {
	struct prefetch_config_list prefetch;
};

/* Static initializer for struct maintenance_config: no prefetch entries. */
#define MAINTENANCE_CONFIG_INIT { \
	.prefetch = { \
		.items = NULL, \
		.nr = 0, \
		.alloc = 0, \
	}, \
}

/* File-wide maintenance configuration; starts out empty. */
static struct maintenance_config maintenance_cfg = MAINTENANCE_CONFIG_INIT;

static const char *const builtin_maintenance_run_usage[] = {
N_("git maintenance run [--auto] [--[no-]quiet] [--task=<task>] [--schedule]"),
NULL
Expand Down Expand Up @@ -1023,22 +1041,93 @@ static int fetch_remote(struct remote *remote, void *cbdata)
{
struct maintenance_run_opts *opts = cbdata;
struct child_process child = CHILD_PROCESS_INIT;
struct prefetch_config *prefetch_cfg = NULL;
static int has_prefetch_cfg = -1; // -1: unknown, 0: no config, 1: config exists

if (remote->skip_default_update)
return 0;

if (has_prefetch_cfg == -1)
has_prefetch_cfg = (maintenance_cfg.prefetch.nr > 0);

if (has_prefetch_cfg) {
for (int i = 0; i < maintenance_cfg.prefetch.nr; i++) {
if (!strcmp(remote->name, maintenance_cfg.prefetch.items[i].remote)) {
prefetch_cfg = &maintenance_cfg.prefetch.items[i];
break;
}
}

if (!prefetch_cfg)
return 0;
}

child.git_cmd = 1;
strvec_pushl(&child.args, "fetch", remote->name,
"--prefetch", "--prune", "--no-tags",
"--no-write-fetch-head", "--recurse-submodules=no",
NULL);
strvec_pushl(&child.args, "fetch", remote->name, "--prefetch", "--prune", "--no-tags",
"--no-write-fetch-head", "--recurse-submodules=no", NULL);

if (opts->quiet)
strvec_push(&child.args, "--quiet");

if (prefetch_cfg && prefetch_cfg->refs.nr > 0) {
struct string_list_item *item;
for_each_string_list_item(item, &prefetch_cfg->refs)
strvec_pushf(&child.args, "%s:%s", item->string, item->string);
}

return !!run_command(&child);
}

static int maintenance_config_callback(const char *key, const char *value,
const struct config_context *ctx,
void *data)
{
struct maintenance_config *config = data;
const char *remote_name;
const char *refs_key;
struct prefetch_config *pc;
struct strbuf name = STRBUF_INIT;

if (!skip_prefix(key, "maintenance.prefetch.", &remote_name))
return 0;

refs_key = strrchr(remote_name, '.');
if (!refs_key || strcmp(refs_key + 1, "refs"))
return 0;

strbuf_add(&name, remote_name, refs_key - remote_name);

REALLOC_ARRAY(config->prefetch.items, config->prefetch.nr + 1);
pc = &config->prefetch.items[config->prefetch.nr++];
pc->remote = strbuf_detach(&name, NULL);
string_list_init_dup(&pc->refs);
pc->refs.strdup_strings = 1;
string_list_split(&pc->refs, value, ' ', -1);

return 0;
}

static void maintenance_config_read(struct maintenance_config *config)
{
git_config(maintenance_config_callback, config);
}

static void maintenance_config_release(struct maintenance_config *config)
{
int i;

if (!config->prefetch.items)
return;

for (i = 0; i < config->prefetch.nr; i++) {
free(config->prefetch.items[i].remote);
string_list_clear(&config->prefetch.items[i].refs, 1);
}

free(config->prefetch.items);
memset(config, 0, sizeof(*config));
}

static int maintenance_task_prefetch(struct maintenance_run_opts *opts,
struct gc_config *cfg)
{
Expand Down Expand Up @@ -1563,7 +1652,7 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
{
int i;
struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT;
struct gc_config cfg = GC_CONFIG_INIT;
struct gc_config gc_cfg = GC_CONFIG_INIT;
struct option builtin_maintenance_run_options[] = {
OPT_BOOL(0, "auto", &opts.auto_flag,
N_("run tasks based on the state of the repository")),
Expand All @@ -1579,8 +1668,11 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
PARSE_OPT_NONEG, task_option_parse),
OPT_END()
};

int ret;

maintenance_config_read(&maintenance_cfg);

opts.quiet = !isatty(2);

for (i = 0; i < TASK__COUNT; i++)
Expand All @@ -1591,18 +1683,22 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
builtin_maintenance_run_usage,
PARSE_OPT_STOP_AT_NON_OPTION);


maintenance_config_read(&maintenance_cfg);

if (opts.auto_flag && opts.schedule)
die(_("use at most one of --auto and --schedule=<frequency>"));

gc_config(&cfg);
gc_config(&gc_cfg);
initialize_task_config(opts.schedule);

if (argc != 0)
usage_with_options(builtin_maintenance_run_usage,
builtin_maintenance_run_options);

ret = maintenance_run_tasks(&opts, &cfg);
gc_config_release(&cfg);
ret = maintenance_run_tasks(&opts, &gc_cfg);
gc_config_release(&gc_cfg);
maintenance_config_release(&maintenance_cfg);
return ret;
}

Expand Down
102 changes: 67 additions & 35 deletions t/t7900-maintenance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -215,22 +215,31 @@ test_expect_success 'run --task=prefetch with no remotes' '
'

test_expect_success 'prefetch multiple remotes' '
git clone . clone1 &&
git clone . clone2 &&
rm -fr maintenance_repo &&
test_create_repo maintenance_repo &&
cd maintenance_repo &&

git init clone1 &&
git init clone2 &&

git remote add remote1 "file://$(pwd)/clone1" &&
git remote add remote2 "file://$(pwd)/clone2" &&

git -C clone1 switch -c one &&
git -C clone2 switch -c two &&
test_commit -C clone1 one &&
test_commit -C clone2 two &&

GIT_TRACE2_EVENT="$(pwd)/run-prefetch.txt" git maintenance run --task=prefetch 2>/dev/null &&
fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
test_subcommand git fetch remote1 $fetchargs <run-prefetch.txt &&
test_subcommand git fetch remote2 $fetchargs <run-prefetch.txt &&

git for-each-ref refs/remotes >actual &&
test_must_be_empty actual &&
git log prefetch/remotes/remote1/one &&
git log prefetch/remotes/remote2/two &&

git fetch --all &&
test_cmp_rev refs/remotes/remote1/one refs/prefetch/remotes/remote1/one &&
test_cmp_rev refs/remotes/remote2/two refs/prefetch/remotes/remote2/two &&
Expand All @@ -245,43 +254,66 @@ test_expect_success 'prefetch multiple remotes' '
test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt
'

test_expect_success 'loose-objects task' '
# Repack everything so we know the state of the object dir
git repack -adk &&
test_expect_success 'prefetch with default behavior (all remotes)' '
rm -fr maintenance_repo &&
test_create_repo maintenance_repo &&
cd maintenance_repo &&

# Hack to stop maintenance from running during "git commit"
echo in use >.git/objects/maintenance.lock &&
git init clone1 &&
git init clone2 &&
git remote add remote1 "file://$(pwd)/clone1" &&
git remote add remote2 "file://$(pwd)/clone2" &&

# Assuming that "git commit" creates at least one loose object
test_commit create-loose-object &&
rm .git/objects/maintenance.lock &&
git -C clone1 switch -c one &&
git -C clone2 switch -c two &&
test_commit -C clone1 one &&
test_commit -C clone2 two &&

ls .git/objects >obj-dir-before &&
test_file_not_empty obj-dir-before &&
ls .git/objects/pack/*.pack >packs-before &&
test_line_count = 1 packs-before &&
GIT_TRACE2_EVENT="$(pwd)/run-prefetch-default.txt" \
git maintenance run --task=prefetch 2>/dev/null &&

# The first run creates a pack-file
# but does not delete loose objects.
git maintenance run --task=loose-objects &&
ls .git/objects >obj-dir-between &&
test_cmp obj-dir-before obj-dir-between &&
ls .git/objects/pack/*.pack >packs-between &&
test_line_count = 2 packs-between &&
ls .git/objects/pack/loose-*.pack >loose-packs &&
test_line_count = 1 loose-packs &&

# The second run deletes loose objects
# but does not create a pack-file.
git maintenance run --task=loose-objects &&
ls .git/objects >obj-dir-after &&
cat >expect <<-\EOF &&
info
pack
EOF
test_cmp expect obj-dir-after &&
ls .git/objects/pack/*.pack >packs-after &&
test_cmp packs-between packs-after
fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
test_subcommand git fetch remote1 $fetchargs <run-prefetch-default.txt &&
test_subcommand git fetch remote2 $fetchargs <run-prefetch-default.txt &&

git for-each-ref refs/remotes >actual &&
test_must_be_empty actual &&
git log prefetch/remotes/remote1/one &&
git log prefetch/remotes/remote2/two &&

git fetch --all &&
test_cmp_rev refs/remotes/remote1/one refs/prefetch/remotes/remote1/one &&
test_cmp_rev refs/remotes/remote2/two refs/prefetch/remotes/remote2/two
'

# Only remote1 has maintenance.prefetch.<remote>.refs set, so the
# prefetch task must fetch exactly that ref from remote1 and must not
# fetch from remote2 at all. The work happens in a subshell so that
# the directory change does not leak into later tests.
test_expect_success 'prefetch with configurable remotes' '
	rm -fr maintenance_repo &&
	test_create_repo maintenance_repo &&
	(
		cd maintenance_repo &&

		git init clone1 &&
		git init clone2 &&

		git remote add remote1 "file://$(pwd)/clone1" &&
		git remote add remote2 "file://$(pwd)/clone2" &&
		git -C clone1 switch -c one &&
		git -C clone2 switch -c two &&
		test_commit -C clone1 one &&
		test_commit -C clone2 two &&

		git config maintenance.prefetch.remote1.refs "refs/heads/one" &&

		GIT_TRACE2_EVENT="$(pwd)/run-prefetch-config.txt" \
			git maintenance run --task=prefetch 2>/dev/null &&

		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
		test_subcommand git fetch remote1 $fetchargs refs/heads/one:refs/heads/one <run-prefetch-config.txt &&
		test_subcommand ! git fetch remote2 $fetchargs <run-prefetch-config.txt &&

		git for-each-ref refs/remotes >actual &&
		test_must_be_empty actual &&
		git log prefetch/remotes/remote1/one &&
		test_must_fail git log prefetch/remotes/remote2/two
	)
'

test_expect_success 'maintenance.loose-objects.auto' '
Expand Down
Loading