Skip to content

Commit

Permalink
pack-objects: refactor path-walk delta phase
Browse files Browse the repository at this point in the history
Previously, the --path-walk option to 'git pack-objects' would compute
deltas inline with the path-walk logic. This would make the progress
indicator look like it was taking a long time to enumerate objects and
then computing deltas very quickly.

Instead of computing deltas on each region of objects organized by tree,
store a list of regions corresponding to these groups. These can later
be pulled from the list for delta compression before doing the "global"
delta search.

The current implementation is not integrated with threads, but that
integration could be done in a future update.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
  • Loading branch information
derrickstolee committed Sep 18, 2024
1 parent cb91d67 commit 5dd47c4
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 25 deletions.
60 changes: 35 additions & 25 deletions builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -3232,6 +3232,30 @@ static int should_attempt_deltas(struct object_entry *entry)
return 1;
}

/*
 * Run the delta search independently on each of the 'nr' consecutive
 * regions beginning at regions[start].
 *
 * For every region, the entries that pass should_attempt_deltas() are
 * collected into a temporary array, sorted with type_size_sort and
 * handed to find_deltas() using the file-level 'window' and 'depth'
 * settings.
 *
 * 'list' is the object array that each region's 'start' index refers
 * to; the caller passes to_pack.objects.
 */
static void find_deltas_by_region(struct object_entry *list,
				  struct packing_region *regions,
				  uint32_t start, uint32_t nr)
{
	/* Running count shared by every find_deltas() call below. */
	unsigned int processed = 0;

	while (nr--) {
		struct packing_region *r = &regions[start++];
		struct object_entry **delta_list;
		/* find_deltas() takes the list size as 'unsigned int *'. */
		unsigned int delta_list_nr = 0;

		/* Worst case: every entry in the region is a delta candidate. */
		ALLOC_ARRAY(delta_list, r->nr);
		for (uint32_t i = 0; i < r->nr; i++) {
			struct object_entry *entry = list + r->start + i;

			if (should_attempt_deltas(entry))
				delta_list[delta_list_nr++] = entry;
		}

		QSORT(delta_list, delta_list_nr, type_size_sort);
		find_deltas(delta_list, &delta_list_nr, window, depth, &processed);

		/*
		 * The candidate array is only needed for this region;
		 * release it so that we do not leak one array per region.
		 */
		free(delta_list);
	}
}

static void prepare_pack(int window, int depth)
{
struct object_entry **delta_list;
Expand All @@ -3256,6 +3280,10 @@ static void prepare_pack(int window, int depth)
if (!to_pack.nr_objects || !window || !depth)
return;

if (path_walk)
find_deltas_by_region(to_pack.objects, to_pack.regions,
0, to_pack.nr_regions);

ALLOC_ARRAY(delta_list, to_pack.nr_objects);
nr_deltas = n = 0;

Expand Down Expand Up @@ -4192,10 +4220,8 @@ static int add_objects_by_path(const char *path,
enum object_type type,
void *data)
{
struct object_entry **delta_list;
size_t oe_start = to_pack.nr_objects;
size_t oe_end;
unsigned int sub_list_size;
unsigned int *processed = data;

/*
Expand Down Expand Up @@ -4234,32 +4260,16 @@ static int add_objects_by_path(const char *path,
if (oe_end == oe_start || !window)
return 0;

sub_list_size = 0;
ALLOC_ARRAY(delta_list, oe_end - oe_start);

for (size_t i = 0; i < oe_end - oe_start; i++) {
struct object_entry *entry = to_pack.objects + oe_start + i;

if (!should_attempt_deltas(entry))
continue;
ALLOC_GROW(to_pack.regions,
to_pack.nr_regions + 1,
to_pack.nr_regions_alloc);

delta_list[sub_list_size++] = entry;
}
to_pack.regions[to_pack.nr_regions].start = oe_start;
to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;

/*
* Find delta bases among this list of objects that all match the same
* path. This causes the delta compression to be interleaved in the
* object walk, which can lead to confusing progress indicators. This is
* also incompatible with threaded delta calculations. In the future,
* consider creating a list of regions in the full to_pack.objects array
* that could be picked up by the threaded delta computation.
*/
if (sub_list_size && window) {
QSORT(delta_list, sub_list_size, type_size_sort);
find_deltas(delta_list, &sub_list_size, window, depth, processed);
}
*processed += oids->nr;
display_progress(progress_state, *processed);

free(delta_list);
return 0;
}

Expand Down
12 changes: 12 additions & 0 deletions pack-objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,23 @@ struct object_entry {
unsigned ext_base:1; /* delta_idx points outside packlist */
};

/**
 * A packing region is a section of the packing_data.objects array
 * as given by a starting index and a number of elements.
 *
 * The region covers the entries at indexes 'start' through
 * 'start + nr - 1' of that array.
 */
struct packing_region {
uint32_t start; /* index of the region's first entry in packing_data.objects */
uint32_t nr; /* number of consecutive entries belonging to the region */
};

struct packing_data {
struct repository *repo;
struct object_entry *objects;
uint32_t nr_objects, nr_alloc;

struct packing_region *regions;
uint32_t nr_regions, nr_regions_alloc;

int32_t *index;
uint32_t index_size;

Expand Down

0 comments on commit 5dd47c4

Please sign in to comment.