Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Actually dedupe content payload #365

Merged
merged 1 commit into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions boulder/src/package/collect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ impl PathInfo {
matches!(self.layout.entry, layout::Entry::Regular(_, _))
}

/// Returns the hash stored in this path's layout entry, if there is one.
///
/// Yields `Some(hash)` when `self.layout.entry` is a `layout::Entry::Regular`
/// (the variant's first field is copied out), and `None` for any other entry.
pub fn file_hash(&self) -> Option<u128> {
    match &self.layout.entry {
        layout::Entry::Regular(hash, _) => Some(*hash),
        _ => None,
    }
}

pub fn file_name(&self) -> &str {
self.target_path
.file_name()
Expand Down
22 changes: 14 additions & 8 deletions boulder/src/package/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,21 @@ pub fn emit(paths: &Paths, recipe: &Recipe, packages: &[Package]) -> Result<(),
fn emit_package(paths: &Paths, package: &Package) -> Result<(), Error> {
let filename = package.filename();

// Sort all files by size, largest to smallest
let sorted_files = package
// Filter for all files -> dedupe by hash -> sort largest to smallest
let files = package
.analysis
.paths
.iter()
.filter(|p| p.is_file())
.sorted_by(|a, b| a.size.cmp(&b.size).reverse())
// Filter by file
.filter_map(|info| info.file_hash().map(|hash| (hash, info)))
// Dedupe by hash
.unique_by(|(hash, _)| *hash)
// Sort largest to smallest
.sorted_by(|(_, a), (_, b)| a.size.cmp(&b.size).reverse())
.map(|(_, info)| info)
.collect::<Vec<_>>();
let total_file_size = sorted_files.iter().map(|p| p.size).sum();

let total_file_size = files.iter().map(|info| info.size).sum();

let pb = ProgressBar::new(total_file_size)
.with_message(format!("Generating {filename}"))
Expand Down Expand Up @@ -188,7 +194,7 @@ fn emit_package(paths: &Paths, package: &Package) -> Result<(), Error> {
}

// Only add content payload if we have some files
if !sorted_files.is_empty() {
if !files.is_empty() {
// Temp file for building content payload
let temp_content_path = format!("/tmp/{}.tmp", &filename);
let mut temp_content = fs::OpenOptions::new()
Expand All @@ -201,8 +207,8 @@ fn emit_package(paths: &Paths, package: &Package) -> Result<(), Error> {
let mut writer =
writer.with_content(&mut temp_content, Some(total_file_size), util::num_cpus().get() as u32)?;

for file in sorted_files {
let file = File::open(&file.path)?;
for info in files {
let file = File::open(&info.path)?;
writer.add_content(&mut pb.wrap_read(&file))?;
}

Expand Down
Loading