Skip to content

Commit

Permalink
Merge pull request #1 from AdamQuadmon/adam
Browse files Browse the repository at this point in the history
fix: improve size formatting and analysis output
  • Loading branch information
NielsBongers authored Dec 23, 2024
2 parents 10ea676 + 116190e commit ed676a0
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 20 deletions.
36 changes: 16 additions & 20 deletions src/analysis/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,24 @@ use polars::prelude::*;
#[allow(unused)]
use log::{error, info, warn};

use crate::utils::{file_operations::print_and_save, hashing::hash_iterable};
use crate::utils::{file_operations::print_and_save, hashing::hash_iterable, formatting::format_size};

const BYTES_TO_MB: u64 = 1024 * 1024;
const BYTES_TO_GB: u64 = 1024 * 1024 * 1024;

fn total_folder_size(df: &DataFrame) -> u64 {
df.column("size")
.expect("Failed to get size column")
.u64()
.expect("Failed to convert to u64")
.sum()
.expect("Failed to sum")
/ BYTES_TO_GB
.sum::<u64>()
.unwrap_or(0)
}

fn top_n_file_sizes(df: &DataFrame, top_n: u32) -> DataFrame {
df.clone()
.lazy()
.with_columns([(col("size") / lit(BYTES_TO_MB)).alias("size (MB)")])
.select([col("name"), col("size (MB)"), col("extension"), col("path")])
.filter(col("size").is_not_null())
.with_columns([(col("size"))])
.select([col("name"), col("size"), col("extension"), col("path")])
.sort(
["size (MB)"],
["size"],
SortMultipleOptions::new().with_order_descending(true),
)
.limit(top_n)
Expand All @@ -37,12 +33,12 @@ fn top_n_file_sizes(df: &DataFrame, top_n: u32) -> DataFrame {
fn file_size_per_extension(df: &DataFrame) -> DataFrame {
df.clone()
.lazy()
.filter(col("size").is_not_null())
.group_by([col("extension")])
.agg([col("size").sum().alias("total_size")])
.with_column((col("total_size") / lit(BYTES_TO_MB)).alias("size (MB)"))
.select([col("extension"), col("size (MB)")])
.agg([col("size").sum().alias("total_size")]) // Keeping total_size as name
.select([col("extension"), col("total_size")]) // Select total_size not size
.sort(
["size (MB)"],
["total_size"], // Sort by total_size not size
SortMultipleOptions::new().with_order_descending(true),
)
.collect()
Expand All @@ -65,12 +61,12 @@ fn extension_counts(df: &DataFrame) -> DataFrame {
fn largest_folders(df: &DataFrame) -> DataFrame {
df.clone()
.lazy()
.filter(col("size").is_not_null())
.group_by([col("parents")])
.agg([col("size").sum().alias("total_size")])
.with_column((col("total_size") / lit(BYTES_TO_MB)).alias("size (MB)"))
.select([col("parents"), col("size (MB)")])
.select([col("parents"), col("total_size")]) // Select total_size not size
.sort(
["size (MB)"],
["total_size"], // Sort by total_size not size
SortMultipleOptions::new().with_order_descending(true),
)
.collect()
Expand All @@ -94,11 +90,11 @@ fn overall_hash(df: &DataFrame) -> String {

/// Some simple analysis options. Fun way to explore Polars.
pub fn run_analysis(df: DataFrame, analysis_folder_path: &Path, get_hash: bool) {
let total_folder_size: u64 = total_folder_size(&df);
let total_folder_size = total_folder_size(&df);

let top_n = 100;

info!("Total folder size: {} GB", total_folder_size);
info!("Total folder size: {}", format_size(total_folder_size));

print_and_save(
&mut top_n_file_sizes(&df, top_n),
Expand Down
1 change: 1 addition & 0 deletions src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod file_operations;
pub mod hashing;
pub mod formatting;
16 changes: 16 additions & 0 deletions src/utils/formatting.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/// Convert a byte count to a human-readable string with an appropriate unit.
///
/// Units are binary multiples (1 KB = 1024 B, 1 MB = 1024 KB, 1 GB = 1024 MB).
/// Counts below 1 KB are printed as a whole number of bytes (`"512 B"`); larger
/// counts are printed with two decimals in the largest unit that fits
/// (`"1.50 KB"`, `"2.00 GB"`). GB is the largest unit, so very large counts are
/// expressed in GB.
///
/// Values that sit just below a unit boundary are promoted to the next unit
/// when two-decimal rounding would otherwise display them as `1024.00` of the
/// smaller unit, e.g. 1_048_575 bytes is rendered as `"1.00 MB"` rather than
/// `"1024.00 KB"`.
pub fn format_size(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = KB * 1024;
    const GB: u64 = MB * 1024;
    // Ordered from smallest to largest so `index + 1` is always the next unit up.
    const UNITS: [(u64, &str); 3] = [(KB, "KB"), (MB, "MB"), (GB, "GB")];

    if bytes < KB {
        return format!("{} B", bytes);
    }

    // Largest unit whose factor does not exceed `bytes`; at least KB exists here.
    let mut index = UNITS
        .iter()
        .rposition(|&(factor, _)| bytes >= factor)
        .unwrap_or(0);

    let render = |i: usize| format!("{:.2}", bytes as f64 / UNITS[i].0 as f64);

    let mut number = render(index);
    // Rounding to two decimals can carry the value up to a full 1024 of the
    // current unit; compare the formatted text so the check matches exactly
    // what would be displayed, and move up one unit when that happens.
    if number == "1024.00" && index + 1 < UNITS.len() {
        index += 1;
        number = render(index);
    }

    format!("{} {}", number, UNITS[index].1)
}

0 comments on commit ed676a0

Please sign in to comment.