Skip to content

Commit

Permalink
整理内容
Browse files Browse the repository at this point in the history
  • Loading branch information
eric committed Mar 1, 2024
1 parent 28f1ebd commit 6516b0a
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 149 deletions.
8 changes: 0 additions & 8 deletions kr2r/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,6 @@ path = "src/bin/build_db.rs"
name = "classify"
path = "src/bin/classify.rs"

[[bin]]
name = "cht"
path = "src/bin/cht.rs"

[[bin]]
name = "taxo"
path = "src/bin/taxo.rs"


[features]
default = ["dna"]
Expand Down
81 changes: 0 additions & 81 deletions kr2r/src/bin/cht.rs

This file was deleted.

55 changes: 24 additions & 31 deletions kr2r/src/bin/classify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,9 @@ struct Args {
)]
confidence_threshold: f64,

/// Enable quick mode for faster processing.
#[clap(short = 'q', long = "quick-mode", action)]
quick_mode: bool,

// /// Enable quick mode for faster processing.
// #[clap(short = 'q', long = "quick-mode", action)]
// quick_mode: bool,
/// The number of threads to use, default is 1.
#[clap(short = 'p', long = "num-threads", value_parser, default_value_t = 1)]
num_threads: i32,
Expand All @@ -71,38 +70,36 @@ struct Args {
#[clap(short = 'S', long = "single-file-pairs", action)]
single_file_pairs: bool,

/// Use mpa-style report format.
#[clap(short = 'm', long = "mpa-style-report", action)]
mpa_style_report: bool,

/// Report k-mer data in the output.
#[clap(short = 'K', long = "report-kmer-data", action)]
report_kmer_data: bool,
// /// Use mpa-style report format.
// #[clap(short = 'm', long = "mpa-style-report", action)]
// mpa_style_report: bool,

/// File path for outputting the report.
#[clap(short = 'R', long = "report-filename", value_parser)]
report_filename: Option<String>,
// /// Report k-mer data in the output.
// #[clap(short = 'K', long = "report-kmer-data", action)]
// report_kmer_data: bool,

/// Report taxa with zero count.
#[clap(short = 'z', long = "report-zero-counts", action)]
report_zero_counts: bool,
// /// File path for outputting the report.
// #[clap(short = 'R', long = "report-filename", value_parser)]
// report_filename: Option<String>,

/// File path for outputting classified sequences.
#[clap(short = 'C', long = "classified-output-filename", value_parser)]
classified_output_filename: Option<String>,
// /// Report taxa with zero count.
// #[clap(short = 'z', long = "report-zero-counts", action)]
// report_zero_counts: bool,

/// File path for outputting unclassified sequences.
#[clap(short = 'U', long = "unclassified-output-filename", value_parser)]
unclassified_output_filename: Option<String>,
// /// File path for outputting classified sequences.
// #[clap(short = 'C', long = "classified-output-filename", value_parser)]
// classified_output_filename: Option<String>,

// /// File path for outputting unclassified sequences.
// #[clap(short = 'U', long = "unclassified-output-filename", value_parser)]
// unclassified_output_filename: Option<String>,
/// File path for outputting normal Kraken output.
#[clap(short = 'O', long = "kraken-output-filename", value_parser)]
kraken_output_filename: Option<String>,

/// Print scientific name instead of taxid in Kraken output.
#[clap(short = 'n', long = "print-scientific-name", action)]
print_scientific_name: bool,

// /// Print scientific name instead of taxid in Kraken output.
// #[clap(short = 'n', long = "print-scientific-name", action)]
// print_scientific_name: bool,
/// Minimum quality score for FASTQ data, default is 0.
#[clap(
short = 'Q',
Expand All @@ -112,10 +109,6 @@ struct Args {
)]
minimum_quality_score: i32,

/// Use memory mapping to access hash and taxonomy data.
#[clap(short = 'M', long = "use-memory-mapping", action)]
use_memory_mapping: bool,

/// Input files for processing.
///
/// A list of input file paths (FASTA/FASTQ) to be processed by the classify program.
Expand Down
33 changes: 29 additions & 4 deletions kr2r/src/bin/estimate_capacity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ struct Args {
#[clap(short = 'T', long, value_parser = parse_binary)]
toggle_mask: Option<u64>,

/// Read block size
#[clap(short = 'B', long, default_value = "31457280")]
block_size: usize,
/// Proportion of the hash table to be populated
/// (build task only; def: 0.7, must be
/// between 0 and 1).
#[clap(long, long, default_value_t = 0.7)]
load_factor: f64,

/// Number of threads
#[clap(short = 'p', long, default_value = "4")]
Expand Down Expand Up @@ -145,6 +147,23 @@ fn process_sequence(
hllp
}

/// Renders a size as a human-readable string with two decimal places.
///
/// The value is repeatedly divided by 1024 while it is at least 1024 and a
/// larger unit is still available, stepping through the units
/// `B`, `kB`, `MB`, `GB`, `TB`, `PB`, `EB`. Values that remain at or above
/// 1024 after reaching `EB` are printed in `EB` without further scaling.
/// Values below 1024 (including negative ones) are printed unchanged in `B`.
fn format_bytes(size: f64) -> String {
    const UNITS: [&str; 7] = ["B", "kB", "MB", "GB", "TB", "PB", "EB"];

    let mut scaled = size;
    let mut unit_index = 0;

    // Stop either when the value fits the current unit or when the largest
    // unit has been reached.
    while scaled >= 1024.0 && unit_index + 1 < UNITS.len() {
        scaled /= 1024.0;
        unit_index += 1;
    }

    format!("{:.2}{}", scaled, UNITS[unit_index])
}

fn main() {
let mut args = Args::parse();
if args.k_mer < args.l_mer as u64 {
Expand Down Expand Up @@ -182,5 +201,11 @@ fn main() {

let hllp_count = (hllp.count() * RANGE_SECTIONS as f64 / args.n as f64).round() as u64;
// println!("Final count: {:?}", final_count);
println!("estimate count: {:?}", hllp_count);
let required_capacity = (hllp_count + 8192) as f64 / args.load_factor;
println!(
"estimate count: {:?}, required capacity: {:?}, Estimated hash table requirement: {:?}",
hllp_count,
required_capacity.ceil(),
format_bytes(required_capacity)
);
}
25 changes: 0 additions & 25 deletions kr2r/src/bin/taxo.rs

This file was deleted.

0 comments on commit 6516b0a

Please sign in to comment.