Skip to content
This repository has been archived by the owner on Sep 16, 2024. It is now read-only.

Generic Resource Identifiers with ResourceIdTrait #90

Merged
merged 7 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benches/compute_bytes_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use arklib::id::ResourceId;
use arklib::resource::{ResourceId, ResourceIdTrait};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::prelude::*;
use std::fs;
Expand Down
48 changes: 26 additions & 22 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@ use std::time::UNIX_EPOCH;
use std::time::{Duration, SystemTime};
use walkdir::{DirEntry, WalkDir};

use crate::{id::ResourceId, ArklibError, Result, ARK_FOLDER, INDEX_PATH};
use crate::{
resource::ResourceId, ArklibError, Result, ARK_FOLDER, INDEX_PATH,
};

/// A one-millisecond [`Duration`].
///
/// NOTE(review): presumably the minimum difference between modification
/// timestamps for a resource to be treated as updated — its use sites are
/// outside the visible hunks, confirm before relying on this.
pub const RESOURCE_UPDATED_THRESHOLD: Duration = Duration::from_millis(1);
/// A set of unique filesystem paths.
pub type Paths = HashSet<PathBuf>;
use crate::resource::ResourceIdTrait;

/// IndexEntry represents a [`ResourceId`] and the time it was last modified
#[derive(
Expand Down Expand Up @@ -750,8 +753,9 @@ fn scan_entries(
#[cfg(test)]
mod tests {
use super::fs;
use crate::id::ResourceId;
use crate::index::{discover_files, IndexEntry};
use crate::initialize;
use crate::resource::ResourceId;
use crate::ResourceIndex;
use std::fs::File;
#[cfg(target_family = "unix")]
Expand Down Expand Up @@ -840,7 +844,7 @@ mod tests {
assert_eq!(actual.id2path.len(), 1);
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert_eq!(actual.collisions.len(), 0);
assert_eq!(actual.count_files(), 1);
Expand All @@ -863,7 +867,7 @@ mod tests {
assert_eq!(actual.id2path.len(), 1);
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert_eq!(actual.collisions.len(), 1);
assert_eq!(actual.count_files(), 2);
Expand Down Expand Up @@ -921,11 +925,11 @@ mod tests {
assert_eq!(actual.id2path.len(), 2);
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2,
hash: CRC32_2,
}));
assert_eq!(actual.collisions.len(), 0);
assert_eq!(actual.count_files(), 2);
Expand All @@ -943,7 +947,7 @@ mod tests {
.clone(),
ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2
hash: CRC32_2
}
)
}
Expand Down Expand Up @@ -990,11 +994,11 @@ mod tests {
assert_eq!(index.id2path.len(), 2);
assert!(index.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert!(index.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2,
hash: CRC32_2,
}));
assert_eq!(index.collisions.len(), 0);
assert_eq!(index.count_files(), 2);
Expand All @@ -1012,7 +1016,7 @@ mod tests {
.clone(),
ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2
hash: CRC32_2
}
)
}
Expand All @@ -1031,7 +1035,7 @@ mod tests {
&new_path,
ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2,
hash: CRC32_2,
},
);

Expand All @@ -1055,7 +1059,7 @@ mod tests {
&file_path.clone(),
ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
},
)
.expect("Should update index successfully");
Expand All @@ -1072,7 +1076,7 @@ mod tests {

assert!(update.deleted.contains(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1
hash: CRC32_1
}))
}

Expand Down Expand Up @@ -1116,7 +1120,7 @@ mod tests {
let mut actual = ResourceIndex::build(path.clone());
let old_id = ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
};
let result = actual
.update_one(&missing_path, old_id)
Expand All @@ -1128,7 +1132,7 @@ mod tests {
result,
Some(ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
})
);
}
Expand All @@ -1144,7 +1148,7 @@ mod tests {
let mut actual = ResourceIndex::build(path.clone());
let old_id = ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
};
let result = actual
.update_one(&missing_path, old_id)
Expand All @@ -1156,7 +1160,7 @@ mod tests {
result,
Some(ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
})
)
}
Expand Down Expand Up @@ -1267,7 +1271,7 @@ mod tests {
assert_eq!(actual.id2path.len(), 1);
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert_eq!(actual.collisions.len(), 0);
assert_eq!(actual.count_files(), 1);
Expand All @@ -1278,29 +1282,29 @@ mod tests {
let old1 = IndexEntry {
id: ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
},
modified: SystemTime::UNIX_EPOCH,
};
let old2 = IndexEntry {
id: ResourceId {
data_size: 2,
crc32: 1,
hash: 1,
},
modified: SystemTime::UNIX_EPOCH,
};

let new1 = IndexEntry {
id: ResourceId {
data_size: 1,
crc32: 1,
hash: 1,
},
modified: SystemTime::now(),
};
let new2 = IndexEntry {
id: ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
},
modified: SystemTime::now(),
};
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ pub mod errors;
pub use errors::{ArklibError, Result};

pub mod app_id;
pub mod id;
pub mod index;

pub mod link;
pub mod pdf;
pub mod resource;

mod atomic;
mod storage;
Expand Down
2 changes: 1 addition & 1 deletion src/link.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::id::ResourceId;
use crate::resource::{ResourceId, ResourceIdTrait};
use crate::storage::meta::store_metadata;
use crate::storage::prop::store_properties;
use crate::{
Expand Down
47 changes: 25 additions & 22 deletions src/id.rs → src/resource/crc32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,16 @@ use std::io::{BufRead, BufReader};
use std::path::Path;
use std::str::FromStr;

use crate::resource::ResourceIdTrait;
use crate::{ArklibError, Result};

/// Number of bytes in one kilobyte (binary, 1024).
const KILOBYTE: u64 = 1024;
/// Number of bytes in one megabyte (1024 kilobytes).
const MEGABYTE: u64 = 1024 * KILOBYTE;
/// Capacity in bytes (512 * 1024) passed to `BufReader::with_capacity`
/// when hashing in-memory data in `compute_bytes`.
///
/// `as` binds tighter than `*`, so this is `512 * (KILOBYTE as usize)`.
const BUFFER_CAPACITY: usize = 512 * KILOBYTE as usize;

/// Represents a resource identifier using the CRC32 algorithm.
///
/// Uses `crc32fast` crate to compute the hash value.
#[derive(
Eq,
Ord,
Expand All @@ -25,12 +33,12 @@ use crate::{ArklibError, Result};
)]
pub struct ResourceId {
/// Size in bytes of the data the identifier was computed from.
pub data_size: u64,
// NOTE(review): this page is a rendered diff; `crc32` appears to be the
// pre-rename name of the field below — only one of the two exists in the
// real file.
pub crc32: u32,
/// 32-bit checksum of the data, as returned by the hasher's `finalize()`.
pub hash: u32,
}

/// Formats the identifier as `<data_size>-<hash>`, both numbers in decimal
/// (for example `1-2` for `data_size: 1`, `hash: 2`).
impl Display for ResourceId {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
// NOTE(review): the two `write!` lines are the two sides of the rendered
// diff (field named `crc32` vs. `hash`); the output format is the same.
write!(f, "{}-{}", self.data_size, self.crc32)
write!(f, "{}-{}", self.data_size, self.hash)
}
}

Expand All @@ -40,17 +48,16 @@ impl FromStr for ResourceId {
/// Parses an identifier from its `<data_size>-<hash>` text form.
///
/// The input is split at the first `-`; the left part must parse as a
/// `u64` and the right part as a `u32`.
///
/// # Errors
///
/// Returns [`ArklibError::Parse`] when the input contains no `-` or when
/// either part is not a valid number of the expected width.
fn from_str(s: &str) -> Result<Self> {
// `split_once` cuts at the first '-', so any further '-' stays in `r` and
// makes the numeric parse of the right part fail.
let (l, r) = s.split_once('-').ok_or(ArklibError::Parse)?;
let data_size: u64 = l.parse().map_err(|_| ArklibError::Parse)?;
// NOTE(review): the paired `crc32`/`hash` lines below are the two sides of
// the rendered diff; only the variable/field name differs.
let crc32: u32 = r.parse().map_err(|_| ArklibError::Parse)?;
let hash: u32 = r.parse().map_err(|_| ArklibError::Parse)?;

Ok(ResourceId { data_size, crc32 })
Ok(ResourceId { data_size, hash })
}
}

impl ResourceId {
pub fn compute<P: AsRef<Path>>(
data_size: u64,
file_path: P,
) -> Result<Self> {
impl ResourceIdTrait<'_> for ResourceId {
type HashType = u32;

fn compute<P: AsRef<Path>>(data_size: u64, file_path: P) -> Result<Self> {
log::trace!(
"[compute] file {} with size {} mb",
file_path.as_ref().display(),
Expand All @@ -65,15 +72,15 @@ impl ResourceId {
ResourceId::compute_reader(data_size, &mut reader)
}

/// Computes a [`ResourceId`] for an in-memory byte slice.
///
/// The slice length is used as `data_size`; the bytes are wrapped in a
/// `BufReader` with `BUFFER_CAPACITY` and handed to `compute_reader`.
///
/// # Errors
///
/// Returns [`ArklibError::Other`] if the slice length does not fit in a
/// `u64`, and propagates any error from `compute_reader`.
// NOTE(review): the two signature lines are the two sides of the rendered
// diff (inherent `pub fn` vs. trait-impl `fn`).
pub fn compute_bytes(bytes: &[u8]) -> Result<Self> {
fn compute_bytes(bytes: &[u8]) -> Result<Self> {
let data_size = bytes.len().try_into().map_err(|_| {
ArklibError::Other(anyhow!("Can't convert usize to u64"))
})?;
// `&[u8]` implements `Read`, so the slice can back a `BufReader` directly.
let mut reader = BufReader::with_capacity(BUFFER_CAPACITY, bytes);
ResourceId::compute_reader(data_size, &mut reader)
}

pub fn compute_reader<R: Read>(
fn compute_reader<R: Read>(
data_size: u64,
reader: &mut BufReader<R>,
) -> Result<Self> {
Expand All @@ -99,19 +106,15 @@ impl ResourceId {
})?;
}

let crc32: u32 = hasher.finalize();
let hash: u32 = hasher.finalize();
log::trace!("[compute] {} bytes has been read", bytes_read);
log::trace!("[compute] checksum: {:#02x}", crc32);
log::trace!("[compute] checksum: {:#02x}", hash);
assert_eq!(std::convert::Into::<u64>::into(bytes_read), data_size);

Ok(ResourceId { data_size, crc32 })
Ok(ResourceId { data_size, hash })
}
}

const KILOBYTE: u64 = 1024;
const MEGABYTE: u64 = 1024 * KILOBYTE;
const BUFFER_CAPACITY: usize = 512 * KILOBYTE as usize;

#[cfg(test)]
mod tests {
use crate::initialize;
Expand All @@ -133,24 +136,24 @@ mod tests {
.len();

let id1 = ResourceId::compute(data_size, file_path).unwrap();
assert_eq!(id1.crc32, 0x342a3d4a);
assert_eq!(id1.hash, 0x342a3d4a);
assert_eq!(id1.data_size, 128760);

let raw_bytes = fs::read(file_path).unwrap();
let id2 = ResourceId::compute_bytes(raw_bytes.as_slice()).unwrap();
assert_eq!(id2.crc32, 0x342a3d4a);
assert_eq!(id2.hash, 0x342a3d4a);
assert_eq!(id2.data_size, 128760);
}

#[test]
fn resource_id_order() {
let id1 = ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
};
let id2 = ResourceId {
data_size: 2,
crc32: 1,
hash: 1,
};

assert!(id1 < id2);
Expand Down
Loading
Loading