Skip to content
This repository has been archived by the owner on Sep 16, 2024. It is now read-only.

Commit

Permalink
feat: generic resource id with ResourceIdTrait
Browse files: browse the repository at this point in the history
* refactor: define a trait ResourceIdTrait

Signed-off-by: Tarek <tareknaser360@gmail.com>

* refactor: rename crc32 field to hash

Signed-off-by: Tarek <tareknaser360@gmail.com>

* docs(resource): add doc comments for ResourceIdTrait

Signed-off-by: Tarek <tareknaser360@gmail.com>

* docs(resource): add doc comment for ResourceIdCrc32

Signed-off-by: Tarek <tareknaser360@gmail.com>

* refactor(resource): make hash type implement needed traits

Signed-off-by: Tarek <tareknaser360@gmail.com>

* rename ResourceIdCrc32 to ResourceId

Signed-off-by: Tarek <tareknaser360@gmail.com>

* fix: do not enforce get_hash for implementors of ResourceIdTrait

Signed-off-by: Tarek <tareknaser360@gmail.com>

---------

Signed-off-by: Tarek <tareknaser360@gmail.com>
  • Loading branch information
tareknaser authored Mar 22, 2024
1 parent bf3500d commit 6263e61
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 50 deletions.
2 changes: 1 addition & 1 deletion benches/compute_bytes_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use arklib::id::ResourceId;
use arklib::resource::{ResourceId, ResourceIdTrait};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::prelude::*;
use std::fs;
Expand Down
48 changes: 26 additions & 22 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@ use std::time::UNIX_EPOCH;
use std::time::{Duration, SystemTime};
use walkdir::{DirEntry, WalkDir};

use crate::{id::ResourceId, ArklibError, Result, ARK_FOLDER, INDEX_PATH};
use crate::{
resource::ResourceId, ArklibError, Result, ARK_FOLDER, INDEX_PATH,
};

pub const RESOURCE_UPDATED_THRESHOLD: Duration = Duration::from_millis(1);
pub type Paths = HashSet<PathBuf>;
use crate::resource::ResourceIdTrait;

/// IndexEntry represents a [`ResourceId`] and the time it was last modified
#[derive(
Expand Down Expand Up @@ -750,8 +753,9 @@ fn scan_entries(
#[cfg(test)]
mod tests {
use super::fs;
use crate::id::ResourceId;
use crate::index::{discover_files, IndexEntry};
use crate::initialize;
use crate::resource::ResourceId;
use crate::ResourceIndex;
use std::fs::File;
#[cfg(target_family = "unix")]
Expand Down Expand Up @@ -840,7 +844,7 @@ mod tests {
assert_eq!(actual.id2path.len(), 1);
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert_eq!(actual.collisions.len(), 0);
assert_eq!(actual.count_files(), 1);
Expand All @@ -863,7 +867,7 @@ mod tests {
assert_eq!(actual.id2path.len(), 1);
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert_eq!(actual.collisions.len(), 1);
assert_eq!(actual.count_files(), 2);
Expand Down Expand Up @@ -921,11 +925,11 @@ mod tests {
assert_eq!(actual.id2path.len(), 2);
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2,
hash: CRC32_2,
}));
assert_eq!(actual.collisions.len(), 0);
assert_eq!(actual.count_files(), 2);
Expand All @@ -943,7 +947,7 @@ mod tests {
.clone(),
ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2
hash: CRC32_2
}
)
}
Expand Down Expand Up @@ -990,11 +994,11 @@ mod tests {
assert_eq!(index.id2path.len(), 2);
assert!(index.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert!(index.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2,
hash: CRC32_2,
}));
assert_eq!(index.collisions.len(), 0);
assert_eq!(index.count_files(), 2);
Expand All @@ -1012,7 +1016,7 @@ mod tests {
.clone(),
ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2
hash: CRC32_2
}
)
}
Expand All @@ -1031,7 +1035,7 @@ mod tests {
&new_path,
ResourceId {
data_size: FILE_SIZE_2,
crc32: CRC32_2,
hash: CRC32_2,
},
);

Expand All @@ -1055,7 +1059,7 @@ mod tests {
&file_path.clone(),
ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
},
)
.expect("Should update index successfully");
Expand All @@ -1072,7 +1076,7 @@ mod tests {

assert!(update.deleted.contains(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1
hash: CRC32_1
}))
}

Expand Down Expand Up @@ -1116,7 +1120,7 @@ mod tests {
let mut actual = ResourceIndex::build(path.clone());
let old_id = ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
};
let result = actual
.update_one(&missing_path, old_id)
Expand All @@ -1128,7 +1132,7 @@ mod tests {
result,
Some(ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
})
);
}
Expand All @@ -1144,7 +1148,7 @@ mod tests {
let mut actual = ResourceIndex::build(path.clone());
let old_id = ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
};
let result = actual
.update_one(&missing_path, old_id)
Expand All @@ -1156,7 +1160,7 @@ mod tests {
result,
Some(ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
})
)
}
Expand Down Expand Up @@ -1267,7 +1271,7 @@ mod tests {
assert_eq!(actual.id2path.len(), 1);
assert!(actual.id2path.contains_key(&ResourceId {
data_size: FILE_SIZE_1,
crc32: CRC32_1,
hash: CRC32_1,
}));
assert_eq!(actual.collisions.len(), 0);
assert_eq!(actual.count_files(), 1);
Expand All @@ -1278,29 +1282,29 @@ mod tests {
let old1 = IndexEntry {
id: ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
},
modified: SystemTime::UNIX_EPOCH,
};
let old2 = IndexEntry {
id: ResourceId {
data_size: 2,
crc32: 1,
hash: 1,
},
modified: SystemTime::UNIX_EPOCH,
};

let new1 = IndexEntry {
id: ResourceId {
data_size: 1,
crc32: 1,
hash: 1,
},
modified: SystemTime::now(),
};
let new2 = IndexEntry {
id: ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
},
modified: SystemTime::now(),
};
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ pub mod errors;
pub use errors::{ArklibError, Result};

pub mod app_id;
pub mod id;
pub mod index;

pub mod link;
pub mod pdf;
pub mod resource;

mod atomic;
mod storage;
Expand Down
2 changes: 1 addition & 1 deletion src/link.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::id::ResourceId;
use crate::resource::{ResourceId, ResourceIdTrait};
use crate::storage::meta::store_metadata;
use crate::storage::prop::store_properties;
use crate::{
Expand Down
47 changes: 25 additions & 22 deletions src/id.rs → src/resource/crc32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,16 @@ use std::io::{BufRead, BufReader};
use std::path::Path;
use std::str::FromStr;

use crate::resource::ResourceIdTrait;
use crate::{ArklibError, Result};

const KILOBYTE: u64 = 1024;
const MEGABYTE: u64 = 1024 * KILOBYTE;
const BUFFER_CAPACITY: usize = 512 * KILOBYTE as usize;

/// Represents a resource identifier using the CRC32 algorithm.
///
/// Uses the `crc32fast` crate to compute the hash value.
#[derive(
Eq,
Ord,
Expand All @@ -25,12 +33,12 @@ use crate::{ArklibError, Result};
)]
pub struct ResourceId {
pub data_size: u64,
pub crc32: u32,
pub hash: u32,
}

impl Display for ResourceId {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "{}-{}", self.data_size, self.crc32)
write!(f, "{}-{}", self.data_size, self.hash)
}
}

Expand All @@ -40,17 +48,16 @@ impl FromStr for ResourceId {
fn from_str(s: &str) -> Result<Self> {
let (l, r) = s.split_once('-').ok_or(ArklibError::Parse)?;
let data_size: u64 = l.parse().map_err(|_| ArklibError::Parse)?;
let crc32: u32 = r.parse().map_err(|_| ArklibError::Parse)?;
let hash: u32 = r.parse().map_err(|_| ArklibError::Parse)?;

Ok(ResourceId { data_size, crc32 })
Ok(ResourceId { data_size, hash })
}
}

impl ResourceId {
pub fn compute<P: AsRef<Path>>(
data_size: u64,
file_path: P,
) -> Result<Self> {
impl ResourceIdTrait<'_> for ResourceId {
type HashType = u32;

fn compute<P: AsRef<Path>>(data_size: u64, file_path: P) -> Result<Self> {
log::trace!(
"[compute] file {} with size {} mb",
file_path.as_ref().display(),
Expand All @@ -65,15 +72,15 @@ impl ResourceId {
ResourceId::compute_reader(data_size, &mut reader)
}

pub fn compute_bytes(bytes: &[u8]) -> Result<Self> {
fn compute_bytes(bytes: &[u8]) -> Result<Self> {
let data_size = bytes.len().try_into().map_err(|_| {
ArklibError::Other(anyhow!("Can't convert usize to u64"))
})?; //.unwrap();
let mut reader = BufReader::with_capacity(BUFFER_CAPACITY, bytes);
ResourceId::compute_reader(data_size, &mut reader)
}

pub fn compute_reader<R: Read>(
fn compute_reader<R: Read>(
data_size: u64,
reader: &mut BufReader<R>,
) -> Result<Self> {
Expand All @@ -99,19 +106,15 @@ impl ResourceId {
})?;
}

let crc32: u32 = hasher.finalize();
let hash: u32 = hasher.finalize();
log::trace!("[compute] {} bytes has been read", bytes_read);
log::trace!("[compute] checksum: {:#02x}", crc32);
log::trace!("[compute] checksum: {:#02x}", hash);
assert_eq!(std::convert::Into::<u64>::into(bytes_read), data_size);

Ok(ResourceId { data_size, crc32 })
Ok(ResourceId { data_size, hash })
}
}

const KILOBYTE: u64 = 1024;
const MEGABYTE: u64 = 1024 * KILOBYTE;
const BUFFER_CAPACITY: usize = 512 * KILOBYTE as usize;

#[cfg(test)]
mod tests {
use crate::initialize;
Expand All @@ -133,24 +136,24 @@ mod tests {
.len();

let id1 = ResourceId::compute(data_size, file_path).unwrap();
assert_eq!(id1.crc32, 0x342a3d4a);
assert_eq!(id1.hash, 0x342a3d4a);
assert_eq!(id1.data_size, 128760);

let raw_bytes = fs::read(file_path).unwrap();
let id2 = ResourceId::compute_bytes(raw_bytes.as_slice()).unwrap();
assert_eq!(id2.crc32, 0x342a3d4a);
assert_eq!(id2.hash, 0x342a3d4a);
assert_eq!(id2.data_size, 128760);
}

#[test]
fn resource_id_order() {
let id1 = ResourceId {
data_size: 1,
crc32: 2,
hash: 2,
};
let id2 = ResourceId {
data_size: 2,
crc32: 1,
hash: 1,
};

assert!(id1 < id2);
Expand Down
Loading

0 comments on commit 6263e61

Please sign in to comment.