Skip to content

Commit

Permalink
Minor enhancements
Browse files Browse the repository at this point in the history
Signed-off-by: Tarek <[email protected]>
  • Loading branch information
kirillt authored and tareknaser committed Mar 12, 2024
1 parent 2005fa3 commit 16a9b23
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 176 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.idea
target
Cargo.lock
**/app_id
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
[workspace]
members = ["data-resource", "fs-atomic-versions", "fs-index", "fs-utils"]
members = [
"data-resource",
"fs-atomic-versions",
"fs-index",
"fs-utils"
]

default-members = [
"data-resource",
"fs-atomic-versions",
Expand Down
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ The purpose of the library is to manage _resource index_ of folders with various

<div align="center">

| Package | Description |
| -------------------- | ---------------------------------------- |
| `fs-index` | Resource Index construction and updating |
| `data-resource` | Resource hashing and ID construction |
| `fs-atomic-versions` | Version-based preventing of dirty writes |
| `fs-utils` | Utility functions and common code |
| Package | Description |
| -------------------- | ------------------------------------------ |
| `data-resource` | Resource hashing and ID construction |
| `fs-index` | Resource Index construction and updating |
| `fs-atomic-light` | Temp file-based preventing of dirty writes |
| `fs-atomic-versions` | Version-based preventing of dirty writes |
| `fs-utils` | Utility functions and common code |

</div>

Expand Down Expand Up @@ -61,6 +62,6 @@ cargo bench index_build

Our benchmark suite includes tests on local files and directories. These benchmarks are located in the `benches/` directory of some crates. Each benchmark sets a time limit using `group.measurement_time()`, which you can adjust manually based on your requirements.

You have the flexibility to benchmark specific files or folders by modifying the variables within the benchmark files. By default, the benchmarks operate on the [`testdata/`](../testdata/) directory and its contents. You can change the directory/files by setting the `DIR_PATH` and `FILE_PATHS` variables to the desired values.
You have the flexibility to benchmark specific files or folders by modifying the variables within the benchmark files. By default, the benchmarks operate on the [`test-assets/`](../test-assets/) directory and its contents. You can change the directory/files by setting the `DIR_PATH` and `FILE_PATHS` variables to the desired values.

For pre-benchmark assessment of required time to index a huge local folder, you can modify `test_build_resource_index` test case in `src/index.rs`.
6 changes: 5 additions & 1 deletion data-resource/benches/compute_bytes_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ use pprof::criterion::{Output, PProfProfiler};
use rand::prelude::*;
use std::fs;

const FILE_PATHS: [&str; 2] = ["../testdata/lena.jpg", "../testdata/test.pdf"]; // Add files to benchmark here
const FILE_PATHS: [&str; 2] = [
// Add files to benchmark here
"../test-assets/lena.jpg",
"../test-assets/test.pdf",
];

fn generate_random_data(size: usize) -> Vec<u8> {
let mut rng = rand::thread_rng();
Expand Down
162 changes: 0 additions & 162 deletions data-resource/src/id.rs

This file was deleted.

163 changes: 161 additions & 2 deletions data-resource/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,162 @@
mod id;
use anyhow::anyhow;
use crc32fast::Hasher;
use log;
use serde::{Deserialize, Serialize};
use std::fmt::{self, Display, Formatter};
use std::fs;
use std::io::Read;
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::str::FromStr;

pub use id::ResourceId;
use fs_utils::errors::{ArklibError, Result};

/// Identifier of a resource, derived from its content.
///
/// Displayed as `"{data_size}-{crc32}"` (see the `Display` impl) and parsed
/// back from that form by the `FromStr` impl. The derived `Ord` compares
/// `data_size` first, then `crc32` (field declaration order).
#[derive(
    Eq,
    Ord,
    PartialEq,
    PartialOrd,
    Hash,
    Clone,
    Copy,
    Debug,
    Deserialize,
    Serialize,
)]
pub struct ResourceId {
    // Size of the resource content in bytes.
    pub data_size: u64,
    // CRC32 checksum of the resource content.
    pub crc32: u32,
}

impl Display for ResourceId {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "{}-{}", self.data_size, self.crc32)
}
}

impl FromStr for ResourceId {
    type Err = ArklibError;

    /// Parses an id from the `"{data_size}-{crc32}"` form produced by
    /// `Display`; any malformed input maps to `ArklibError::Parse`.
    fn from_str(s: &str) -> Result<Self> {
        let (size_part, crc_part) =
            s.split_once('-').ok_or(ArklibError::Parse)?;

        let data_size = size_part
            .parse::<u64>()
            .map_err(|_| ArklibError::Parse)?;
        let crc32 = crc_part
            .parse::<u32>()
            .map_err(|_| ArklibError::Parse)?;

        Ok(ResourceId { data_size, crc32 })
    }
}

impl ResourceId {
    /// Computes the id of the file at `file_path`.
    ///
    /// `data_size` must equal the file's size in bytes; `compute_reader`
    /// asserts this after hashing. Returns an error if the file cannot
    /// be opened or read.
    pub fn compute<P: AsRef<Path>>(
        data_size: u64,
        file_path: P,
    ) -> Result<Self> {
        log::trace!(
            "[compute] file {} with size {} mb",
            file_path.as_ref().display(),
            data_size / MEGABYTE
        );

        let source = fs::OpenOptions::new()
            .read(true)
            .open(file_path.as_ref())?;

        let mut reader = BufReader::with_capacity(BUFFER_CAPACITY, source);
        ResourceId::compute_reader(data_size, &mut reader)
    }

    /// Computes the id of an in-memory byte slice.
    pub fn compute_bytes(bytes: &[u8]) -> Result<Self> {
        let data_size = bytes.len().try_into().map_err(|_| {
            ArklibError::Other(anyhow!("Can't convert usize to u64"))
        })?;
        let mut reader = BufReader::with_capacity(BUFFER_CAPACITY, bytes);
        ResourceId::compute_reader(data_size, &mut reader)
    }

    /// Hashes everything `reader` yields and builds the id from the
    /// resulting CRC32 and `data_size`.
    ///
    /// # Panics
    ///
    /// Panics if `reader` has already buffered data, or if the total
    /// number of bytes read differs from `data_size`.
    pub fn compute_reader<R: Read>(
        data_size: u64,
        reader: &mut BufReader<R>,
    ) -> Result<Self> {
        assert!(reader.buffer().is_empty());

        log::trace!(
            "Calculating hash of raw bytes (given size is {} megabytes)",
            data_size / MEGABYTE
        );

        let mut hasher = Hasher::new();
        // Accumulate in u64: a u32 counter would overflow for inputs of
        // 4 GiB or more, making the final size check fail spuriously
        // even though data_size itself is u64.
        let mut bytes_read: u64 = 0;
        loop {
            let chunk = reader.fill_buf()?;
            if chunk.is_empty() {
                break;
            }
            hasher.update(chunk);
            let consumed = chunk.len();
            reader.consume(consumed);
            // usize -> u64 is lossless on all supported targets.
            bytes_read += consumed as u64;
        }

        let crc32: u32 = hasher.finalize();
        log::trace!("[compute] {} bytes has been read", bytes_read);
        log::trace!("[compute] checksum: {:#02x}", crc32);
        assert_eq!(bytes_read, data_size);

        Ok(ResourceId { data_size, crc32 })
    }
}

// Byte-size units used for buffer sizing and trace-log reporting.
const KILOBYTE: u64 = 1024;
const MEGABYTE: u64 = 1024 * KILOBYTE;
// Read-buffer size (512 KiB) used when streaming file contents for hashing.
const BUFFER_CAPACITY: usize = 512 * KILOBYTE as usize;

#[cfg(test)]
mod tests {
    use fs_atomic_versions::initialize;

    use super::*;

    /// Hashing a file by path and hashing its raw bytes must agree,
    /// and both must match the known checksum/size of the test image.
    #[test]
    fn compute_id_test() {
        initialize();

        let file_path = Path::new("../test-assets/lena.jpg");
        let data_size = fs::metadata(file_path)
            .unwrap_or_else(|_| {
                // Fixed garbled message: was "…test file_path.{}".
                panic!(
                    "Could not open image test file: {}",
                    file_path.display()
                )
            })
            .len();

        let id1 = ResourceId::compute(data_size, file_path).unwrap();
        assert_eq!(id1.crc32, 0x342a3d4a);
        assert_eq!(id1.data_size, 128760);

        let raw_bytes = fs::read(file_path).unwrap();
        let id2 = ResourceId::compute_bytes(raw_bytes.as_slice()).unwrap();
        assert_eq!(id2.crc32, 0x342a3d4a);
        assert_eq!(id2.data_size, 128760);
    }

    /// Derived ordering compares `data_size` before `crc32`
    /// (field declaration order).
    #[test]
    fn resource_id_order() {
        let id1 = ResourceId {
            data_size: 1,
            crc32: 2,
        };
        let id2 = ResourceId {
            data_size: 2,
            crc32: 1,
        };

        assert!(id1 < id2);
        assert!(id2 > id1);
        assert!(id1 != id2);
        assert!(id1 == id1);
        assert!(id2 == id2);
    }
}
2 changes: 1 addition & 1 deletion fs-index/benches/index_build_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use criterion::{
use fs_index::index::ResourceIndex;
use pprof::criterion::{Output, PProfProfiler};

const DIR_PATH: &str = "../testdata/"; // Set the path to the directory containing the resources here
const DIR_PATH: &str = "../test-assets/"; // Set the path to the directory containing the resources here

fn index_build_benchmark(c: &mut Criterion) {
// assert the path exists and is a directory
Expand Down
Loading

0 comments on commit 16a9b23

Please sign in to comment.