From b469c6b627f6b106563f23c35df48db9363802b6 Mon Sep 17 00:00:00 2001 From: Pushkar Mishra Date: Wed, 25 Dec 2024 00:11:56 +0530 Subject: [PATCH] few changes Signed-off-by: Pushkar Mishra --- README.md | 4 +- fs-cache/Cargo.toml | 1 - fs-cache/src/cache.rs | 279 +++++++++++++++++++++------------------- fs-cache/src/lib.rs | 4 +- fs-storage/src/utils.rs | 58 ++++++++- 5 files changed, 205 insertions(+), 141 deletions(-) diff --git a/README.md b/README.md index d9ab90e..4f3dbfd 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,11 @@ The core crate is `fs-index` which provides us with [content addressing](https:/ | Package | Description | | --------------- | ---------------------------------------- | -| `ark-cli` | The CLI tool to interact with ark crates | +| `ark-cli` | The CLI tool to interact with ARK crates | | `data-resource` | Resource hashing and ID construction | | `fs-cache` | Memory and disk caching with LRU eviction | | `fs-index` | Resource Index construction and updating | -| `fs-storage` | Filesystem storage for resources | +| `fs-storage` | Key-value storage persisted on filesystem | | `fs-metadata` | Metadata management | | `fs-properties` | Properties management | | `data-link` | Linking resources | diff --git a/fs-cache/Cargo.toml b/fs-cache/Cargo.toml index b70c582..543f8a6 100644 --- a/fs-cache/Cargo.toml +++ b/fs-cache/Cargo.toml @@ -17,4 +17,3 @@ lru = "0.12.5" [dev-dependencies] tempdir = "0.3.7" -rstest = "0.23" diff --git a/fs-cache/src/cache.rs b/fs-cache/src/cache.rs index fb9881e..2622a58 100644 --- a/fs-cache/src/cache.rs +++ b/fs-cache/src/cache.rs @@ -31,8 +31,6 @@ pub struct Cache { current_memory_bytes: usize, /// The maximum allowable memory usage in bytes. max_memory_bytes: usize, - /// Whether to include values in debug logs - log_values: bool, } impl Cache @@ -51,24 +49,26 @@ where /// * `label` - Identifier used in logs /// * `path` - Directory where cache files are stored /// * `max_memory_bytes` - Maximum bytes to keep in memory - /// * `log_values` - Whether to include values in debug logs + /// * `preload_cache` - Whether to pre-load the cache from disk on initialization pub fn new( label: String, path: &Path, max_memory_bytes: usize, - log_values: bool, + preload_cache: bool, ) -> Result { + Self::validate_path(path, &label)?; + + let memory_cache = LruCache::new( + NonZeroUsize::new(max_memory_bytes) + .expect("Capacity can't be zero"), + ); + let mut cache = Self { label: label.clone(), path: PathBuf::from(path), - // TODO: NEED FIX - memory_cache: LruCache::new( - NonZeroUsize::new(max_memory_bytes) - .expect("Capacity can't be zero"), - ), + memory_cache, current_memory_bytes: 0, max_memory_bytes, - log_values, }; log::debug!( @@ -77,40 +77,59 @@ where max_memory_bytes ); - cache.load_fs()?; + if preload_cache { + cache.load_fs()?; + } Ok(cache) } + /// Validates the provided path. + /// + /// # Arguments + /// * `path` - The path to validate + /// * `label` - Identifier used in logs + fn validate_path(path: &Path, label: &str) -> Result<()> { + if !path.exists() { + return Err(ArklibError::Storage( + label.to_owned(), + "Folder does not exist".to_owned(), + )); + } + + if !path.is_dir() { + return Err(ArklibError::Storage( + label.to_owned(), + "Path is not a directory".to_owned(), + )); + } + + Ok(()) + } + /// Retrieves a value by its key, checking memory first then disk. /// Returns None if the key doesn't exist. pub fn get(&mut self, key: &K) -> Option { log::debug!("cache/{}: retrieving value for key {}", self.label, key); - let value = self - .fetch_from_memory(key) - .or_else(|| self.fetch_from_disk(key)); - - match &value { - Some(v) => { - if self.log_values { - log::debug!( - "cache/{}: found value for key {}: {:?}", - self.label, - key, - v - ); - } - } - None => { - log::warn!( - "cache/{}: no value found for key {}", - self.label, - key - ); - } + if let Some(v) = self.fetch_from_memory(key) { + log::debug!( + "cache/{}: value for key {} retrieved from memory", + self.label, + key + ); + return Some(v); + } + if let Some(v) = self.fetch_from_disk(key) { + log::debug!( + "cache/{}: value for key {} retrieved from disk", + self.label, + key + ); + return Some(v); } - value + log::warn!("cache/{}: no value found for key {}", self.label, key); + None } /// Stores a new value with the given key. @@ -166,20 +185,6 @@ where /// recent files as possible within the memory limit. Files that don't fit in memory remain only /// on disk. fn load_fs(&mut self) -> Result<()> { - if !self.path.exists() { - return Err(ArklibError::Storage( - self.label.clone(), - "Folder does not exist".to_owned(), - )); - } - - if !self.path.is_dir() { - return Err(ArklibError::Storage( - self.label.clone(), - "Path is not a directory".to_owned(), - )); - } - // Collect metadata for all files let mut file_metadata = Vec::new(); for entry in fs::read_dir(&self.path)? { @@ -284,7 +289,16 @@ where /// Writes a value to disk using atomic operations. fn persist_to_disk(&mut self, key: &K, value: &V) -> Result<()> { log::debug!("cache/{}: writing to disk for key {}", self.label, key); - fs::create_dir_all(&self.path)?; + + if !self.path.exists() { + return Err(ArklibError::Storage( + self.label.clone(), + format!( + "Cache directory does not exist: {}", + self.path.display() + ), + )); + } let file_path = self.path.join(key.to_string()); debug_assert!( @@ -317,11 +331,14 @@ where /// First checks the memory cache for size information to avoid disk access. /// Falls back to checking the file size on disk if not found in memory. fn get_file_size(&self, key: &K) -> Result { + if let Some(entry) = self.memory_cache.peek(key) { + return Ok(entry.size); + } Ok(fs::metadata(self.path.join(key.to_string()))?.len() as usize) } /// Adds or updates a value in the memory cache, evicting old entries if needed. - /// Returns error if value is larger than maximum memory limit. + /// Logs error if value is larger than maximum memory limit. fn update_memory_cache(&mut self, key: &K, value: &V) -> Result<()> { log::debug!("cache/{}: caching in memory for key {}", self.label, key); let size = self.get_file_size(key)?; @@ -377,7 +394,6 @@ where #[cfg(test)] mod tests { use super::*; - use rstest::{fixture, rstest}; use std::{ fs::File, io::Write, @@ -385,63 +401,50 @@ mod tests { }; use tempdir::TempDir; - // Helper struct that implements required traits - #[derive(Clone, Debug, PartialEq)] - struct TestValue(Vec); - - impl AsRef<[u8]> for TestValue { - fn as_ref(&self) -> &[u8] { - &self.0 - } - } - - impl From> for TestValue { - fn from(bytes: Vec) -> Self { - TestValue(bytes) - } - } - - #[fixture] - fn temp_dir() -> TempDir { + /// Helper function to create a temporary directory + fn create_temp_dir() -> TempDir { TempDir::new("tmp").expect("Failed to create temporary directory") } - #[fixture] - fn cache(temp_dir: TempDir) -> Cache { + /// Helper function to create a test cache with default settings + fn create_test_cache(temp_dir: &TempDir) -> Cache> { Cache::new( "test".to_string(), temp_dir.path(), 1024 * 1024, // 1MB - false, + true, // Enable preloading by default ) .expect("Failed to create cache") } - #[rstest] - fn test_new_cache(cache: Cache) { + #[test] + fn test_new_cache() { + let temp_dir = create_temp_dir(); + let cache = create_test_cache(&temp_dir); assert_eq!(cache.current_memory_bytes, 0); assert_eq!(cache.max_memory_bytes, 1024 * 1024); } - #[rstest] - fn test_set_and_get(mut cache: Cache) { + #[test] + fn test_set_and_get() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); let key = "test_key".to_string(); - let value = TestValue(vec![1, 2, 3, 4]); + let value = vec![1, 2, 3, 4]; - // Test set cache .set(key.clone(), value.clone()) .expect("Failed to set value"); - - // Test get let retrieved = cache.get(&key).expect("Failed to get value"); - assert_eq!(retrieved.0, value.0); + assert_eq!(retrieved, value); } - #[rstest] - fn test_exists(mut cache: Cache) { + #[test] + fn test_exists() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); let key = "test_key".to_string(); - let value = TestValue(vec![1, 2, 3, 4]); + let value = vec![1, 2, 3, 4]; assert!(!cache.exists(&key)); cache @@ -450,13 +453,17 @@ mod tests { assert!(cache.exists(&key)); } - #[rstest] - fn test_get_nonexistent(mut cache: Cache) { + #[test] + fn test_get_nonexistent() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); assert!(cache.get(&"nonexistent".to_string()).is_none()); } - #[rstest] - fn test_keys(mut cache: Cache) { + #[test] + fn test_keys() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); let values = vec![ ("key1".to_string(), vec![1, 2]), ("key2".to_string(), vec![3, 4]), @@ -466,7 +473,7 @@ mod tests { // Add values for (key, data) in values.iter() { cache - .set(key.clone(), TestValue(data.clone())) + .set(key.clone(), data.clone()) .expect("Failed to set value"); } @@ -483,26 +490,27 @@ mod tests { assert_eq!(cache_keys, expected_keys); } - #[rstest] - fn test_memory_eviction(temp_dir: TempDir) { + #[test] + fn test_memory_eviction() { + let temp_dir = create_temp_dir(); let mut cache = Cache::new( "test".to_string(), temp_dir.path(), - 8, // Very small limit to force eviction - false, + 8, // Very small limit to force eviction + true, // Enable preloading by default ) .expect("Failed to create cache"); // Add first value let key1 = "key1.txt".to_string(); - let value1 = TestValue(vec![1, 2, 3, 4, 5, 7]); + let value1 = vec![1, 2, 3, 4, 5, 7]; cache .set(key1.clone(), value1.clone()) .expect("Failed to set value1"); // Add second value to trigger eviction let key2 = "key2.json".to_string(); - let value2 = TestValue(vec![5, 6, 8]); + let value2 = vec![5, 6, 8]; cache .set(key2.clone(), value2.clone()) .expect("Failed to set value2"); @@ -512,10 +520,12 @@ mod tests { assert_eq!(cache.get(&key1).unwrap(), value1); // Should load from disk } - #[rstest] - fn test_large_value_handling(mut cache: Cache) { + #[test] + fn test_large_value_handling() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); let key = "large_key".to_string(); - let large_value = TestValue(vec![0; 2 * 1024 * 1024]); // 2MB, larger than cache + let large_value = vec![0; 2 * 1024 * 1024]; // 2MB, larger than cache // Should fail to cache in memory but succeed in writing to disk assert!(cache @@ -524,15 +534,16 @@ mod tests { assert_eq!(cache.get(&key).unwrap(), large_value); // Should load from disk } - #[rstest] - fn test_persistence(temp_dir: TempDir) { + #[test] + fn test_persistence() { + let temp_dir = create_temp_dir(); let key = "persist_key".to_string(); - let value = TestValue(vec![1, 2, 3, 4]); + let value = vec![1, 2, 3, 4]; // Scope for first cache instance { let mut cache = - Cache::new("test".to_string(), temp_dir.path(), 1024, false) + Cache::new("test".to_string(), temp_dir.path(), 1024, true) .expect("Failed to create first cache"); cache .set(key.clone(), value.clone()) @@ -541,39 +552,37 @@ mod tests { // Create new cache instance pointing to same directory let mut cache2 = - Cache::new("test".to_string(), temp_dir.path(), 1024, false) + Cache::new("test".to_string(), temp_dir.path(), 1024, true) .expect("Failed to create second cache"); // Should be able to read value written by first instance - let retrieved: TestValue = - cache2.get(&key).expect("Failed to get value"); - assert_eq!(retrieved.0, value.0); + let retrieved: Vec = cache2.get(&key).expect("Failed to get value"); + assert_eq!(retrieved, value); } - #[rstest] - fn test_concurrent_reads(temp_dir: TempDir) { + #[test] + fn test_concurrent_reads() { use std::thread; + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); let key = "test_key".to_string(); - let value = TestValue(vec![1, 2, 3, 4]); + let value = vec![1, 2, 3, 4]; // Set up initial cache with data - let mut cache = - Cache::new("test".to_string(), temp_dir.path(), 1024, false) - .expect("Failed to create cache"); cache .set(key.clone(), value.clone()) .expect("Failed to set value"); // Create multiple reader caches - let mut handles: Vec>> = vec![]; + let mut handles: Vec>>> = vec![]; for _ in 0..3 { let key = key.clone(); let cache_path = temp_dir.path().to_path_buf(); handles.push(thread::spawn(move || { let mut reader_cache = - Cache::new("test".to_string(), &cache_path, 1024, false) + Cache::new("test".to_string(), &cache_path, 1024, true) .expect("Failed to create reader cache"); reader_cache.get(&key) @@ -587,11 +596,13 @@ mod tests { } } - #[rstest] - fn test_duplicate_set(mut cache: Cache) { + #[test] + fn test_duplicate_set() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); let key = "dup_key".to_string(); - let value1 = TestValue(vec![1, 2, 3, 4]); - let value2 = TestValue(vec![5, 6, 7, 8]); + let value1 = vec![1, 2, 3, 4]; + let value2 = vec![5, 6, 7, 8]; // First set cache @@ -603,16 +614,17 @@ mod tests { // Should still have first value let retrieved = cache.get(&key).expect("Failed to get value"); - assert_eq!(retrieved.0, value1.0); + assert_eq!(retrieved, value1); } - #[rstest] - fn test_loads_recent_files_first(temp_dir: TempDir) { - let mut cache: Cache = Cache::new( + #[test] + fn test_loads_recent_files_first() { + let temp_dir = create_temp_dir(); + let mut cache: Cache> = Cache::new( "test".to_string(), temp_dir.path(), - 4, // Small limit to force selection - false, + 4, // Small limit to force selection + true, // Enable preloading by default ) .expect("Failed to create cache"); @@ -646,17 +658,20 @@ mod tests { .contains(&"old.txt".to_string())); } - #[rstest] + #[test] #[should_panic(expected = "Capacity can't be zero")] - fn test_zero_capacity(temp_dir: TempDir) { - let _cache: std::result::Result, ArklibError> = - Cache::new("test".to_string(), temp_dir.path(), 0, false); + fn test_zero_capacity() { + let temp_dir = create_temp_dir(); + let _cache: std::result::Result>, ArklibError> = + Cache::new("test".to_string(), temp_dir.path(), 0, true); } - #[rstest] - fn test_memory_tracking(mut cache: Cache) { + #[test] + fn test_memory_tracking() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); let key = "track_key".to_string(); - let value = TestValue(vec![1, 2, 3, 4]); // 4 bytes + let value = vec![1, 2, 3, 4]; // 4 bytes cache .set(key.clone(), value) @@ -665,4 +680,6 @@ mod tests { // Memory usage should match file size assert_eq!(cache.current_memory_bytes, 4); } + + // TODO: Add More Test } diff --git a/fs-cache/src/lib.rs b/fs-cache/src/lib.rs index a5c08fd..acd91ad 100644 --- a/fs-cache/src/lib.rs +++ b/fs-cache/src/lib.rs @@ -1 +1,3 @@ -pub mod cache; +mod cache; + +pub use cache::Cache; diff --git a/fs-storage/src/utils.rs b/fs-storage/src/utils.rs index f20d860..26cbece 100644 --- a/fs-storage/src/utils.rs +++ b/fs-storage/src/utils.rs @@ -53,7 +53,10 @@ where Ok(data) } -/// Writes a serializable value to a file and returns the timestamp of the write +/// Writes a serializable value to a file. +/// +/// This function takes a path, a serializable value, and a timestamp. It writes the value to the specified +/// file in a pretty JSON format. The function ensures that the file is flushed and synced after writing. pub fn write_json_file( path: &Path, value: &T, @@ -69,6 +72,10 @@ pub fn write_json_file( Ok(()) } +/// Extracts a key of type K from the given file path. +/// +/// The function can include or exclude the file extension based on the `include_extension` parameter. +/// It returns a Result containing the parsed key or an error if the extraction or parsing fails. pub fn extract_key_from_file_path( label: &str, path: &Path, @@ -106,28 +113,67 @@ where #[cfg(test)] mod tests { use super::*; + use serde_json::json; use std::io::Write; use tempdir::TempDir; /// Test reading a legacy version 2 `FileStorage` file #[test] fn test_read_legacy_fs() { - let temp_dir = TempDir::new("ark-rust").unwrap(); + let temp_dir = TempDir::new("ark-rust") + .expect("Failed to create temporary directory"); let file_path = temp_dir.path().join("test_read_legacy_fs"); let file_content = r#"version: 2 key1:1 key2:2 key3:3 "#; - let mut file = std::fs::File::create(&file_path).unwrap(); - file.write_all(file_content.as_bytes()).unwrap(); + let mut file = std::fs::File::create(&file_path) + .expect("Failed to create test file"); + file.write_all(file_content.as_bytes()) + .expect("Failed to write to test file"); // Read the file and check the data - let data: BTreeMap = - read_version_2_fs(&file_path).unwrap(); + let data: BTreeMap = read_version_2_fs(&file_path) + .expect("Failed to read version 2 file storage"); assert_eq!(data.len(), 3); assert_eq!(data.get("key1"), Some(&1)); assert_eq!(data.get("key2"), Some(&2)); assert_eq!(data.get("key3"), Some(&3)); } + + /// Test writing a JSON file + #[test] + fn test_write_json_file() { + let temp_dir = TempDir::new("ark-rust") + .expect("Failed to create temporary directory"); + let file_path = temp_dir.path().join("test_write_json_file.json"); + let value = json!({"key": "value"}); + + write_json_file(&file_path, &value, SystemTime::now()) + .expect("Failed to write JSON file"); + + let written_content = std::fs::read_to_string(&file_path) + .expect("Failed to read written JSON file"); + let expected_content = serde_json::to_string_pretty(&value) + .expect("Failed to serialize JSON value"); + assert_eq!(written_content, expected_content); + } + + /// Test extracting a key from a file path + #[test] + fn test_extract_key_from_file_path() { + let path_with_extension = Path::new("tmp/foo.txt"); + let path_without_extension = Path::new("tmp/foo"); + + let key_with_extension: String = + extract_key_from_file_path("test", path_with_extension, true) + .expect("Failed to extract key with extension"); + assert_eq!(key_with_extension, "foo.txt"); + + let key_without_extension: String = + extract_key_from_file_path("test", path_without_extension, false) + .expect("Failed to extract key without extension"); + assert_eq!(key_without_extension, "foo"); + } }