From 4856876e455ea8349b038e5d1f2ddffa907d9a2e Mon Sep 17 00:00:00 2001 From: Ishan Bhanuka Date: Sun, 31 Mar 2024 15:20:34 -0400 Subject: [PATCH] #1: Add FileStorage struct (#10) * Add FileStorage logic, example and documentation Co-Authored-by: Ishan Bhanuka Co-Authored-by: Pushkar Mishra Co-Authored-by: Tarek Co-Authored-by: Kirill Taran * refactor done Signed-off-by: Pushkar Mishra * fix cargo.toml Signed-off-by: Pushkar Mishra * Update fs-storage/src/file_storage.rs Co-authored-by: Tarek Elsayed <60650661+tareknaser@users.noreply.github.com> * Update fs-storage/src/file_storage.rs Co-authored-by: Tarek Elsayed <60650661+tareknaser@users.noreply.github.com> * Update fs-storage/src/file_storage.rs Co-authored-by: Tarek Elsayed <60650661+tareknaser@users.noreply.github.com> * Add doc comment for erase * feat(fs-storage): refactor CLI write cmd to accept key-value pairs Signed-off-by: Tarek --------- Signed-off-by: Pushkar Mishra Signed-off-by: Tarek Co-authored-by: Pushkar Mishra Co-authored-by: Tarek Co-authored-by: Kirill Taran Co-authored-by: Tarek Elsayed <60650661+tareknaser@users.noreply.github.com> --- data-error/src/lib.rs | 17 ++- data-resource/src/lib.rs | 1 - fs-storage/Cargo.toml | 11 ++ fs-storage/README.md | 46 +++++++ fs-storage/examples/cli.rs | 115 +++++++++++++++++ fs-storage/src/file_storage.rs | 222 +++++++++++++++++++++++++++++++++ fs-storage/src/lib.rs | 1 + rust-toolchain.toml | 3 + 8 files changed, 414 insertions(+), 2 deletions(-) create mode 100644 fs-storage/README.md create mode 100644 fs-storage/examples/cli.rs create mode 100644 fs-storage/src/file_storage.rs create mode 100644 rust-toolchain.toml diff --git a/data-error/src/lib.rs b/data-error/src/lib.rs index d8a1dd74..1852afe4 100644 --- a/data-error/src/lib.rs +++ b/data-error/src/lib.rs @@ -1,4 +1,4 @@ -use std::str::Utf8Error; +use std::{convert::Infallible, str::Utf8Error}; use thiserror::Error; pub type Result = std::result::Result; @@ -15,6 +15,9 @@ pub enum ArklibError { 
Parse, #[error("Networking error")] Network, + /// Storage error shows label and error message + #[error("Storage error: {0} {1}")] + Storage(String, String), #[error(transparent)] Other(#[from] anyhow::Error), } @@ -48,3 +51,15 @@ impl From> for ArklibError { Self::Other(anyhow::anyhow!(e.to_string())) } } + +impl From<&str> for ArklibError { + fn from(e: &str) -> Self { + Self::Other(anyhow::anyhow!(e.to_string())) + } +} + +impl From<Infallible> for ArklibError { + fn from(_: Infallible) -> Self { + Self::Parse + } +} diff --git a/data-resource/src/lib.rs b/data-resource/src/lib.rs index 21701636..4d7cfb16 100644 --- a/data-resource/src/lib.rs +++ b/data-resource/src/lib.rs @@ -1,6 +1,5 @@ use anyhow::anyhow; use crc32fast::Hasher; -use log; use serde::{Deserialize, Serialize}; use std::fmt::{self, Display, Formatter}; use std::fs; diff --git a/fs-storage/Cargo.toml b/fs-storage/Cargo.toml index 09fc9687..518eabf5 100644 --- a/fs-storage/Cargo.toml +++ b/fs-storage/Cargo.toml @@ -8,4 +8,15 @@ name = "fs_storage" crate-type = ["rlib"] bench = false +[[example]] +name = "cli" + [dependencies] +data-error = { path = "../data-error" } +log = { version = "0.4.17", features = ["release_max_level_off"] } +serde_json = "1.0.82" +serde = { version = "1.0.138", features = ["derive"] } + +[dev-dependencies] +anyhow = "1.0.81" +tempdir = "0.3.7" diff --git a/fs-storage/README.md b/fs-storage/README.md new file mode 100644 index 00000000..cfd68569 --- /dev/null +++ b/fs-storage/README.md @@ -0,0 +1,46 @@ +# Ark file system storage + +File system storage implementation for writing key-value pairs to disk. + +## Steps to use CLI + +- Create a test.json file of key-value pairs you want to store.
+ +```json +{ + "key1": "value1", + "key2": "value2", + "key3": "value3" +} +``` + +- Run Write Command + +```bash +cargo run --example cli write /tmp/z test.json +``` + +Alternatively, you can directly provide the input data as a comma-separated list of key-value pairs + +```bash +cargo run --example cli write /tmp/z a:1,b:2,c:3 +``` + +- Run Read Command + +```bash +cargo run --example cli read /tmp/z key1,key2 +``` + +- Get Output + +```bash +key1: value1 +key2: value2 +``` + +- To get all key value pairs + +```bash +cargo run --example cli read /tmp/z +``` diff --git a/fs-storage/examples/cli.rs b/fs-storage/examples/cli.rs new file mode 100644 index 00000000..37f3a24a --- /dev/null +++ b/fs-storage/examples/cli.rs @@ -0,0 +1,115 @@ +use anyhow::{Context, Result}; +use fs_storage::file_storage::FileStorage; +use serde_json::Value; +use std::collections::BTreeMap; +use std::env; +use std::fs; +use std::path::Path; + +fn main() { + if let Err(e) = run() { + eprintln!("Error: {}", e); + } +} + +fn run() -> Result<()> { + let args: Vec<String> = env::args().collect(); + if args.len() < 3 { + println!("Usage:"); + println!(" cargo run --example cli write [JSON_FILE_PATH | KEY_VALUE_PAIRS]"); + println!(" cargo run --example cli read "); + return Ok(()); + } + + let command = &args[1]; + let path = &args[2]; + match command.as_str() { + "read" => read_command(&args, path), + "write" => write_command(&args, path), + _ => { + eprintln!("Invalid command. 
Use 'read' or 'write'."); + Ok(()) + } + } +} + +fn read_command(args: &[String], path: &str) -> Result<()> { + let keys = if args.len() > 3 { + args[3] + .split(',') + .map(|s| s.to_string()) + .collect::<Vec<String>>() + } else { + vec![] + }; + + let mut fs = FileStorage::new("cli".to_string(), Path::new(path)); + let map: BTreeMap<String, String> = + fs.read_file().context("Failed to read file")?; + + if keys.is_empty() { + for (key, value) in map { + println!("{}: {}", key, value); + } + } else { + for key in &keys { + if let Some(value) = map.get(key) { + println!("{}: {}", key, value); + } else { + eprintln!("Key '{}' not found", key); + } + } + } + + Ok(()) +} + +fn write_command(args: &[String], path: &str) -> Result<()> { + if args.len() < 4 { + println!("Usage: cargo run --example cli write [JSON_FILE_PATH | KEY_VALUE_PAIRS]"); + return Ok(()); + } + + let content = &args[3]; + // Check if the content is a JSON file path + let content_json = Path::new(content) + .extension() + .map_or(false, |ext| ext == "json"); + + let mut kv_pairs = BTreeMap::new(); + if content_json { + let content = + fs::read_to_string(content).context("Failed to read JSON file")?; + let json: Value = + serde_json::from_str(&content).context("Failed to parse JSON")?; + if let Value::Object(object) = json { + for (key, value) in object { + if let Value::String(value_str) = value { + kv_pairs.insert(key, value_str); + } else { + println!( + "Warning: Skipping non-string value for key '{}'", + key + ); + } + } + } else { + println!("JSON value is not an object"); + return Ok(()); + } + } else { + let pairs = content.split(','); + for pair in pairs { + let kv: Vec<&str> = pair.split(':').collect(); + if kv.len() == 2 { + kv_pairs.insert(kv[0].to_string(), kv[1].to_string()); + } + } + } + + let mut fs = FileStorage::new("cli".to_string(), Path::new(path)); + fs.write_file(&kv_pairs) + .context("Failed to write file")?; + + Ok(()) +} diff --git a/fs-storage/src/file_storage.rs b/fs-storage/src/file_storage.rs new
file mode 100644 index 00000000..965b1693 --- /dev/null +++ b/fs-storage/src/file_storage.rs @@ -0,0 +1,222 @@ +use std::fmt::Debug; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, BufWriter, Write}; +use std::str::FromStr; +use std::time::SystemTime; +use std::{ + collections::BTreeMap, + path::{Path, PathBuf}, +}; + +use data_error::{ArklibError, Result}; + +const STORAGE_VERSION: i32 = 2; +const STORAGE_VERSION_PREFIX: &str = "version "; + +pub struct FileStorage { + label: String, + path: PathBuf, + timestamp: SystemTime, +} + +impl FileStorage { + /// Create a new file storage with a diagnostic label and file path + pub fn new(label: String, path: &Path) -> Self { + Self { + label, + path: PathBuf::from(path), + timestamp: SystemTime::now(), + } + } + + /// Check if underlying file has been updated + /// + /// This check can be used before reading the file. + pub fn is_file_updated(&self) -> Result<bool> { + let file_timestamp = fs::metadata(&self.path)?.modified()?; + Ok(self.timestamp < file_timestamp) + } + + /// Read data from disk + /// + /// The first line must be the version header; the remaining lines are + /// parsed as JSON into a [BTreeMap] with generic key K and value V. + /// The stored timestamp is refreshed after a successful read. 
+ pub fn read_file<K, V>(&mut self) -> Result<BTreeMap<K, V>> + where + K: serde::de::DeserializeOwned + + FromStr + + std::hash::Hash + + std::cmp::Eq + + Debug + + std::cmp::Ord, + V: serde::de::DeserializeOwned + Debug, + ArklibError: From<<K as FromStr>::Err>, + { + let file = fs::File::open(&self.path)?; + let reader = BufReader::new(file); + let mut lines = reader.lines(); + + let new_timestamp = fs::metadata(&self.path)?.modified()?; + match lines.next() { + Some(header) => { + let header = header?; + self.verify_version(&header)?; + let mut data = String::new(); + for line in lines { + let line = line?; + if line.is_empty() { + continue; + } + data.push_str(&line); + } + let value_by_id = serde_json::from_str(&data)?; + + self.timestamp = new_timestamp; + Ok(value_by_id) + } + None => Err(ArklibError::Storage( + self.label.clone(), + "Storage file is missing header".to_owned(), + )), + } + } + + /// Write data to file + /// + /// Data is a key-value mapping between a generic key K and a generic value V + pub fn write_file<K, V>( + &mut self, + value_by_id: &BTreeMap<K, V>, + ) -> Result<()> + where + K: serde::Serialize, + V: serde::Serialize, + { + let parent_dir = self.path.parent().ok_or_else(|| { + ArklibError::Storage( + self.label.clone(), + "Failed to get parent directory".to_owned(), + ) + })?; + fs::create_dir_all(parent_dir)?; + let file = File::create(&self.path)?; + let mut writer = BufWriter::new(file); + + writer.write_all( + format!("{}{}\n", STORAGE_VERSION_PREFIX, STORAGE_VERSION) + .as_bytes(), + )?; + + let data = serde_json::to_string(value_by_id)?; + writer.write_all(data.as_bytes())?; + + let new_timestamp = fs::metadata(&self.path)?.modified()?; + if new_timestamp == self.timestamp { + return Err("Timestamp didn't update".into()); + } + self.timestamp = new_timestamp; + + log::info!( + "{} {} entries have been written", + self.label, + value_by_id.len() + ); + Ok(()) + } + + /// Remove file at stored path + pub fn erase(&self) -> Result<()> { + fs::remove_file(&self.path).map_err(|err| 
{ + ArklibError::Storage(self.label.clone(), err.to_string()) + }) + } + + /// Verify the version stored in the file header + fn verify_version(&self, header: &str) -> Result<()> { + if !header.starts_with(STORAGE_VERSION_PREFIX) { + return Err(ArklibError::Storage( + self.label.clone(), + "Unknown storage version prefix".to_owned(), + )); + } + + let version = header[STORAGE_VERSION_PREFIX.len()..] + .parse::<i32>() + .map_err(|_err| { + ArklibError::Storage( + self.label.clone(), + "Failed to parse storage version".to_owned(), + ) + })?; + + if version != STORAGE_VERSION { + return Err(ArklibError::Storage( + self.label.clone(), + format!( + "Storage version mismatch: expected {}, found {}", + STORAGE_VERSION, version + ), + )); + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + use tempdir::TempDir; + + use crate::file_storage::FileStorage; + + #[test] + fn test_file_storage_write_read() { + let temp_dir = + TempDir::new("tmp").expect("Failed to create temporary directory"); + let storage_path = temp_dir.path().join("test_storage.txt"); + + let mut file_storage = + FileStorage::new("TestStorage".to_string(), &storage_path); + + let mut data_to_write = BTreeMap::new(); + data_to_write.insert("key1".to_string(), "value1".to_string()); + data_to_write.insert("key2".to_string(), "value2".to_string()); + + file_storage + .write_file(&data_to_write) + .expect("Failed to write data to disk"); + + let data_read: BTreeMap<_, _> = file_storage + .read_file() + .expect("Failed to read data from disk"); + + assert_eq!(data_read, data_to_write); + } + + #[test] + fn test_file_storage_auto_delete() { + let temp_dir = + TempDir::new("tmp").expect("Failed to create temporary directory"); + let storage_path = temp_dir.path().join("test_storage.txt"); + + let mut file_storage = + FileStorage::new("TestStorage".to_string(), &storage_path); + + let mut data_to_write = BTreeMap::new(); + data_to_write.insert("key1".to_string(), "value1".to_string()); + 
data_to_write.insert("key2".to_string(), "value2".to_string()); + + file_storage + .write_file(&data_to_write) + .expect("Failed to write data to disk"); + + assert_eq!(storage_path.exists(), true); + + if let Err(err) = file_storage.erase() { + panic!("Failed to delete file: {:?}", err); + } + assert_eq!(storage_path.exists(), false); + } +} diff --git a/fs-storage/src/lib.rs b/fs-storage/src/lib.rs index 6e7af127..adeb750b 100644 --- a/fs-storage/src/lib.rs +++ b/fs-storage/src/lib.rs @@ -1,3 +1,4 @@ +pub mod file_storage; pub const ARK_FOLDER: &str = ".ark"; // Should not be lost if possible diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000..6a8344b5 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +version = "1.75.0" +channel = "stable"