diff --git a/Cargo.toml b/Cargo.toml index 798b2ecc..1a75dffd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,16 +1,25 @@ [workspace] members = [ + "data-error", + "data-json", + "data-link", + "data-pdf", "data-resource", "fs-atomic-versions", + "fs-atomic-light", "fs-index", - "fs-utils" + "fs-storage", ] default-members = [ + "data-error", + "data-json", + "data-link", + "data-pdf", "data-resource", "fs-atomic-versions", "fs-index", - "fs-utils", + "fs-storage", ] resolver = "2" diff --git a/README.md b/README.md index d28f03c0..2983c98c 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,15 @@ The purpose of the library is to manage _resource index_ of folders with various
-| Package | Description | -| -------------------- | ------------------------------------------ | -| `data-resource` | Resource hashing and ID construction | -| `fs-index` | Resource Index construction and updating | -| `fs-atomic-light` | Temp file-based preventing of dirty writes | -| `fs-atomic-versions` | Version-based preventing of dirty writes | -| `fs-utils` | Utility functions and common code | +| Package | Description | +| --------------- | ---------------------------------------- | +| `data-resource` | Resource hashing and ID construction | +| `fs-index` | Resource Index construction and updating | +| `fs-storage` | Filesystem storage for resources | +| `data-link` | Linking resources | +| `data-pdf` | PDF handling | +| `data-error` | Error handling | +| `data-json` | JSON serialization and deserialization |
diff --git a/fs-utils/Cargo.toml b/data-error/Cargo.toml similarity index 71% rename from fs-utils/Cargo.toml rename to data-error/Cargo.toml index 9bdfa207..b30cb2a6 100644 --- a/fs-utils/Cargo.toml +++ b/data-error/Cargo.toml @@ -1,16 +1,11 @@ [package] -name = "fs-utils" +name = "data-error" version = "0.1.0" edition = "2021" - [dependencies] thiserror = "1" reqwest = "0.11.11" serde_json = "1.0.82" anyhow = "1" url = { version = "2.2.2", features = ["serde"] } - -[dev-dependencies] -rstest = "0.18" -tempdir = "0.3" diff --git a/fs-utils/src/errors.rs b/data-error/src/lib.rs similarity index 99% rename from fs-utils/src/errors.rs rename to data-error/src/lib.rs index c12c6092..d8a1dd74 100644 --- a/fs-utils/src/errors.rs +++ b/data-error/src/lib.rs @@ -1,5 +1,4 @@ use std::str::Utf8Error; - use thiserror::Error; pub type Result = std::result::Result; diff --git a/data-json/Cargo.toml b/data-json/Cargo.toml new file mode 100644 index 00000000..b56702df --- /dev/null +++ b/data-json/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "data-json" +version = "0.1.0" +edition = "2021" + + +[dependencies] +serde_json = "1.0.82" + +[dev-dependencies] +rstest = "0.18" diff --git a/fs-utils/src/json.rs b/data-json/src/lib.rs similarity index 100% rename from fs-utils/src/json.rs rename to data-json/src/lib.rs diff --git a/data-link/Cargo.toml b/data-link/Cargo.toml new file mode 100644 index 00000000..e10cae11 --- /dev/null +++ b/data-link/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "data-link" +version = "0.1.0" +edition = "2021" + +[lib] +name = "data_link" +crate-type = ["rlib"] +bench = false + +[dependencies] +fs-atomic-light = { path = "../fs-atomic-light" } +fs-atomic-versions = { path = "../fs-atomic-versions" } +fs-storage = { path = "../fs-storage" } +data-resource = { path = "../data-resource" } +data-error = { path = "../data-error" } + +log = { version = "0.4.17", features = ["release_max_level_off"] } +serde_json = "1.0.82" +serde = { version = "1.0.138", 
features = ["derive"] } +url = { version = "2.2.2", features = ["serde"] } +reqwest = "0.11.11" +scraper = "0.13.0" +tokio = { version = "1", features = ["full"] } + +[dev-dependencies] +tempdir = "0.3.7" diff --git a/fs-index/src/link.rs b/data-link/src/lib.rs similarity index 93% rename from fs-index/src/link.rs rename to data-link/src/lib.rs index ea5dc738..b02391d8 100644 --- a/fs-index/src/link.rs +++ b/data-link/src/lib.rs @@ -1,10 +1,12 @@ -use crate::storage::meta::store_metadata; -use crate::storage::prop::store_properties; -use crate::{ - storage::prop::load_raw_properties, AtomicFile, Result, ARK_FOLDER, - PREVIEWS_STORAGE_FOLDER, PROPERTIES_STORAGE_FOLDER, -}; +use data_error::Result; use data_resource::ResourceId; +use fs_atomic_versions::atomic::AtomicFile; +use fs_storage::meta::store_metadata; +use fs_storage::prop::load_raw_properties; +use fs_storage::prop::store_properties; +use fs_storage::{ + ARK_FOLDER, PREVIEWS_STORAGE_FOLDER, PROPERTIES_STORAGE_FOLDER, +}; use reqwest::header::HeaderValue; use scraper::{Html, Selector}; use serde::{Deserialize, Serialize}; @@ -25,20 +27,6 @@ pub struct Properties { pub title: String, pub desc: Option, } -/// Write data to a tempory file and move that written file to destination -/// -/// May failed if writing or moving failed -fn temp_and_move( - data: &[u8], - dest_dir: impl AsRef, - filename: &str, -) -> Result<()> { - let mut path = std::env::temp_dir(); - path.push(filename); - std::fs::write(&path, data)?; - std::fs::copy(path, dest_dir.as_ref().join(filename))?; - Ok(()) -} impl Link { pub fn new(url: Url, title: String, desc: Option) -> Self { @@ -62,6 +50,7 @@ impl Link { .join(PROPERTIES_STORAGE_FOLDER) .join(id.to_string()); let file = AtomicFile::new(path)?; + let current = file.load()?; let data = current.read_to_string()?; let user_meta: Properties = serde_json::from_str(&data)?; @@ -103,7 +92,7 @@ impl Link { // Resources are stored in the folder chosen by user let bytes = 
self.url.as_str().as_bytes(); - temp_and_move(bytes, root.as_ref(), &id_string)?; + fs_atomic_light::temp_and_move(bytes, root.as_ref(), &id_string)?; //User defined properties store_properties(&root, id, &self.prop)?; @@ -194,6 +183,7 @@ fn select_og(html: &Html, tag: OpenGraphTag) -> Option { None } + fn select_desc(html: &Html) -> Option { let selector = Selector::parse("meta[name=\"description\"]").unwrap(); @@ -205,6 +195,7 @@ fn select_desc(html: &Html) -> Option { None } + fn select_title(html: &Html) -> Option { let selector = Selector::parse("title").unwrap(); if let Some(element) = html.select(&selector).next() { @@ -213,6 +204,7 @@ fn select_title(html: &Html) -> Option { None } + #[derive(Debug, Serialize, Deserialize, Default, Clone)] pub struct OpenGraph { /// Represents the "og:title" OpenGraph meta tag. @@ -234,6 +226,7 @@ pub struct OpenGraph { /// Represents the "og:locale" OpenGraph meta tag locale: Option, } + impl OpenGraph { pub async fn fetch_image(&self) -> Option> { if let Some(url) = &self.image { diff --git a/data-pdf/Cargo.toml b/data-pdf/Cargo.toml new file mode 100644 index 00000000..4d846bfc --- /dev/null +++ b/data-pdf/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "data-pdf" +version = "0.1.0" +edition = "2021" + +[lib] +name = "data_pdf" +crate-type = ["rlib"] +bench = false + +[dependencies] +once_cell = "1.16.0" +image = "0.24.2" +pdfium-render = { git = "https://github.com/ajrcarey/pdfium-render", rev = "d2559c1", features = [ + "thread_safe", + "sync", +] } + +[dev-dependencies] +tempdir = "0.3.7" + +[build-dependencies] +flate2 = "1.0.24" +fs_extra = "1.2.0" +tar = "0.4.38" +target-lexicon = "0.12.4" +ureq = "2.4.0" +ring = "=0.17.5" diff --git a/fs-index/build.rs b/data-pdf/build.rs similarity index 100% rename from fs-index/build.rs rename to data-pdf/build.rs diff --git a/fs-index/src/pdf.rs b/data-pdf/src/lib.rs similarity index 100% rename from fs-index/src/pdf.rs rename to data-pdf/src/lib.rs diff --git 
a/data-resource/Cargo.toml b/data-resource/Cargo.toml index 87b7c5d3..fd7399bb 100644 --- a/data-resource/Cargo.toml +++ b/data-resource/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -fs-utils = { path = "../fs-utils" } +data-error = { path = "../data-error" } fs-atomic-versions = { path = "../fs-atomic-versions" } log = { version = "0.4.17", features = ["release_max_level_off"] } diff --git a/data-resource/src/lib.rs b/data-resource/src/lib.rs index 8a36f074..21701636 100644 --- a/data-resource/src/lib.rs +++ b/data-resource/src/lib.rs @@ -9,7 +9,7 @@ use std::io::{BufRead, BufReader}; use std::path::Path; use std::str::FromStr; -use fs_utils::errors::{ArklibError, Result}; +use data_error::{ArklibError, Result}; #[derive( Eq, diff --git a/fs-atomic-light/Cargo.toml b/fs-atomic-light/Cargo.toml new file mode 100644 index 00000000..5c15bd54 --- /dev/null +++ b/fs-atomic-light/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "fs-atomic-light" +version = "0.1.0" +edition = "2021" + +[lib] +name = "fs_atomic_light" +crate-type = ["rlib"] +bench = false + +[dependencies] +data-error = { path = "../data-error" } diff --git a/fs-atomic-light/src/lib.rs b/fs-atomic-light/src/lib.rs new file mode 100644 index 00000000..25288f6d --- /dev/null +++ b/fs-atomic-light/src/lib.rs @@ -0,0 +1,23 @@ +use data_error::Result; + +use std::env; +use std::fs; +use std::path::Path; +use std::str; + +/// Write data to a temporary file, then copy that file into the destination directory (the temporary file is not removed) +/// +/// May fail if writing or copying fails +pub fn temp_and_move( + data: &[u8], + dest_dir: impl AsRef, + filename: &str, +) -> Result<()> { + let mut path = env::temp_dir(); + path.push(filename); + + fs::write(&path, data)?; + fs::copy(path, dest_dir.as_ref().join(filename))?; + + Ok(()) +} diff --git a/fs-atomic-versions/Cargo.toml b/fs-atomic-versions/Cargo.toml index f6a5af43..70372063 100644 --- 
a/fs-atomic-versions/Cargo.toml +++ b/fs-atomic-versions/Cargo.toml @@ -9,7 +9,7 @@ crate-type = ["rlib"] bench = false [dependencies] -fs-utils = { path = "../fs-utils" } +data-error = { path = "../data-error" } log = { version = "0.4.17", features = ["release_max_level_off"] } anyhow = "1.0.58" diff --git a/fs-atomic-versions/src/app_id.rs b/fs-atomic-versions/src/app_id.rs index 4b76749a..d8aaa376 100644 --- a/fs-atomic-versions/src/app_id.rs +++ b/fs-atomic-versions/src/app_id.rs @@ -2,7 +2,7 @@ use std::{fs, path::Path}; use anyhow::anyhow; -use fs_utils::errors::{ArklibError, Result}; +use data_error::{ArklibError, Result}; use crate::{APP_ID_FILE, APP_ID_PATH}; diff --git a/fs-atomic-versions/src/atomic/file.rs b/fs-atomic-versions/src/atomic/file.rs index b82545af..bd3f2571 100644 --- a/fs-atomic-versions/src/atomic/file.rs +++ b/fs-atomic-versions/src/atomic/file.rs @@ -103,7 +103,7 @@ fn parse_version(filename: Option<&str>) -> Option { } impl AtomicFile { - pub fn new(path: impl Into) -> fs_utils::errors::Result { + pub fn new(path: impl Into) -> data_error::Result { let directory = path.into(); // This UID must be treated as confidential information. 
// Depending on network transport used to sync the files (if any), diff --git a/fs-index/Cargo.toml b/fs-index/Cargo.toml index 24b2d33f..b7284d44 100644 --- a/fs-index/Cargo.toml +++ b/fs-index/Cargo.toml @@ -9,8 +9,9 @@ crate-type = ["rlib"] bench = false [dependencies] -fs-utils = { path = "../fs-utils" } +data-error = { path = "../data-error" } fs-atomic-versions = { path = "../fs-atomic-versions" } +fs-storage = { path = "../fs-storage" } data-resource = { path = "../data-resource" } log = { version = "0.4.17", features = ["release_max_level_off"] } @@ -19,20 +20,7 @@ anyhow = "1.0.58" lazy_static = "1.4.0" canonical-path = "2.0.2" pathdiff = "0.2.1" -serde_json = "1.0.82" -serde = { version = "1.0.138", features = ["derive"] } -url = { version = "2.2.2", features = ["serde"] } -reqwest = "0.11.11" -scraper = "0.13.0" -tokio = { version = "1", features = ["full"] } itertools = "0.10.5" -once_cell = "1.16.0" -image = "0.24.2" -pdfium-render = { git = "https://github.com/ajrcarey/pdfium-render", rev = "d2559c1", features = [ - "thread_safe", - "sync", -] } - [dev-dependencies] uuid = { version = "1.6.1", features = ["v4"] } @@ -43,14 +31,6 @@ criterion = { version = "0.5", features = ["html_reports"] } pprof = { version = "0.13", features = ["criterion", "flamegraph"] } rand = "0.8" -[build-dependencies] -flate2 = "1.0.24" -fs_extra = "1.2.0" -tar = "0.4.38" -target-lexicon = "0.12.4" -ureq = "2.4.0" -ring = "=0.17.5" - [[bench]] name = "index_build_benchmark" harness = false diff --git a/fs-index/src/lib.rs b/fs-index/src/lib.rs index ef174829..4259bdd1 100644 --- a/fs-index/src/lib.rs +++ b/fs-index/src/lib.rs @@ -3,15 +3,12 @@ extern crate lazy_static; extern crate canonical_path; -use fs_utils::errors::{ArklibError, Result}; +use data_error::{ArklibError, Result}; pub mod index; -pub mod link; -pub mod pdf; - -mod storage; pub use fs_atomic_versions::atomic::{modify, modify_json, AtomicFile}; +pub use fs_storage::{ARK_FOLDER, INDEX_PATH}; use 
index::ResourceIndex; @@ -21,23 +18,6 @@ use std::sync::{Arc, RwLock}; use canonical_path::CanonicalPathBuf; -pub const ARK_FOLDER: &str = ".ark"; - -// Should not be lost if possible -pub const STATS_FOLDER: &str = "stats"; -pub const FAVORITES_FILE: &str = "favorites"; - -// User-defined data -pub const TAG_STORAGE_FILE: &str = "user/tags"; -pub const SCORE_STORAGE_FILE: &str = "user/scores"; -pub const PROPERTIES_STORAGE_FOLDER: &str = "user/properties"; - -// Generated data -pub const INDEX_PATH: &str = "index"; -pub const METADATA_STORAGE_FOLDER: &str = "cache/metadata"; -pub const PREVIEWS_STORAGE_FOLDER: &str = "cache/previews"; -pub const THUMBNAILS_STORAGE_FOLDER: &str = "cache/thumbnails"; - pub type ResourceIndexLock = Arc>; lazy_static! { diff --git a/fs-index/src/storage/mod.rs b/fs-index/src/storage/mod.rs deleted file mode 100644 index 8b08441b..00000000 --- a/fs-index/src/storage/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod meta; -pub mod prop; diff --git a/fs-storage/Cargo.toml b/fs-storage/Cargo.toml new file mode 100644 index 00000000..146e6c98 --- /dev/null +++ b/fs-storage/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "fs-storage" +version = "0.1.0" +edition = "2021" + + +[dependencies] +data-error = { path = "../data-error" } +data-json = { path = "../data-json" } +fs-atomic-versions = { path = "../fs-atomic-versions" } +data-resource = { path = "../data-resource" } + +serde_json = "1.0.82" +serde = { version = "1.0.138", features = ["derive"] } + +[dev-dependencies] +log = { version = "0.4.17", features = ["release_max_level_off"] } +tempdir = "0.3.7" diff --git a/fs-storage/src/lib.rs b/fs-storage/src/lib.rs new file mode 100644 index 00000000..90cb61a5 --- /dev/null +++ b/fs-storage/src/lib.rs @@ -0,0 +1,19 @@ +pub mod meta; +pub mod prop; + +pub const ARK_FOLDER: &str = ".ark"; + +// Should not be lost if possible +pub const STATS_FOLDER: &str = "stats"; +pub const FAVORITES_FILE: &str = "favorites"; + +// User-defined data +pub const 
TAG_STORAGE_FILE: &str = "user/tags"; +pub const SCORE_STORAGE_FILE: &str = "user/scores"; +pub const PROPERTIES_STORAGE_FOLDER: &str = "user/properties"; + +// Generated data +pub const INDEX_PATH: &str = "index"; +pub const METADATA_STORAGE_FOLDER: &str = "cache/metadata"; +pub const PREVIEWS_STORAGE_FOLDER: &str = "cache/previews"; +pub const THUMBNAILS_STORAGE_FOLDER: &str = "cache/thumbnails"; diff --git a/fs-index/src/storage/meta.rs b/fs-storage/src/meta.rs similarity index 97% rename from fs-index/src/storage/meta.rs rename to fs-storage/src/meta.rs index 8591bf5a..5ada9b72 100644 --- a/fs-index/src/storage/meta.rs +++ b/fs-storage/src/meta.rs @@ -1,10 +1,11 @@ +use data_error::Result; use fs_atomic_versions::atomic::{modify_json, AtomicFile}; use serde::{de::DeserializeOwned, Serialize}; use std::fmt::Debug; use std::io::Read; use std::path::Path; -use crate::{Result, ARK_FOLDER, METADATA_STORAGE_FOLDER}; +use crate::{ARK_FOLDER, METADATA_STORAGE_FOLDER}; use data_resource::ResourceId; pub fn store_metadata< diff --git a/fs-index/src/storage/prop.rs b/fs-storage/src/prop.rs similarity index 96% rename from fs-index/src/storage/prop.rs rename to fs-storage/src/prop.rs index 43654075..e09cd2c9 100644 --- a/fs-index/src/storage/prop.rs +++ b/fs-storage/src/prop.rs @@ -1,12 +1,13 @@ +use data_error::Result; +use data_json::merge; use fs_atomic_versions::atomic::{modify_json, AtomicFile}; -use fs_utils::json::merge; use serde::{de::DeserializeOwned, Serialize}; use serde_json::Value; use std::fmt::Debug; use std::io::Read; use std::path::Path; -use crate::{Result, ARK_FOLDER, PROPERTIES_STORAGE_FOLDER}; +use crate::{ARK_FOLDER, PROPERTIES_STORAGE_FOLDER}; use data_resource::ResourceId; pub fn store_properties< diff --git a/fs-utils/src/lib.rs b/fs-utils/src/lib.rs deleted file mode 100644 index 27d54a24..00000000 --- a/fs-utils/src/lib.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod errors; -pub mod json;