From 1e46e4f70fdf40f1fac36c58c403782645bbfe61 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Thu, 9 Jan 2025 15:25:24 +0100 Subject: [PATCH] dedupe against script root --- components/datetime/Cargo.toml | 2 + components/datetime/src/pattern/names.rs | 29 +- .../datetime/src/provider/time_zones.rs | 3 + .../datetime/data/locations_v1_marker.rs.data | 565 ++++++++++++------ provider/data/datetime/fingerprints.csv | 411 +++++++------ .../debug/time_zone/locations@1/ar-EG.json | 4 + .../data/debug/time_zone/locations@1/ar.json | 4 + .../data/debug/time_zone/locations@1/bn.json | 4 + .../data/debug/time_zone/locations@1/ccp.json | 11 + .../debug/time_zone/locations@1/en-001.json | 33 +- .../debug/time_zone/locations@1/en-ZA.json | 33 +- .../data/debug/time_zone/locations@1/en.json | 393 +++++++++++- .../debug/time_zone/locations@1/es-AR.json | 15 +- .../data/debug/time_zone/locations@1/es.json | 15 +- .../data/debug/time_zone/locations@1/fil.json | 35 +- .../data/debug/time_zone/locations@1/fr.json | 27 +- .../data/debug/time_zone/locations@1/ja.json | 4 + .../data/debug/time_zone/locations@1/ru.json | 4 + .../debug/time_zone/locations@1/sr-Latn.json | 10 +- .../data/debug/time_zone/locations@1/sr.json | 117 +--- .../data/debug/time_zone/locations@1/th.json | 4 + .../data/debug/time_zone/locations@1/tr.json | 9 +- .../data/debug/time_zone/locations@1/und.json | 393 +----------- provider/source/src/time_zones/convert.rs | 58 +- 24 files changed, 1175 insertions(+), 1008 deletions(-) diff --git a/components/datetime/Cargo.toml b/components/datetime/Cargo.toml index fc81c995966..4082729f186 100644 --- a/components/datetime/Cargo.toml +++ b/components/datetime/Cargo.toml @@ -75,6 +75,7 @@ serde = [ "dep:serde", "icu_calendar/serde", "icu_decimal/serde", + "icu_locale_core/serde", "icu_pattern/serde", "icu_plurals/serde", "icu_provider/serde", @@ -89,6 +90,7 @@ datagen = [ "dep:databake", "dep:litemap", "icu_calendar/datagen", + "icu_locale_core/databake", "icu_pattern/databake", "icu_plurals/datagen", "icu_timezone/datagen", diff --git a/components/datetime/src/pattern/names.rs b/components/datetime/src/pattern/names.rs index fd54b74a086..465c758c413 100644 --- a/components/datetime/src/pattern/names.rs +++ b/components/datetime/src/pattern/names.rs @@ -1912,14 +1912,35 @@ impl RawDateTimeNames { id: DataIdentifierBorrowed::for_locale(&locale), ..Default::default() }; - self.locations_root - .load_put(provider, Default::default(), variables) - .map_err(|e| MaybePayloadError::into_load_error(e, error_field))? - .map_err(|e| PatternLoadError::Data(e, error_field))?; self.locations .load_put(provider, req, variables) .map_err(|e| MaybePayloadError::into_load_error(e, error_field))? .map_err(|e| PatternLoadError::Data(e, error_field))?; + + #[allow(clippy::unwrap_used)] // we just loaded it + let (dedupe_language, dedupe_script) = self + .locations + .get() + .inner + .get_with_variables(variables) + .unwrap() + .dedupe_target; + let dedupe_locale = DataLocale { + language: dedupe_language, + script: dedupe_script, + ..Default::default() + }; + self.locations_root + .load_put( + provider, + DataRequest { + id: DataIdentifierBorrowed::for_locale(&dedupe_locale), + ..Default::default() + }, + variables, + ) + .map_err(|e| MaybePayloadError::into_load_error(e, error_field))? + .map_err(|e| PatternLoadError::Data(e, error_field))?; Ok(()) } diff --git a/components/datetime/src/provider/time_zones.rs b/components/datetime/src/provider/time_zones.rs index fd2612abe23..141f880d40b 100644 --- a/components/datetime/src/provider/time_zones.rs +++ b/components/datetime/src/provider/time_zones.rs @@ -5,6 +5,7 @@ //! Data provider structs for time zones. use alloc::borrow::Cow; +use icu_locale_core::subtags::{Language, Script}; use icu_pattern::{DoublePlaceholderPattern, SinglePlaceholderPattern}; use icu_provider::prelude::*; use tinystr::TinyAsciiStr; @@ -82,6 +83,8 @@ pub struct TimeZoneEssentialsV1<'data> { #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[yoke(prove_covariance_manually)] pub struct LocationsV1<'data> { + /// The data struct against which the location names are deduplicated. + pub dedupe_target: (Language, Option