diff --git a/components/collator/src/comparison.rs b/components/collator/src/comparison.rs index cdfb4a36595..639570360bb 100644 --- a/components/collator/src/comparison.rs +++ b/components/collator/src/comparison.rs @@ -844,6 +844,87 @@ impl CollatorBorrowed<'static> { }) } + /// Creates a collator for the root collation with default options and no tailoring or reordering, using compiled (baked) data only. + /// + /// ✨ *Enabled with the `compiled_data` and `unstable` Cargo features.* + #[cfg(feature = "compiled_data")] + #[cfg(feature = "unstable")] + pub const fn new_root() -> Self { + const _: () = assert!( + crate::provider::Baked::SINGLETON_COLLATION_JAMO_V1 + .ce32s + .as_slice() + .len() + == JAMO_COUNT + ); + const _: () = assert!( + crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1 + .last_primaries + .as_slice() + .len() + > (MaxVariable::Currency as usize) + ); + + Self { + special_primaries: const { + &CollationSpecialPrimariesValidated { + last_primaries: zerovec::ZeroSlice::from_ule_slice( + crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1 + .last_primaries + .as_slice() + .as_ule_slice() + .split_at(MaxVariable::Currency as usize) + .0, + ) + .as_zerovec(), + numeric_primary: + crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1 + .numeric_primary, + compressible_bytes: { + const C: &[::ULE] = + crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1 + .last_primaries + .as_slice() + .as_ule_slice(); + if C.len() == MaxVariable::Currency as usize + 16 { + let i = MaxVariable::Currency as usize; + #[allow(clippy::indexing_slicing)] + &[ + C[i], + C[i + 1], + C[i + 2], + C[i + 3], + C[i + 4], + C[i + 5], + C[i + 6], + C[i + 7], + C[i + 8], + C[i + 9], + C[i + 10], + C[i + 11], + C[i + 12], + C[i + 13], + C[i + 14], + C[i + 15], + ] + } else { + CollationSpecialPrimariesValidated::HARDCODED_COMPRESSIBLE_BYTES_FALLBACK + } + }, + } + }, + root: crate::provider::Baked::SINGLETON_COLLATION_ROOT_V1, + tailoring: None, + jamo: crate::provider::Baked::SINGLETON_COLLATION_JAMO_V1, + options: 
CollatorOptionsBitField::default(), + diacritics: crate::provider::Baked::COLLATION_DIACRITICS_V1_UND, + reordering: None, + decompositions: icu_normalizer::provider::Baked::SINGLETON_NORMALIZER_NFD_DATA_V1, + tables: icu_normalizer::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1, + lithuanian_dot_above: false, + } + } + /// Cheaply converts a [`CollatorBorrowed<'static>`] into a [`Collator`]. /// /// Note: Due to branching and indirection, using [`Collator`] might inhibit some diff --git a/components/collator/src/provider.rs b/components/collator/src/provider.rs index bae1d632d3f..ac5c3454342 100644 --- a/components/collator/src/provider.rs +++ b/components/collator/src/provider.rs @@ -51,6 +51,7 @@ pub struct Baked; #[cfg(feature = "compiled_data")] #[allow(unused_imports)] +#[allow(missing_docs)] const _: () = { use icu_collator_data::*; pub mod icu { @@ -98,6 +99,8 @@ icu_provider::data_marker!( fallback_config = SCRIPT_FALLBACK, #[cfg(feature = "datagen")] attributes_domain = "collator", + #[cfg(feature = "datagen")] + expose_baked_consts = true, ); icu_provider::data_marker!( /// Data marker for collation jamo data. 
diff --git a/ffi/capi/tests/missing_apis.txt b/ffi/capi/tests/missing_apis.txt index 23b595fbf31..41af8b2d3a4 100644 --- a/ffi/capi/tests/missing_apis.txt +++ b/ffi/capi/tests/missing_apis.txt @@ -36,6 +36,7 @@ icu::collator::CollationKeySink::write#FnInTrait icu::collator::CollationKeySink::write_byte#FnInTrait icu::collator::CollatorBorrowed::compare_latin1#FnInStruct icu::collator::CollatorBorrowed::compare_latin1_utf16#FnInStruct +icu::collator::CollatorBorrowed::new_root#FnInStruct icu::collator::CollatorBorrowed::write_sort_key_to#FnInStruct icu::collator::CollatorBorrowed::write_sort_key_utf16_to#FnInStruct icu::collator::CollatorBorrowed::write_sort_key_utf8_to#FnInStruct diff --git a/provider/data/collator/data/collation_diacritics_v1.rs.data b/provider/data/collator/data/collation_diacritics_v1.rs.data index 21cdc1f7dd5..4d2d79ce039 100644 --- a/provider/data/collator/data/collation_diacritics_v1.rs.data +++ b/provider/data/collator/data/collation_diacritics_v1.rs.data @@ -22,11 +22,15 @@ macro_rules! 
__impl_collation_diacritics_v1 { const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO; #[clippy::msrv = "1.86"] impl $provider { - const DATA_COLLATION_DIACRITICS_V1: icu_provider::baked::zerotrie::Data = { - const TRIE: icu_provider::baked::zerotrie::ZeroTrieSimpleAscii<&'static [u8]> = icu_provider::baked::zerotrie::ZeroTrieSimpleAscii { store: b"\xC3euv\x02\x05e\x80nd\x81i\x82\x1Etrad\x82" }; - const VALUES: &'static [::DataStruct] = &[icu::collator::provider::CollationDiacritics { secondaries: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x8A\0\x88\0\x91\0\x9A\0\xA4\0\xB4\0\x8C\0\x9C\0\x96\0\xB6\0\x92\0\x98\0\x90\0\xA6\0\xA6\0\xB8\0\xBA\0\xBC\0\xA6\0\x84\0\x86\0\xA6\0\xA8\0\xA8\0\xA8\0\xA8\0\xA6\0\xBE\0\xA8\0\xA8\0\xA8\0\xA8\0\xA8\0\xC0\0\xC2\0\xC4\0\xC6\0\xC8\0\xCA\0\xA0\0\xA2\0\xA8\0\xA8\0\xA8\0\xA8\0\xCC\0\xCE\0\xA8\0\xD0\0\xD2\0\x82\0\xA8\0\xD4\0\xB2\0\xAA\0\xAA\0\x9E\0\xD6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\0\0\0\0\x94\0\0\0\0\0\xD8\0\xA6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\xA8\0\xA8") } }, icu::collator::provider::CollationDiacritics { secondaries: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x8A\0\x88\0\x8E\0\x9A\0\xA4\0\xB4\0\x8C\0\x9C\0\x96\0\xB6\0\x92\0\x98\0\x90\0\xA6\0\xA6\0\xB8\0\xBA\0\xBC\0\xA6\0\x84\0\x86\0\xA6\0\xA8\0\xA8\0\xA8\0\xA8\0\xA6\0\xBE\0\xA8\0\xA8\0\xA8\0\xA8\0\xA8\0\xC0\0\xC2\0\xC4\0\xC6\0\xC8\0\xCA\0\xA0\0\xA2\0\xA8\0\xA8\0\xA8\0\xA8\0\xCC\0\xCE\0\xA8\0\xD0\0\xD2\0\x82\0\xA8\0\xD4\0\xB2\0\xAA\0\xAA\0\x9E\0\xD6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\0\0\0\0\x94\0\0\0\0\0\xD8\0\xA6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\xA8\0\xA8") } }, icu::collator::provider::CollationDiacritics { secondaries: unsafe { 
zerovec::ZeroVec::from_bytes_unchecked(b"\0\x8A\x04\x8B\0\x8E\x03\x8B\0\xA4\0\xB4\0\x8C\0\x9C\0\x96\x02\x8B\0\x92\0\x98\0\x90\0\xA6\0\xA6\0\xB8\0\xBA\0\xBC\0\xA6\0\x84\0\x86\0\xA6\0\xA8\0\xA8\0\xA8\0\xA8\0\xA6\0\xBE\0\xA8\0\xA8\0\xA8\0\xA8\0\xA8\0\xC0\0\xC2\x05\x8B\0\xC6\0\xC8\0\xCA\0\xA0\0\xA2\0\xA8\0\xA8\0\xA8\0\xA8\0\xCC\0\xCE\0\xA8\0\xD0\0\xD2\0\x82\0\xA8\0\xD4\0\xB2\0\xAA\0\xAA\0\x9E\0\xD6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\0\0\0\0\x94\0\0\0\0\0\xD8\0\xA6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\xA8\0\xA8") } }]; - unsafe { icu_provider::baked::zerotrie::Data::from_trie_and_values_unchecked(TRIE, VALUES) } - }; + #[doc(hidden)] + pub const COLLATION_DIACRITICS_V1_EE: &::DataStruct = &icu::collator::provider::CollationDiacritics { secondaries: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x8A\0\x88\0\x91\0\x9A\0\xA4\0\xB4\0\x8C\0\x9C\0\x96\0\xB6\0\x92\0\x98\0\x90\0\xA6\0\xA6\0\xB8\0\xBA\0\xBC\0\xA6\0\x84\0\x86\0\xA6\0\xA8\0\xA8\0\xA8\0\xA8\0\xA6\0\xBE\0\xA8\0\xA8\0\xA8\0\xA8\0\xA8\0\xC0\0\xC2\0\xC4\0\xC6\0\xC8\0\xCA\0\xA0\0\xA2\0\xA8\0\xA8\0\xA8\0\xA8\0\xCC\0\xCE\0\xA8\0\xD0\0\xD2\0\x82\0\xA8\0\xD4\0\xB2\0\xAA\0\xAA\0\x9E\0\xD6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\0\0\0\0\x94\0\0\0\0\0\xD8\0\xA6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\xA8\0\xA8") } }; + #[doc(hidden)] + pub const COLLATION_DIACRITICS_V1_UND: &::DataStruct = &icu::collator::provider::CollationDiacritics { secondaries: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x8A\0\x88\0\x8E\0\x9A\0\xA4\0\xB4\0\x8C\0\x9C\0\x96\0\xB6\0\x92\0\x98\0\x90\0\xA6\0\xA6\0\xB8\0\xBA\0\xBC\0\xA6\0\x84\0\x86\0\xA6\0\xA8\0\xA8\0\xA8\0\xA8\0\xA6\0\xBE\0\xA8\0\xA8\0\xA8\0\xA8\0\xA8\0\xC0\0\xC2\0\xC4\0\xC6\0\xC8\0\xCA\0\xA0\0\xA2\0\xA8\0\xA8\0\xA8\0\xA8\0\xCC\0\xCE\0\xA8\0\xD0\0\xD2\0\x82\0\xA8\0\xD4\0\xB2\0\xAA\0\xAA\0\x9E\0\xD6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\0\0\0\0\x94\0\0\0\0\0\xD8\0\xA6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\xA8\0\xA8") } }; + #[doc(hidden)] + pub const COLLATION_DIACRITICS_V1_VI: 
&::DataStruct = &icu::collator::provider::CollationDiacritics { secondaries: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x8A\x04\x8B\0\x8E\x03\x8B\0\xA4\0\xB4\0\x8C\0\x9C\0\x96\x02\x8B\0\x92\0\x98\0\x90\0\xA6\0\xA6\0\xB8\0\xBA\0\xBC\0\xA6\0\x84\0\x86\0\xA6\0\xA8\0\xA8\0\xA8\0\xA8\0\xA6\0\xBE\0\xA8\0\xA8\0\xA8\0\xA8\0\xA8\0\xC0\0\xC2\x05\x8B\0\xC6\0\xC8\0\xCA\0\xA0\0\xA2\0\xA8\0\xA8\0\xA8\0\xA8\0\xCC\0\xCE\0\xA8\0\xD0\0\xD2\0\x82\0\xA8\0\xD4\0\xB2\0\xAA\0\xAA\0\x9E\0\xD6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\0\0\0\0\x94\0\0\0\0\0\xD8\0\xA6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\xA8\0\xA8") } }; + #[doc(hidden)] + pub const COLLATION_DIACRITICS_V1_VI_TRAD: &::DataStruct = Self::COLLATION_DIACRITICS_V1_VI; + const DATA_COLLATION_DIACRITICS_V1: icu_provider::baked::zerotrie::DataRef = unsafe { icu_provider::baked::zerotrie::DataRef::from_trie_and_refs_unchecked(icu_provider::baked::zerotrie::ZeroTrieSimpleAscii { store: b"\xC3euv\x02\x05e\x80nd\x81i\x82\x1Etrad\x82" }, &[Self::COLLATION_DIACRITICS_V1_EE, Self::COLLATION_DIACRITICS_V1_UND, Self::COLLATION_DIACRITICS_V1_VI]) }; } #[clippy::msrv = "1.86"] impl icu_provider::DataProvider for $provider { diff --git a/provider/data/collator/stubdata/collation_diacritics_v1.rs.data b/provider/data/collator/stubdata/collation_diacritics_v1.rs.data index 6fdd54b007c..f8b1801b421 100644 --- a/provider/data/collator/stubdata/collation_diacritics_v1.rs.data +++ b/provider/data/collator/stubdata/collation_diacritics_v1.rs.data @@ -3,9 +3,17 @@ /// hardcoded in this file. This allows the struct to be used with /// `icu`'s `_unstable` constructors. 
/// +/// Using this implementation will embed the following data in the binary's data segment: +/// * 36B for the lookup data structure (1 data identifiers) +/// * 182B[^1] for the actual data (1 unique structs) +/// +/// [^1]: these numbers can be smaller in practice due to linker deduplication +/// /// This macro requires the following crates: /// * `icu` /// * `icu_provider` +/// * `icu_provider/baked` +/// * `zerovec` #[doc(hidden)] #[macro_export] macro_rules! __impl_collation_diacritics_v1 { @@ -13,9 +21,19 @@ macro_rules! __impl_collation_diacritics_v1 { #[clippy::msrv = "1.86"] const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO; #[clippy::msrv = "1.86"] + impl $provider { + #[doc(hidden)] + pub const COLLATION_DIACRITICS_V1_UND: &::DataStruct = &icu::collator::provider::CollationDiacritics { secondaries: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x8A\0\x88\0\x8E\0\x9A\0\xA4\0\xB4\0\x8C\0\x9C\0\x96\0\xB6\0\x92\0\x98\0\x90\0\xA6\0\xA6\0\xB8\0\xBA\0\xBC\0\xA6\0\x84\0\x86\0\xA6\0\xA8\0\xA8\0\xA8\0\xA8\0\xA6\0\xBE\0\xA8\0\xA8\0\xA8\0\xA8\0\xA8\0\xC0\0\xC2\0\xC4\0\xC6\0\xC8\0\xCA\0\xA0\0\xA2\0\xA8\0\xA8\0\xA8\0\xA8\0\xCC\0\xCE\0\xA8\0\xD0\0\xD2\0\x82\0\xA8\0\xD4\0\xB2\0\xAA\0\xAA\0\x9E\0\xD6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\0\0\0\0\x94\0\0\0\0\0\xD8\0\xA6\0\xA8\0\xA8\0\xA8\0\xA6\0\xA6\0\xA6\0\xA8\0\xA8") } }; + const DATA_COLLATION_DIACRITICS_V1: icu_provider::baked::zerotrie::DataRef = unsafe { icu_provider::baked::zerotrie::DataRef::from_trie_and_refs_unchecked(icu_provider::baked::zerotrie::ZeroTrieSimpleAscii { store: b"und\x80" }, &[Self::COLLATION_DIACRITICS_V1_UND]) }; + } + #[clippy::msrv = "1.86"] impl icu_provider::DataProvider for $provider { fn load(&self, req: icu_provider::DataRequest) -> Result, icu_provider::DataError> { - Err(icu_provider::DataErrorKind::IdentifierNotFound.with_req(::INFO, req)) + let mut req = req; + req.id.locale = Default::default(); + let metadata = icu_provider::DataResponseMetadata::default(); + let 
Some(payload) = icu_provider::baked::DataStore::get(&Self::DATA_COLLATION_DIACRITICS_V1, req.id, req.metadata.attributes_prefix_match) else { return Err(icu_provider::DataErrorKind::IdentifierNotFound.with_req(::INFO, req)) }; + Ok(icu_provider::DataResponse { payload, metadata }) } } }; @@ -24,33 +42,13 @@ macro_rules! __impl_collation_diacritics_v1 { #[clippy::msrv = "1.86"] impl icu_provider::IterableDataProvider for $provider { fn iter_ids(&self) -> Result>, icu_provider::DataError> { - Ok(Default::default()) - } - } - }; - ($ provider : ty , DRY) => { - __impl_collation_diacritics_v1!($provider); - #[clippy::msrv = "1.86"] - impl icu_provider::DryDataProvider for $provider { - fn dry_load(&self, req: icu_provider::DataRequest) -> Result { - Err(icu_provider::DataErrorKind::IdentifierNotFound.with_req(::INFO, req)) + Ok(icu_provider::baked::DataStore::iter(&Self::DATA_COLLATION_DIACRITICS_V1).collect()) } } }; + ($ provider : ty , DRY) => {}; ($ provider : ty , DRY , ITER) => { - __impl_collation_diacritics_v1!($provider); - #[clippy::msrv = "1.86"] - impl icu_provider::DryDataProvider for $provider { - fn dry_load(&self, req: icu_provider::DataRequest) -> Result { - Err(icu_provider::DataErrorKind::IdentifierNotFound.with_req(::INFO, req)) - } - } - #[clippy::msrv = "1.86"] - impl icu_provider::IterableDataProvider for $provider { - fn iter_ids(&self) -> Result>, icu_provider::DataError> { - Ok(Default::default()) - } - } + __impl_collation_diacritics_v1!($provider, ITER); }; } #[doc(inline)] diff --git a/provider/data/collator/stubdata/mod.rs b/provider/data/collator/stubdata/mod.rs index 22486b91659..d8672f4192b 100644 --- a/provider/data/collator/stubdata/mod.rs +++ b/provider/data/collator/stubdata/mod.rs @@ -34,6 +34,7 @@ pub use __make_provider as make_provider; /// This macro requires the following crates: /// * `icu` /// * `icu_provider` +/// * `icu_provider/baked` /// * `zerovec` #[allow(unused_macros)] macro_rules! impl_data_provider {