From b196d9e7a9a2ad8f71097b7fc7135a00ee610518 Mon Sep 17 00:00:00 2001 From: KodrAus Date: Fri, 29 May 2026 21:17:44 +1000 Subject: [PATCH 1/8] initial SpanLinkSet impl --- src/buf.rs | 57 +++ src/lib.rs | 1 + src/span.rs | 1089 ++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 1099 insertions(+), 48 deletions(-) create mode 100644 src/buf.rs diff --git a/src/buf.rs b/src/buf.rs new file mode 100644 index 000000000..aad00b71f --- /dev/null +++ b/src/buf.rs @@ -0,0 +1,57 @@ +use core::fmt; + +pub(super) struct Buffer { + value: [u8; N], + idx: usize, +} + +impl Buffer { + pub(super) fn new() -> Self { + Buffer { + value: [0; N], + idx: 0, + } + } + + pub(super) fn reset(&mut self) { + self.idx = 0; + } + + pub(super) fn as_bytes(&self) -> &[u8] { + &self.value[..self.idx] + } + + pub(super) fn push_str(&mut self, s: &str) -> bool { + let s = s.as_bytes(); + let next_idx = self.idx + s.len(); + + if next_idx <= self.value.len() { + self.value[self.idx..next_idx].copy_from_slice(s); + self.idx = next_idx; + + true + } else { + false + } + } + + pub(super) fn buffer(&mut self, value: impl fmt::Display) -> Result<&[u8], fmt::Error> { + use fmt::Write as _; + + self.idx = 0; + + write!(self, "{}", value)?; + + Ok(&self.value[..self.idx]) + } +} + +impl fmt::Write for Buffer { + fn write_str(&mut self, s: &str) -> fmt::Result { + if self.push_str(s) { + Ok(()) + } else { + Err(fmt::Error) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 8fd9af8d2..690484b75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -200,6 +200,7 @@ pub use self::{ value::Value, }; +mod buf; mod macro_hooks; #[cfg(feature = "std")] diff --git a/src/span.rs b/src/span.rs index 550b34049..59a42e308 100644 --- a/src/span.rs +++ b/src/span.rs @@ -40,11 +40,13 @@ use core::{ }; pub use self::completion::Completion; +#[cfg(feature = "alloc")] +pub use self::alloc_support::SpanLinkSet; /** A [W3C Trace Id](https://www.w3.org/TR/trace-context/#trace-id). */ -#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct TraceId(NonZeroU128); impl fmt::Debug for TraceId { @@ -201,16 +203,16 @@ impl TraceId { If `hex` is not exactly 32 valid hex characters (`[a-fA-F0-9]`) then this method will fail. */ pub fn try_from_hex(hex: impl fmt::Display) -> Result { - let mut buf = Buffer::<32>::new(); + let mut buf = crate::buf::Buffer::<32>::new(); - Self::try_from_hex_slice(buf.buffer(hex)?) + Self::try_from_hex_slice(buf.buffer(hex).map_err(|_| ParseIdError {})?) } } /** A [W3C Span Id](https://www.w3.org/TR/trace-context/#parent-id). */ -#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct SpanId(NonZeroU64); impl fmt::Debug for SpanId { @@ -367,9 +369,9 @@ impl SpanId { If `hex` is not exactly 16 valid hex characters (`[a-fA-F0-9]`) then this method will fail. */ pub fn try_from_hex(hex: impl fmt::Display) -> Result { - let mut buf = Buffer::<16>::new(); + let mut buf = crate::buf::Buffer::<16>::new(); - Self::try_from_hex_slice(buf.buffer(hex)?) + Self::try_from_hex_slice(buf.buffer(hex).map_err(|_| ParseIdError {})?) } } @@ -433,46 +435,6 @@ impl fmt::Display for ParseIdError { #[cfg(feature = "std")] impl std::error::Error for ParseIdError {} -struct Buffer { - value: [u8; N], - idx: usize, -} - -impl Buffer { - fn new() -> Self { - Buffer { - value: [0; N], - idx: 0, - } - } - - fn buffer(&mut self, value: impl fmt::Display) -> Result<&[u8], ParseIdError> { - use fmt::Write as _; - - self.idx = 0; - - write!(self, "{}", value).map_err(|_| ParseIdError {})?; - - Ok(&self.value[..self.idx]) - } -} - -impl fmt::Write for Buffer { - fn write_str(&mut self, s: &str) -> fmt::Result { - let s = s.as_bytes(); - let next_idx = self.idx + s.len(); - - if next_idx <= self.value.len() { - self.value[self.idx..next_idx].copy_from_slice(s); - self.idx = next_idx; - - Ok(()) - } else { - Err(fmt::Error) - } - } -} - /** A diagnostic event that represents a span in a distributed trace. @@ -999,7 +961,7 @@ Links relate spans outside of the normal parent-child hierarchy. Links are largely informative and may not be understood by downstream consumers. In order to create a generic DAG out of span links, the spans would need to belong to separate traces, since the parent-child relationship is still required. */ -#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct SpanLink { trace_id: TraceId, span_id: SpanId, @@ -1039,7 +1001,7 @@ impl SpanLink { Try parse a span link from a formatted representation. */ pub fn parse(s: impl fmt::Display) -> Result { - let mut buf = Buffer::<49>::new(); + let mut buf = crate::buf::Buffer::<49>::new(); Self::try_from_slice(buf.buffer(s).map_err(|_| ParseLinkError {})?) } @@ -1129,6 +1091,1037 @@ impl fmt::Display for ParseLinkError { #[cfg(feature = "std")] impl std::error::Error for ParseLinkError {} +#[cfg(feature = "alloc")] +mod alloc_support { + use super::*; + + use alloc::collections::{btree_set, BTreeSet}; + use alloc::string::String; + use core::fmt::Write; + use core::str::FromStr; + + use emit_core::value::{FromValue, ToValue, Value}; + + /** + An error encountered attempting to parse a [`SpanLinkSet`]. + */ + #[derive(Debug)] + pub struct ParseSpanLinkSetError {} + + impl fmt::Display for ParseSpanLinkSetError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "the input was not a valid span link set") + } + } + + #[cfg(feature = "std")] + impl std::error::Error for ParseSpanLinkSetError {} + + /** + A collection of [`SpanLink`]s. + + The set stores sorted, deduplicated span links. Links can be parsed from strings + in the format `[$link,...]`, `{$link,...}`, or `($link,...)` where each `$link` + is a valid [`SpanLink`] string (`traceid-spanid`). + */ + #[derive(Clone, PartialEq, Eq)] + pub struct SpanLinkSet { + links: BTreeSet, + } + + impl fmt::Debug for SpanLinkSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self, f) + } + } + + impl fmt::Display for SpanLinkSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_char('[')?; + + let mut first = true; + for link in &self.links { + if !first { + f.write_char(',')?; + } + first = false; + + fmt::Display::fmt(link, f)?; + } + + f.write_char(']') + } + } + + impl SpanLinkSet { + /** + Create a new empty `SpanLinkSet`. + + This method does not allocate. + */ + pub const fn new() -> Self { + SpanLinkSet { + links: BTreeSet::new(), + } + } + + /** + Parse a `SpanLinkSet` from its raw textual representation. + + The input must be enclosed in matching brackets (`[]`, `()`, or `{}`), + with comma-separated [`SpanLink`] strings inside. + */ + pub fn try_from_str(s: &str) -> Result { + let mut set = SpanLinkSet::new(); + + let s = s.trim(); + + if s.len() < 2 { + return Err(ParseSpanLinkSetError {}); + } + + // Must be enclosed by `[]`, `()`, or `{}` + let _container_end = match (&s[0..1], &s[s.len() - 1..]) { + ("[", "]") => ']', + ("(", ")") => ')', + ("{", "}") => '}', + _ => return Err(ParseSpanLinkSetError {}), + }; + let inner = &s[1..s.len() - 1]; + + // Allow empty set: `[]`, `[ ]` + if inner.trim().is_empty() { + return Ok(set); + } + + // Split by comma and parse each link + for part in inner.split(',') { + let part = part.trim(); + if part.is_empty() { + return Err(ParseSpanLinkSetError {}); + } + + // Strip surrounding quotes (from debug format) + let part = part.strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .unwrap_or(part); + + let link = SpanLink::try_from_str(part).map_err(|_| ParseSpanLinkSetError {})?; + set.links.insert(link); + } + + Ok(set) + } + + /** + Insert a [`SpanLink`] into the set. + + Returns `true` if the link was not already present. + */ + pub fn insert(&mut self, link: SpanLink) -> bool { + self.links.insert(link) + } + + /** + Get the number of links in the set. + */ + pub fn len(&self) -> usize { + self.links.len() + } + + /** + Returns `true` if the set contains no links. + */ + pub fn is_empty(&self) -> bool { + self.links.is_empty() + } + + /** + Clear all links, allowing the allocation to be re-used. + */ + pub fn clear(&mut self) { + self.links.clear(); + } + + /** + Returns `true` if the set contains the given link. + */ + pub fn contains(&self, link: SpanLink) -> bool { + self.links.contains(&link) + } + + /** + Get the first (smallest) link in the set. + */ + pub fn first(&self) -> Option { + self.links.first().copied() + } + + /** + Get the last (largest) link in the set. + */ + pub fn last(&self) -> Option { + self.links.last().copied() + } + + /** + Iterate over links in sorted order. + */ + pub fn iter(&self) -> Iter<'_> { + Iter(self.links.iter()) + } + } + + impl Default for SpanLinkSet { + fn default() -> Self { + Self::new() + } + } + + impl<'a> IntoIterator for &'a SpanLinkSet { + type IntoIter = Iter<'a>; + type Item = SpanLink; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } + } + + /** + An iterator over sorted links from a [`SpanLinkSet`]. + + This is the result of calling [`SpanLinkSet::iter`]. + */ + pub struct Iter<'a>(btree_set::Iter<'a, SpanLink>); + + impl<'a> Iterator for Iter<'a> { + type Item = SpanLink; + + fn next(&mut self) -> Option { + self.0.next().copied() + } + } + + #[cfg(feature = "sval")] + impl sval::Value for SpanLinkSet { + fn stream<'sval, S: sval::Stream<'sval> + ?Sized>( + &'sval self, + stream: &mut S, + ) -> sval::Result { + stream.seq_begin(Some(self.links.len()))?; + + for link in &self.links { + stream.value_computed(link)?; + } + + stream.seq_end() + } + } + + #[cfg(feature = "serde")] + impl serde::Serialize for SpanLinkSet { + fn serialize( + &self, + serializer: S, + ) -> Result { + use serde::ser::SerializeSeq as _; + + let mut seq = serializer.serialize_seq(Some(self.links.len()))?; + + for link in &self.links { + seq.serialize_element(link)?; + } + + seq.end() + } + } + + impl FromStr for SpanLinkSet { + type Err = ParseSpanLinkSetError; + + fn from_str(s: &str) -> Result { + Self::try_from_str(s) + } + } + + impl ToValue for SpanLinkSet { + fn to_value(&self) -> Value<'_> { + #[cfg(feature = "sval")] + { + Value::capture_sval(self) + } + #[cfg(all(feature = "serde", not(feature = "sval")))] + { + Value::capture_serde(self) + } + #[cfg(all(not(feature = "serde"), not(feature = "sval")))] + { + Value::capture_display(self) + } + } + } + + impl<'a> FromValue<'a> for SpanLinkSet { + fn from_value(v: Value<'a>) -> Option { + if let Some(link_set) = v.downcast_ref::() { + return Some(link_set.clone()); + } + + #[cfg(feature = "sval")] + { + if let Some(link_set) = from_sval(v.by_ref()) { + return Some(link_set); + } + } + + #[cfg(all(not(feature = "sval"), feature = "serde"))] + { + if let Some(link_set) = from_serde(v.by_ref()) { + return Some(link_set); + } + } + + v.parse() + } + } + + #[cfg(any(feature = "sval", feature = "serde"))] + struct Extract { + depth: usize, + links: BTreeSet, + text_buf: crate::buf::Buffer<49>, + full_text: String, + } + + #[cfg(any(feature = "sval", feature = "serde"))] + impl Default for Extract { + fn default() -> Self { + Extract { + depth: 0, + links: BTreeSet::new(), + text_buf: crate::buf::Buffer::new(), + full_text: String::new(), + } + } + } + + #[cfg(any(feature = "sval", feature = "serde"))] + #[derive(Debug)] + struct Incompatible; + + #[cfg(any(feature = "sval", feature = "serde"))] + impl Extract { + fn push_link_fragment(&mut self, fragment: &str) -> Result<(), Incompatible> { + if self.depth == 0 { + self.full_text.push_str(fragment); + return Ok(()); + } + + if self.depth != 1 { + return Err(Incompatible); + } + + if !self.text_buf.push_str(fragment) { + return Err(Incompatible); + } + + Ok(()) + } + + fn push_link(&mut self) -> Result<(), Incompatible> { + if self.depth == 0 { + return Ok(()); + } + + if self.depth != 1 { + return Err(Incompatible); + } + + let link_bytes = self.text_buf.as_bytes(); + let link = SpanLink::try_from_slice(link_bytes) + .map_err(|_| Incompatible)?; + self.links.insert(link); + self.text_buf.reset(); + + Ok(()) + } + + fn down(&mut self) -> Result<(), Incompatible> { + self.depth += 1; + + if self.depth > 1 { + Err(Incompatible) + } else { + Ok(()) + } + } + + fn up(&mut self) -> Result<(), Incompatible> { + self.depth -= 1; + + Ok(()) + } + + fn end(self) -> Option { + if !self.links.is_empty() { + return Some(SpanLinkSet { links: self.links }); + } + + if !self.full_text.is_empty() { + return SpanLinkSet::try_from_str(&self.full_text).ok(); + } + + None + } + } + + #[cfg(feature = "sval")] + fn from_sval(value: Value) -> Option { + #[allow(non_local_definitions)] + impl From for sval::Error { + fn from(_: Incompatible) -> sval::Error { + sval::Error::new() + } + } + + #[allow(non_local_definitions)] + impl<'sval> sval::Stream<'sval> for Extract { + fn null(&mut self) -> sval::Result { + sval::error() + } + + fn bool(&mut self, _: bool) -> sval::Result { + sval::error() + } + + fn text_begin(&mut self, _: Option) -> sval::Result { + Ok(()) + } + + fn text_fragment_computed(&mut self, fragment: &str) -> sval::Result { + self.push_link_fragment(fragment).map_err(Into::into) + } + + fn text_end(&mut self) -> sval::Result { + Ok(self.push_link()?) + } + + fn i64(&mut self, _: i64) -> sval::Result { + sval::error() + } + + fn u64(&mut self, _: u64) -> sval::Result { + sval::error() + } + + fn i128(&mut self, _: i128) -> sval::Result { + sval::error() + } + + fn u128(&mut self, _: u128) -> sval::Result { + sval::error() + } + + fn f64(&mut self, _: f64) -> sval::Result { + sval::error() + } + + fn seq_begin(&mut self, _: Option) -> sval::Result { + Ok(self.down()?) + } + + fn seq_value_begin(&mut self) -> sval::Result { + Ok(()) + } + + fn seq_value_end(&mut self) -> sval::Result { + Ok(()) + } + + fn seq_end(&mut self) -> sval::Result { + Ok(self.up()?) + } + } + + let mut extract = Extract::default(); + sval::stream(&mut extract, &value).ok()?; + extract.end() + } + + #[cfg(all(not(feature = "sval"), feature = "serde"))] + fn from_serde(value: Value) -> Option { + use serde::Serialize as _; + + #[allow(non_local_definitions)] + impl fmt::Display for Incompatible { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("incompatible") + } + } + + #[allow(non_local_definitions)] + impl serde::ser::StdError for Incompatible {} + + #[allow(non_local_definitions)] + impl serde::ser::Error for Incompatible { + fn custom(_: T) -> Self + where + T: fmt::Display, + { + Incompatible + } + } + + #[allow(non_local_definitions)] + impl<'a> serde::Serializer for &'a mut Extract { + type Ok = (); + type Error = Incompatible; + type SerializeSeq = Self; + type SerializeTuple = Self; + type SerializeTupleStruct = Self; + type SerializeTupleVariant = Self; + type SerializeMap = Self; + type SerializeStruct = Self; + type SerializeStructVariant = Self; + + fn serialize_bool(self, _: bool) -> Result { + Err(Incompatible) + } + + fn serialize_i8(self, _: i8) -> Result { + Err(Incompatible) + } + + fn serialize_i16(self, _: i16) -> Result { + Err(Incompatible) + } + + fn serialize_i32(self, _: i32) -> Result { + Err(Incompatible) + } + + fn serialize_i64(self, _: i64) -> Result { + Err(Incompatible) + } + + fn serialize_u8(self, _: u8) -> Result { + Err(Incompatible) + } + + fn serialize_u16(self, _: u16) -> Result { + Err(Incompatible) + } + + fn serialize_u32(self, _: u32) -> Result { + Err(Incompatible) + } + + fn serialize_u64(self, _: u64) -> Result { + Err(Incompatible) + } + + fn serialize_u128(self, _: u128) -> Result { + Err(Incompatible) + } + + fn serialize_i128(self, _: i128) -> Result { + Err(Incompatible) + } + + fn serialize_f32(self, _: f32) -> Result { + Err(Incompatible) + } + + fn serialize_f64(self, _: f64) -> Result { + Err(Incompatible) + } + + fn serialize_char(self, _: char) -> Result { + Err(Incompatible) + } + + fn serialize_str(self, value: &str) -> Result { + if self.depth == 1 { + self.text_buf.reset(); + self.push_link_fragment(value)?; + self.push_link() + } else { + Err(Incompatible) + } + } + + fn serialize_bytes(self, _: &[u8]) -> Result { + Err(Incompatible) + } + + fn serialize_none(self) -> Result { + Err(Incompatible) + } + + fn serialize_some(self, value: &T) -> Result + where + T: ?Sized + serde::Serialize, + { + value.serialize(self) + } + + fn serialize_unit(self) -> Result { + Err(Incompatible) + } + + fn serialize_unit_struct( + self, + name: &'static str, + ) -> Result { + name.serialize(self) + } + + fn serialize_newtype_struct( + self, + _: &'static str, + value: &T, + ) -> Result + where + T: ?Sized + serde::Serialize, + { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + value: &T, + ) -> Result + where + T: ?Sized + serde::Serialize, + { + value.serialize(self) + } + + fn serialize_seq( + self, + _: Option, + ) -> Result { + self.down()?; + Ok(self) + } + + fn serialize_tuple( + self, + _: usize, + ) -> Result { + self.down()?; + Ok(self) + } + + fn serialize_tuple_struct( + self, + _: &'static str, + _: usize, + ) -> Result { + self.down()?; + Ok(self) + } + + fn serialize_tuple_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + _: usize, + ) -> Result { + self.down()?; + Ok(self) + } + + fn serialize_map( + self, + _: Option, + ) -> Result { + self.down()?; + Ok(self) + } + + fn serialize_struct( + self, + _: &'static str, + _: usize, + ) -> Result { + self.down()?; + Ok(self) + } + + fn serialize_struct_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + _: usize, + ) -> Result { + self.down()?; + Ok(self) + } + + fn serialize_unit_variant( + self, + _: &'static str, + _: u32, + variant: &'static str, + ) -> Result { + variant.serialize(self) + } + } + + #[allow(non_local_definitions)] + impl<'a> serde::ser::SerializeSeq for &'a mut Extract { + type Ok = (); + type Error = Incompatible; + + fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> + where + T: ?Sized + serde::Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result { + self.up()?; + Ok(()) + } + } + + #[allow(non_local_definitions)] + impl<'a> serde::ser::SerializeTuple for &'a mut Extract { + type Ok = (); + type Error = Incompatible; + + fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> + where + T: ?Sized + serde::Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result { + self.up()?; + Ok(()) + } + } + + #[allow(non_local_definitions)] + impl<'a> serde::ser::SerializeTupleStruct for &'a mut Extract { + type Ok = (); + type Error = Incompatible; + + fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> + where + T: ?Sized + serde::Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result { + self.up()?; + Ok(()) + } + } + + #[allow(non_local_definitions)] + impl<'a> serde::ser::SerializeTupleVariant for &'a mut Extract { + type Ok = (); + type Error = Incompatible; + + fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> + where + T: ?Sized + serde::Serialize, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result { + self.up()?; + Ok(()) + } + } + + #[allow(non_local_definitions)] + impl<'a> serde::ser::SerializeMap for &'a mut Extract { + type Ok = (); + type Error = Incompatible; + + fn serialize_key(&mut self, _: &T) -> Result<(), Self::Error> + where + T: ?Sized + serde::Serialize, + { + Err(Incompatible) + } + + fn serialize_value(&mut self, _: &T) -> Result<(), Self::Error> + where + T: ?Sized + serde::Serialize, + { + Err(Incompatible) + } + + fn end(self) -> Result { + self.up()?; + Ok(()) + } + } + + #[allow(non_local_definitions)] + impl<'a> serde::ser::SerializeStruct for &'a mut Extract { + type Ok = (); + type Error = Incompatible; + + fn serialize_field(&mut self, _: &'static str, _: &T) -> Result<(), Self::Error> + where + T: ?Sized + serde::Serialize, + { + Err(Incompatible) + } + + fn end(self) -> Result { + self.up()?; + Ok(()) + } + } + + #[allow(non_local_definitions)] + impl<'a> serde::ser::SerializeStructVariant for &'a mut Extract { + type Ok = (); + type Error = Incompatible; + + fn serialize_field(&mut self, _: &'static str, _: &T) -> Result<(), Self::Error> + where + T: ?Sized + serde::Serialize, + { + Err(Incompatible) + } + + fn end(self) -> Result { + self.up()?; + Ok(()) + } + } + + let mut extract = Extract::default(); + value.serialize(&mut extract).ok()?; + extract.end() + } + + #[cfg(test)] + mod tests { + use super::*; + + fn link1() -> SpanLink { + SpanLink::new( + TraceId::from_u128(0x0123456789abcdef0123456789abcdef).unwrap(), + SpanId::from_u64(0x0123456789abcdef).unwrap(), + ) + } + + fn link2() -> SpanLink { + SpanLink::new( + TraceId::from_u128(0xfedcba9876543210fedcba9876543210).unwrap(), + SpanId::from_u64(0xfedcba9876543210).unwrap(), + ) + } + + fn set_with_links() -> SpanLinkSet { + let mut set = SpanLinkSet::new(); + set.insert(link1()); + set.insert(link2()); + set + } + + #[test] + fn span_link_set() { + let mut set = SpanLinkSet::new(); + + assert_eq!(0, set.len()); + assert!(set.is_empty()); + assert!(set.first().is_none()); + assert!(set.last().is_none()); + assert!(!set.contains(link1())); + + set.insert(link1()); + assert_eq!(1, set.len()); + assert!(set.contains(link1())); + + set.insert(link2()); + assert_eq!(2, set.len()); + + // Deduplication + assert!(!set.insert(link1())); + assert_eq!(2, set.len()); + + // First/last + assert_eq!(Some(link1()), set.first()); + assert_eq!(Some(link2()), set.last()); + + // Iter + let collected: alloc::vec::Vec<_> = set.iter().collect(); + assert_eq!(alloc::vec![link1(), link2()], collected); + + // Clear + set.clear(); + assert_eq!(0, set.len()); + assert!(set.is_empty()); + } + + #[test] + fn span_link_set_roundtrip() { + for case in [ + SpanLinkSet::new(), + { + let mut set = SpanLinkSet::new(); + set.insert(link1()); + set + }, + set_with_links(), + ] { + let fmt = case.to_string(); + assert_eq!(Some(case), SpanLinkSet::try_from_str(&fmt).ok(), "{fmt}"); + } + } + + #[test] + fn span_link_set_parse() { + for (case, expected) in [ + ( + "[0123456789abcdef0123456789abcdef-0123456789abcdef,fedcba9876543210fedcba9876543210-fedcba9876543210]", + set_with_links(), + ), + ( + "{0123456789abcdef0123456789abcdef-0123456789abcdef,fedcba9876543210fedcba9876543210-fedcba9876543210}", + set_with_links(), + ), + ( + "(0123456789abcdef0123456789abcdef-0123456789abcdef,fedcba9876543210fedcba9876543210-fedcba9876543210)", + set_with_links(), + ), + ( + " [ 0123456789abcdef0123456789abcdef-0123456789abcdef , fedcba9876543210fedcba9876543210-fedcba9876543210 ] ", + set_with_links(), + ), + ("[]", SpanLinkSet::new()), + ] { + assert_eq!( + Some(expected), + SpanLinkSet::try_from_str(case).ok(), + "{case}" + ); + } + } + + #[test] + fn span_link_set_parse_exotic() { + // Deduplication: duplicate links produce single entry + let set = SpanLinkSet::try_from_str( + "[0123456789abcdef0123456789abcdef-0123456789abcdef,0123456789abcdef0123456789abcdef-0123456789abcdef]" + ) + .unwrap(); + + assert_eq!(1, set.len()); + assert!(set.contains(link1())); + } + + #[test] + fn span_link_set_to_from_value() { + for case in [set_with_links()] { + assert_eq!(case, SpanLinkSet::from_value(case.to_value()).unwrap()); + } + } + + #[test] + fn span_link_set_from_value_string() { + for (case, expected) in [ + ( + "[0123456789abcdef0123456789abcdef-0123456789abcdef,fedcba9876543210fedcba9876543210-fedcba9876543210]", + set_with_links(), + ), + ] { + assert_eq!(expected, Value::from(case).cast().unwrap()); + } + } + + #[test] + fn span_link_set_from_value_structured() { + #[cfg(feature = "sval")] + trait CaseSval: sval::Value {} + #[cfg(feature = "sval")] + impl CaseSval for T {} + #[cfg(not(feature = "sval"))] + trait CaseSval {} + #[cfg(not(feature = "sval"))] + impl CaseSval for T {} + + #[cfg(feature = "serde")] + trait CaseSerde: serde::Serialize {} + #[cfg(feature = "serde")] + impl CaseSerde for T {} + #[cfg(not(feature = "serde"))] + trait CaseSerde {} + #[cfg(not(feature = "serde"))] + impl CaseSerde for T {} + + trait Case: CaseSval + CaseSerde + fmt::Debug {} + impl Case for T {} + + fn case(case: &impl Case, expected: &SpanLinkSet) { + assert_eq!(expected, &Value::from_debug(case).cast().unwrap()); + + #[cfg(feature = "sval")] + { + assert_eq!(expected, &Value::from_sval(case).cast().unwrap()); + } + + #[cfg(feature = "serde")] + { + assert_eq!(expected, &Value::from_serde(case).cast().unwrap()); + } + } + + let expected = set_with_links(); + + case( + &[ + "0123456789abcdef0123456789abcdef-0123456789abcdef", + "fedcba9876543210fedcba9876543210-fedcba9876543210", + ], + &expected, + ); + case(&[link1(), link2()], &expected); + + let mut btree_set = alloc::collections::BTreeSet::new(); + btree_set.insert(link1()); + btree_set.insert(link2()); + case(&btree_set, &expected); + } + + #[test] + fn err_span_link_set_invalid() { + for case in [ + "", + "<>", + "0123456789abcdef0123456789abcdef-0123456789abcdef", + "[0123456789abcdef0123456789abcdef-0123456789abcdef,", + "[,]", + "[,,]", + "[invalid]", + "[0123456789abcdef0123456789abcdef-0123456789abcdef,invalid]", + "{]", + "([", + ] { + assert!(SpanLinkSet::try_from_str(case).is_err(), "{case}"); + } + } + } +} + /** An active span in a distributed trace. From dfc7fe0931f3c762492ea50fb057915af6c59a5f Mon Sep 17 00:00:00 2001 From: Ashley Date: Sat, 30 May 2026 18:52:04 +1000 Subject: [PATCH 2/8] align impls of text parsers --- src/buf.rs | 2 + src/metric.rs | 25 ++++---- src/span.rs | 154 +++++++++++++++++++++++++------------------------- 3 files changed, 89 insertions(+), 92 deletions(-) diff --git a/src/buf.rs b/src/buf.rs index aad00b71f..568f3ae68 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -13,10 +13,12 @@ impl Buffer { } } + #[cfg(any(feature = "sval", feature = "serde"))] pub(super) fn reset(&mut self) { self.idx = 0; } + #[cfg(any(feature = "sval", feature = "serde"))] pub(super) fn as_bytes(&self) -> &[u8] { &self.value[..self.idx] } diff --git a/src/metric.rs b/src/metric.rs index a69b823e2..e6204092c 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -1679,11 +1679,6 @@ pub mod exp { if !first { s = s.trim_start(); - if s.len() < 2 { - // Unexpected EOF parsing bucket: not enough chars for `,[` - return Err(ParseBucketSetError {}); - } - if &s[0..1] != "," { // Invalid bucket: expected `,` return Err(ParseBucketSetError {}); @@ -1692,6 +1687,8 @@ pub mod exp { } first = false; + // TODO: These accessors can panic on excessive whitespace + // Determine the kind of bucket we're parsing s = s.trim_start(); let (key_start_skip, key_end, value_end) = match &s[0..1] { @@ -2028,14 +2025,10 @@ pub mod exp { Ok(()) } - fn end(self) -> Option { - if self.buckets.len() == 0 { - None - } else { - Some(BucketSet { - buckets: self.buckets, - total: self.count, - }) + fn end(self) -> BucketSet { + BucketSet { + buckets: self.buckets, + total: self.count, } } } @@ -2110,7 +2103,8 @@ pub mod exp { let mut extract = Extract::default(); sval::stream(&mut extract, &value).ok()?; - extract.end() + + Some(extract.end()) } #[cfg(all(not(feature = "sval"), feature = "serde"))] @@ -2484,7 +2478,8 @@ pub mod exp { let mut extract = Extract::default(); value.serialize(&mut extract).ok()?; - extract.end() + + Some(extract.end()) } #[cfg(test)] diff --git a/src/span.rs b/src/span.rs index 59a42e308..9a4039c77 100644 --- a/src/span.rs +++ b/src/span.rs @@ -40,8 +40,6 @@ use core::{ }; pub use self::completion::Completion; -#[cfg(feature = "alloc")] -pub use self::alloc_support::SpanLinkSet; /** A [W3C Trace Id](https://www.w3.org/TR/trace-context/#trace-id). @@ -1092,13 +1090,15 @@ impl fmt::Display for ParseLinkError { impl std::error::Error for ParseLinkError {} #[cfg(feature = "alloc")] -mod alloc_support { +pub mod span_link_set { + /*! + The [`SpanLinkSet`] type. + */ + use super::*; use alloc::collections::{btree_set, BTreeSet}; - use alloc::string::String; - use core::fmt::Write; - use core::str::FromStr; + use core::{fmt::Write, str::FromStr}; use emit_core::value::{FromValue, ToValue, Value}; @@ -1120,9 +1120,7 @@ mod alloc_support { /** A collection of [`SpanLink`]s. - The set stores sorted, deduplicated span links. Links can be parsed from strings - in the format `[$link,...]`, `{$link,...}`, or `($link,...)` where each `$link` - is a valid [`SpanLink`] string (`traceid-spanid`). + The set stores sorted, deduplicated span links. */ #[derive(Clone, PartialEq, Eq)] pub struct SpanLinkSet { @@ -1168,46 +1166,77 @@ mod alloc_support { /** Parse a `SpanLinkSet` from its raw textual representation. - The input must be enclosed in matching brackets (`[]`, `()`, or `{}`), - with comma-separated [`SpanLink`] strings inside. + The input must be enclosed in matching brackets (`[]`, `()`, or `{}`), with comma-separated [`SpanLink`] strings inside. */ - pub fn try_from_str(s: &str) -> Result { + pub fn try_from_str(mut s: &str) -> Result { let mut set = SpanLinkSet::new(); - let s = s.trim(); + s = s.trim(); if s.len() < 2 { return Err(ParseSpanLinkSetError {}); } // Must be enclosed by `[]`, `()`, or `{}` - let _container_end = match (&s[0..1], &s[s.len() - 1..]) { - ("[", "]") => ']', - ("(", ")") => ')', - ("{", "}") => '}', + match (&s[0..1], &s[s.len() - 1..]) { + ("[", "]") => (), + ("(", ")") => (), + ("{", "}") => (), _ => return Err(ParseSpanLinkSetError {}), }; - let inner = &s[1..s.len() - 1]; + s = &s[1..].trim_start(); - // Allow empty set: `[]`, `[ ]` - if inner.trim().is_empty() { - return Ok(set); - } + let mut first = true; - // Split by comma and parse each link - for part in inner.split(',') { - let part = part.trim(); - if part.is_empty() { + while s.len() > 1 { + if s.len() < 49 { + // Unexpected EOF parsing link: not enough chars for `$link` return Err(ParseSpanLinkSetError {}); } - // Strip surrounding quotes (from debug format) - let part = part.strip_prefix('"') - .and_then(|s| s.strip_suffix('"')) - .unwrap_or(part); + // Parse each link + if !first { + if &s[0..1] != "," { + // Invalid link: expected `,` + return Err(ParseSpanLinkSetError {}); + } + s = &s[1..].trim_start(); + } + first = false; + + // TODO: These accessors can panic on excessive whitespace - let link = SpanLink::try_from_str(part).map_err(|_| ParseSpanLinkSetError {})?; + let link = match &s[0..1] { + "\"" => { + // Parse a link surrounded by quotes + if &s[50..51] != "\"" { + // Unquoted + return Err(ParseSpanLinkSetError {}); + } + + let link = &s[1..50]; + s = &s[51..]; + + link + } + _ => { + // Parse an unquoted span link + let link = &s[0..49]; + s = &s[49..]; + + link + } + }; + + let link = SpanLink::try_from_str(link).map_err(|_| ParseSpanLinkSetError {})?; set.links.insert(link); + + s = s.trim_start(); + } + + if s.len() != 1 { + // Unexpected EOF + return Err(ParseSpanLinkSetError {}); } Ok(set) @@ -1311,7 +1340,7 @@ mod alloc_support { stream.seq_begin(Some(self.links.len()))?; for link in &self.links { - stream.value_computed(link)?; + stream.value(link)?; } stream.seq_end() @@ -1320,10 +1349,7 @@ mod alloc_support { #[cfg(feature = "serde")] impl serde::Serialize for SpanLinkSet { - fn serialize( - &self, - serializer: S, - ) -> Result { + fn serialize(&self, serializer: S) -> Result { use serde::ser::SerializeSeq as _; let mut seq = serializer.serialize_seq(Some(self.links.len()))?; @@ -1390,7 +1416,6 @@ mod alloc_support { depth: usize, links: BTreeSet, text_buf: crate::buf::Buffer<49>, - full_text: String, } #[cfg(any(feature = "sval", feature = "serde"))] @@ -1400,7 +1425,6 @@ mod alloc_support { depth: 0, links: BTreeSet::new(), text_buf: crate::buf::Buffer::new(), - full_text: String::new(), } } } @@ -1412,11 +1436,6 @@ mod alloc_support { #[cfg(any(feature = "sval", feature = "serde"))] impl Extract { fn push_link_fragment(&mut self, fragment: &str) -> Result<(), Incompatible> { - if self.depth == 0 { - self.full_text.push_str(fragment); - return Ok(()); - } - if self.depth != 1 { return Err(Incompatible); } @@ -1429,17 +1448,13 @@ mod alloc_support { } fn push_link(&mut self) -> Result<(), Incompatible> { - if self.depth == 0 { - return Ok(()); - } - if self.depth != 1 { return Err(Incompatible); } let link_bytes = self.text_buf.as_bytes(); - let link = SpanLink::try_from_slice(link_bytes) - .map_err(|_| Incompatible)?; + let link = SpanLink::try_from_slice(link_bytes).map_err(|_| Incompatible)?; + self.links.insert(link); self.text_buf.reset(); @@ -1462,16 +1477,8 @@ mod alloc_support { Ok(()) } - fn end(self) -> Option { - if !self.links.is_empty() { - return Some(SpanLinkSet { links: self.links }); - } - - if !self.full_text.is_empty() { - return SpanLinkSet::try_from_str(&self.full_text).ok(); - } - - None + fn end(self) -> SpanLinkSet { + SpanLinkSet { links: self.links } } } @@ -1545,7 +1552,8 @@ mod alloc_support { let mut extract = Extract::default(); sval::stream(&mut extract, &value).ok()?; - extract.end() + + Some(extract.end()) } #[cfg(all(not(feature = "sval"), feature = "serde"))] @@ -1669,10 +1677,7 @@ mod alloc_support { Err(Incompatible) } - fn serialize_unit_struct( - self, - name: &'static str, - ) -> Result { + fn serialize_unit_struct(self, name: &'static str) -> Result { name.serialize(self) } @@ -1700,18 +1705,12 @@ mod alloc_support { value.serialize(self) } - fn serialize_seq( - self, - _: Option, - ) -> Result { + fn serialize_seq(self, _: Option) -> Result { self.down()?; Ok(self) } - fn serialize_tuple( - self, - _: usize, - ) -> Result { + fn serialize_tuple(self, _: usize) -> Result { self.down()?; Ok(self) } @@ -1736,10 +1735,7 @@ mod alloc_support { Ok(self) } - fn serialize_map( - self, - _: Option, - ) -> Result { + fn serialize_map(self, _: Option) -> Result { self.down()?; Ok(self) } @@ -1909,7 +1905,8 @@ mod alloc_support { let mut extract = Extract::default(); value.serialize(&mut extract).ok()?; - extract.end() + + Some(extract.end()) } #[cfg(test)] @@ -2122,6 +2119,9 @@ mod alloc_support { } } +#[cfg(feature = "alloc")] +pub use self::span_link_set::SpanLinkSet; + /** An active span in a distributed trace. From e9cf02a603ec52888736c2cd69f93b8a2896e0c9 Mon Sep 17 00:00:00 2001 From: KodrAus Date: Sat, 30 May 2026 19:54:52 +1000 Subject: [PATCH 3/8] convert parsers to use byte slices instead of str --- src/buf.rs | 41 ++++++++++++++++++++++++++++ src/metric.rs | 74 +++++++++++++++++++++++++-------------------------- src/span.rs | 49 +++++++++++++++++++--------------- 3 files changed, 105 insertions(+), 59 deletions(-) diff --git a/src/buf.rs b/src/buf.rs index 568f3ae68..d15be2f03 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -1,5 +1,46 @@ use core::fmt; +/// Strip leading ASCII whitespace from a byte slice. +pub(crate) fn trim_start(s: &[u8]) -> &[u8] { + let start = s + .iter() + .position(|&b| !b.is_ascii_whitespace()) + .unwrap_or(s.len()); + &s[start..] +} + +/// Strip trailing ASCII whitespace from a byte slice. +pub(crate) fn trim_end(s: &[u8]) -> &[u8] { + let end = s + .iter() + .rposition(|&b| !b.is_ascii_whitespace()) + .map(|i| i + 1) + .unwrap_or(0); + &s[..end] +} + +/// Strip leading and trailing ASCII whitespace from a byte slice. +pub(crate) fn trim(s: &[u8]) -> &[u8] { + trim_end(trim_start(s)) +} + +/// Find the first occurrence of any needle byte in the haystack, returning +/// the matched position and its associated skip count. +/// +/// Each needle entry is a `(byte, skip)` pair. The `skip` value is returned +/// as `usize` in the result tuple. +pub(crate) fn find(haystack: &[u8], needle: &[(u8, u8)]) -> Option<(usize, usize)> { + needle + .iter() + .filter_map(|(n, cs)| { + haystack + .iter() + .position(|&b| b == *n) + .map(|c| (c, *cs as usize)) + }) + .next() +} + pub(super) struct Buffer { value: [u8; N], idx: usize, diff --git a/src/metric.rs b/src/metric.rs index e6204092c..7497e518b 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -2,7 +2,7 @@ The [`Metric`] type. */ -use core::{fmt, ops::ControlFlow}; +use core::{fmt, ops::ControlFlow, str}; use emit_core::{ and::And, @@ -1570,6 +1570,7 @@ pub mod exp { use crate::{ alloc::collections::{btree_map, BTreeMap}, + buf::{find, trim, trim_start}, core::{ fmt::{self, Write as _}, str::FromStr, @@ -1647,17 +1648,14 @@ pub mod exp { /** Parse a `BucketSet` from its raw textual representation. */ - pub fn try_from_str(mut s: &str) -> Result { - fn find(haystack: &str, needle: &[(char, u8)]) -> Option<(usize, usize)> { - needle - .iter() - .filter_map(|(c, cs)| haystack.find(*c).map(|c| (c, *cs as usize))) - .next() - } + pub fn try_from_str(s: &str) -> Result { + Self::try_from_slice(s.as_bytes()) + } + fn try_from_slice(mut s: &[u8]) -> Result { let mut set = BucketSet::new(); - s = s.trim(); + s = trim(s); if s.len() < 2 { // Truncated @@ -1665,10 +1663,10 @@ pub mod exp { } // Must be enclosed by `[]`, `()`, or `{}` - let container_end = match (&s[0..1], &s[s.len() - 1..]) { - ("[", "]") => ']', - ("(", ")") => ')', - ("{", "}") => '}', + let container_end = match (s.first(), s.last()) { + (Some(&b'['), Some(&b']')) => b']', + (Some(&b'('), Some(&b')')) => b')', + (Some(&b'{'), Some(&b'}')) => b'}', _ => return Err(ParseBucketSetError {}), }; s = &s[1..]; @@ -1677,9 +1675,9 @@ pub mod exp { while s.len() > 1 { // Parse each bucket if !first { - s = s.trim_start(); + s = trim_start(s); - if &s[0..1] != "," { + if s.first() != Some(&b',') { // Invalid bucket: expected `,` return Err(ParseBucketSetError {}); } @@ -1690,53 +1688,53 @@ pub mod exp { // TODO: These accessors can panic on excessive whitespace // Determine the kind of bucket we're parsing - s = s.trim_start(); - let (key_start_skip, key_end, value_end) = match &s[0..1] { + s = trim_start(s); + let (key_start_skip, key_end, value_end): ( + usize, + &[(u8, u8)], + &[(u8, u8)], + ) = match s.first() { // `[k, v]`, `[k: v]`, or `[k = v]` - "[" => ( - 1, - &[(',', 1u8), (':', 1u8), ('=', 1u8)] as &[(char, u8)], - &[(']', 1u8)] as &[(char, u8)], - ), + Some(&b'[') => { + (1, &[(b',', 1u8), (b':', 1u8), (b'=', 1u8)], &[(b']', 1u8)]) + } // `(k, v)`, `(k: v)`, or `(k = v)` - "(" => ( - 1, - &[(',', 1u8), (':', 1u8), ('=', 1u8)] as &[(char, u8)], - &[(')', 1u8)] as &[(char, u8)], - ), + Some(&b'(') => { + (1, &[(b',', 1u8), (b':', 1u8), (b'=', 1u8)], &[(b')', 1u8)]) + } // `{k, v}`, `{k: v}`, or `{k = v}` - "{" => ( - 1, - &[(',', 1u8), (':', 1u8), ('=', 1u8)] as &[(char, u8)], - &[('}', 1u8)] as &[(char, u8)], - ), + Some(&b'{') => { + (1, &[(b',', 1u8), (b':', 1u8), (b'=', 1u8)], &[(b'}', 1u8)]) + } // `k: v`, or `k = v` _ => ( 0, - &[(':', 1u8), ('=', 1u8)] as &[(char, u8)], - &[(',', 0u8), (container_end, 0u8)] as &[(char, u8)], + &[(b':', 1u8), (b'=', 1u8)], + &[(b',', 0u8), (container_end, 0u8)], ), }; s = &s[key_start_skip..]; // Find the bounds of the key - s = s.trim_start(); + s = trim_start(s); let Some((key_end, key_end_skip)) = find(s, key_end) else { // Unexpected EOF parsing key: expected `$key_end` return Err(ParseBucketSetError {}); }; - let key = &s[..key_end]; + let key = + str::from_utf8(&s[..key_end]).map_err(|_| ParseBucketSetError {})?; s = &s[key_end + key_end_skip..]; // Find the bounds of the value - s = s.trim_start(); + s = trim_start(s); let Some((value_end, value_end_skip)) = find(s, value_end) else { // Unexpected EOF parsing value: expected `$value_end` return Err(ParseBucketSetError {}); }; - let value = &s[..value_end]; + let value = + str::from_utf8(&s[..value_end]).map_err(|_| ParseBucketSetError {})?; s = &s[value_end + value_end_skip..]; // Parse the key and value diff --git a/src/span.rs b/src/span.rs index 9a4039c77..136e8fb84 100644 --- a/src/span.rs +++ b/src/span.rs @@ -1100,6 +1100,7 @@ pub mod span_link_set { use alloc::collections::{btree_set, BTreeSet}; use core::{fmt::Write, str::FromStr}; + use crate::buf::{trim, trim_start}; use emit_core::value::{FromValue, ToValue, Value}; /** @@ -1168,23 +1169,28 @@ pub mod span_link_set { The input must be enclosed in matching brackets (`[]`, `()`, or `{}`), with comma-separated [`SpanLink`] strings inside. */ - pub fn try_from_str(mut s: &str) -> Result { + pub fn try_from_str(s: &str) -> Result { + Self::try_from_slice(s.as_bytes()) + } + + fn try_from_slice(mut s: &[u8]) -> Result { let mut set = SpanLinkSet::new(); - s = s.trim(); + s = trim(s); if s.len() < 2 { return Err(ParseSpanLinkSetError {}); } // Must be enclosed by `[]`, `()`, or `{}` - match (&s[0..1], &s[s.len() - 1..]) { - ("[", "]") => (), - ("(", ")") => (), - ("{", "}") => (), + match (s.first(), s.last()) { + (Some(&b'['), Some(&b']')) => (), + (Some(&b'('), Some(&b')')) => (), + (Some(&b'{'), Some(&b'}')) => (), _ => return Err(ParseSpanLinkSetError {}), }; - s = &s[1..].trim_start(); + s = &s[1..]; + s = trim_start(s); let mut first = true; @@ -1196,20 +1202,21 @@ pub mod span_link_set { // Parse each link if !first { - if &s[0..1] != "," { + if s.first() != Some(&b',') { // Invalid link: expected `,` return Err(ParseSpanLinkSetError {}); } - s = &s[1..].trim_start(); + s = &s[1..]; + s = trim_start(s); } first = false; // TODO: These accessors can panic on excessive whitespace - let link = match &s[0..1] { - "\"" => { + let link = match s.first() { + Some(&b'"') => { // Parse a link surrounded by quotes - if &s[50..51] != "\"" { + if s.get(50) != Some(&b'"') { // Unquoted return Err(ParseSpanLinkSetError {}); } @@ -1228,10 +1235,13 @@ pub mod span_link_set { } }; - let link = SpanLink::try_from_str(link).map_err(|_| ParseSpanLinkSetError {})?; - set.links.insert(link); + let link = SpanLink::try_from_slice(link).map_err(|_| ParseSpanLinkSetError {})?; + if !set.links.insert(link) { + // Duplicate link + return Err(ParseSpanLinkSetError {}); + } - s = s.trim_start(); + s = trim_start(s); } if s.len() != 1 { @@ -2016,14 +2026,11 @@ pub mod span_link_set { #[test] fn span_link_set_parse_exotic() { - // Deduplication: duplicate links produce single entry - let set = SpanLinkSet::try_from_str( + // Duplicate links produce an error + assert!(SpanLinkSet::try_from_str( "[0123456789abcdef0123456789abcdef-0123456789abcdef,0123456789abcdef0123456789abcdef-0123456789abcdef]" ) - .unwrap(); - - assert_eq!(1, set.len()); - assert!(set.contains(link1())); + .is_err()); } #[test] From c3294917f615a1f9679b6309c2585a5c1f64fc1c Mon Sep 17 00:00:00 2001 From: Ashley Date: Sat, 30 May 2026 21:23:03 +1000 Subject: [PATCH 4/8] don't support whitespace around set values --- src/metric.rs | 35 ++++++++++++++++++++-------------- src/span.rs | 53 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 54 insertions(+), 34 deletions(-) diff --git a/src/metric.rs b/src/metric.rs index 7497e518b..29b398649 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -2,7 +2,7 @@ The [`Metric`] type. */ -use core::{fmt, ops::ControlFlow, str}; +use core::{fmt, ops::ControlFlow}; use emit_core::{ and::And, @@ -1655,8 +1655,6 @@ pub mod exp { fn try_from_slice(mut s: &[u8]) -> Result { let mut set = BucketSet::new(); - s = trim(s); - if s.len() < 2 { // Truncated return Err(ParseBucketSetError {}); @@ -1670,25 +1668,22 @@ pub mod exp { _ => return Err(ParseBucketSetError {}), }; s = &s[1..]; + s = trim_start(s); let mut first = true; while s.len() > 1 { // Parse each bucket if !first { - s = trim_start(s); - if s.first() != Some(&b',') { // Invalid bucket: expected `,` return Err(ParseBucketSetError {}); } s = &s[1..]; + s = trim_start(s); } first = false; - // TODO: These accessors can panic on excessive whitespace - // Determine the kind of bucket we're parsing - s = trim_start(s); let (key_start_skip, key_end, value_end): ( usize, &[(u8, u8)], @@ -1716,25 +1711,23 @@ pub mod exp { s = &s[key_start_skip..]; // Find the bounds of the key - s = trim_start(s); let Some((key_end, key_end_skip)) = find(s, key_end) else { // Unexpected EOF parsing key: expected `$key_end` return Err(ParseBucketSetError {}); }; - let key = - str::from_utf8(&s[..key_end]).map_err(|_| ParseBucketSetError {})?; + let key = str::from_utf8(trim(&s[..key_end])) + .map_err(|_| ParseBucketSetError {})?; s = &s[key_end + key_end_skip..]; // Find the bounds of the value - s = trim_start(s); let Some((value_end, value_end_skip)) = find(s, value_end) else { // Unexpected EOF parsing value: expected `$value_end` return Err(ParseBucketSetError {}); }; - let value = - str::from_utf8(&s[..value_end]).map_err(|_| ParseBucketSetError {})?; + let value = str::from_utf8(trim(&s[..value_end])) + .map_err(|_| ParseBucketSetError {})?; s = &s[value_end + value_end_skip..]; // Parse the key and value @@ -1746,6 +1739,8 @@ pub mod exp { // Duplicate key return Err(ParseBucketSetError {}); } + + s = trim_start(s); } if s.len() != 1 { @@ -2583,6 +2578,18 @@ pub mod exp { set }, ), + ("[ [ 1 , 1 ] , [ 2 , 2 ] ]".to_string(), { + let mut set = BucketSet::new(); + set.observe_all(Point::new(1.0), 1); + set.observe_all(Point::new(2.0), 2); + set + }), + ("[ 1 : 1 , 2 : 2 ]".to_string(), { + let mut set = BucketSet::new(); + set.observe_all(Point::new(1.0), 1); + set.observe_all(Point::new(2.0), 2); + set + }), ] { assert_eq!( Some(expected), diff --git a/src/span.rs b/src/span.rs index 136e8fb84..f97608e9c 100644 --- a/src/span.rs +++ b/src/span.rs @@ -1100,7 +1100,7 @@ pub mod span_link_set { use alloc::collections::{btree_set, BTreeSet}; use core::{fmt::Write, str::FromStr}; - use crate::buf::{trim, trim_start}; + use crate::buf::trim_start; use emit_core::value::{FromValue, ToValue, Value}; /** @@ -1176,8 +1176,6 @@ pub mod span_link_set { fn try_from_slice(mut s: &[u8]) -> Result { let mut set = SpanLinkSet::new(); - s = trim(s); - if s.len() < 2 { return Err(ParseSpanLinkSetError {}); } @@ -1211,8 +1209,6 @@ pub mod span_link_set { } first = false; - // TODO: These accessors can panic on excessive whitespace - let link = match s.first() { Some(&b'"') => { // Parse a link surrounded by quotes @@ -1923,6 +1919,8 @@ pub mod span_link_set { mod tests { use super::*; + use std::collections::BTreeSet; + fn link1() -> SpanLink { SpanLink::new( TraceId::from_u128(0x0123456789abcdef0123456789abcdef).unwrap(), @@ -1999,40 +1997,53 @@ pub mod span_link_set { fn span_link_set_parse() { for (case, expected) in [ ( - "[0123456789abcdef0123456789abcdef-0123456789abcdef,fedcba9876543210fedcba9876543210-fedcba9876543210]", + format!("{:?}", [link1(), link2()]), set_with_links(), ), ( - "{0123456789abcdef0123456789abcdef-0123456789abcdef,fedcba9876543210fedcba9876543210-fedcba9876543210}", + format!("{:?}", ["0123456789abcdef0123456789abcdef-0123456789abcdef", "fedcba9876543210fedcba9876543210-fedcba9876543210"]), set_with_links(), ), ( - "(0123456789abcdef0123456789abcdef-0123456789abcdef,fedcba9876543210fedcba9876543210-fedcba9876543210)", + format!("{:?}", (link1(), link2())), set_with_links(), ), ( - " [ 0123456789abcdef0123456789abcdef-0123456789abcdef , fedcba9876543210fedcba9876543210-fedcba9876543210 ] ", + format!("{:?}", { + let mut set = BTreeSet::new(); + set.insert(link1()); + set.insert(link2()); + set + }), set_with_links(), ), - ("[]", SpanLinkSet::new()), + ( + format!("{:?}", { + let mut set = BTreeSet::new(); + set.insert("0123456789abcdef0123456789abcdef-0123456789abcdef"); + set.insert("fedcba9876543210fedcba9876543210-fedcba9876543210"); + set + }), + set_with_links(), + ), + ( + " [ 0123456789abcdef0123456789abcdef-0123456789abcdef , fedcba9876543210fedcba9876543210-fedcba9876543210 ] ".to_string(), + set_with_links(), + ), + ( + " [ \"0123456789abcdef0123456789abcdef-0123456789abcdef\" , \"fedcba9876543210fedcba9876543210-fedcba9876543210\" ] ".to_string(), + set_with_links(), + ), + ("[]".to_string(), SpanLinkSet::new()), ] { assert_eq!( Some(expected), - SpanLinkSet::try_from_str(case).ok(), + SpanLinkSet::try_from_str(&case).ok(), "{case}" ); } } - #[test] - fn span_link_set_parse_exotic() { - // Duplicate links produce an error - assert!(SpanLinkSet::try_from_str( - "[0123456789abcdef0123456789abcdef-0123456789abcdef,0123456789abcdef0123456789abcdef-0123456789abcdef]" - ) - .is_err()); - } - #[test] fn span_link_set_to_from_value() { for case in [set_with_links()] { @@ -2113,12 +2124,14 @@ pub mod span_link_set { "<>", "0123456789abcdef0123456789abcdef-0123456789abcdef", "[0123456789abcdef0123456789abcdef-0123456789abcdef,", + "[0123456789abcdef0123456789abcdef-0123456789abcdef)", "[,]", "[,,]", "[invalid]", "[0123456789abcdef0123456789abcdef-0123456789abcdef,invalid]", "{]", "([", + "[0123456789abcdef0123456789abcdef-0123456789abcdef,0123456789abcdef0123456789abcdef-0123456789abcdef]", ] { assert!(SpanLinkSet::try_from_str(case).is_err(), "{case}"); } From e96b752da955f885188f3858915f628c52a72593 Mon Sep 17 00:00:00 2001 From: Ashley Date: Sun, 31 May 2026 09:09:08 +1000 Subject: [PATCH 5/8] stub out some fuzzing infrastructure --- Cargo.toml | 2 +- fuzz/Cargo.toml | 16 +++++++++++ fuzz/README.md | 48 +++++++++++++++++++++++++++++++ fuzz/quickcheck.sh | 8 ++++++ fuzz/src/lib.rs | 64 ++++++++++++++++++++++++++++++++++++++++++ fuzz/timestamp/in/1 | 1 + fuzz/timestamp/main.rs | 27 ++++++++++++++++++ 7 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/README.md create mode 100755 fuzz/quickcheck.sh create mode 100644 fuzz/src/lib.rs create mode 100644 fuzz/timestamp/in/1 create mode 100644 fuzz/timestamp/main.rs diff --git a/Cargo.toml b/Cargo.toml index 6a2750c3d..9f3d4cc00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ members = [ "examples/trace_zipkin", "examples/metric_prometheus", "examples/high_volume", - "test/ui", + "test/ui", "fuzz", ] [package] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 000000000..3019e348f --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "emit_fuzz" +version = "0.0.0" +edition = "2024" +publish = false + +[[bin]] +name = "fuzz_timestamp" +path = "timestamp/main.rs" + +[dependencies.emit] +path = "../" + +[dependencies.afl] +version = "~0.17" +optional = true diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 000000000..7130b239d --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,48 @@ +# Fuzzing + +This directory contains some fuzz testing infrastructure for `emit`, using AFL++. + +## Adding a new test case + +1. Add a `fuzz/$TARGET_NAME/main.rs`, where `$TARGET_NAME` is a name for the test, like `timestamp`. See existing cases for the content to include in this file. +2. Add a `[[bin]]` to `fuzz/Cargo.toml` with the new target included. +3. Add an entry to the `fuzz/quickcheck.sh` to run it. + +## Running fuzz cases + +You'll need to install `cargo.afl`, which can be done through Cargo: + +```shell +cargo install -f argo-afl +``` + +Then, configure your system for fuzzing: + +```shell +cargo afl config --build --force +cargo afl system-config +``` + +Now you can build the fuzz targets: + +```shell +cargo afl build --manifest-path fuzz/Cargo.toml --features afl +``` + +And run one interactively: + +```shell +cargo afl fuzz -i fuzz/$TARGET_NAME/in -o target/fuzz_$TARGET_NAME target/debug/fuzz_$TARGET_NAME +``` + +where `$TARGET_NAME` is the name of the fuzz test you want to run. This should show you an AFL TUI. + +## Reproducing crashes + +Crashes are saved into the `target` directory and are re-tested by the regular unit tests on each fuzz case. Just run: + +```shell +cargo test -p emit_fuzz +``` + +and any crashes will be re-tested. diff --git a/fuzz/quickcheck.sh b/fuzz/quickcheck.sh new file mode 100755 index 000000000..ff465234b --- /dev/null +++ b/fuzz/quickcheck.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Linux only +# This is just a quick sanity check for CI. For the simple parsers we're fuzzing, crashes tend to be found _very_ quickly + +cargo afl build --manifest-path fuzz/Cargo.toml --features afl + +AFL_NO_UI=1 AFL_QUIET=1 timeout 30s cargo afl fuzz -i fuzz/timestamp/in -o target/fuzz_timestamp target/debug/fuzz_timestamp diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs new file mode 100644 index 000000000..efe4cb645 --- /dev/null +++ b/fuzz/src/lib.rs @@ -0,0 +1,64 @@ +use std::{fs, io::Read, panic}; + +pub fn main(fuzz_fn: impl Fn(&[u8]) + panic::RefUnwindSafe) { + #[cfg(feature = "afl")] + { + afl::fuzz!(|input: &[u8]| { fuzz_fn(input) }); + } + #[cfg(not(feature = "afl"))] + { + let _ = fuzz_fn; + + panic!("must be run with the `afl` feature") + } +} + +pub fn initial_cases(fuzz_target: &str, fuzz_fn: impl Fn(&[u8]) + panic::RefUnwindSafe) { + let dir = format!("{}/{fuzz_target}/in", env!("CARGO_MANIFEST_DIR"),); + + println!("running cases in {dir}"); + + for input in fs::read_dir(&dir).expect("failed to read inputs directory") { + let input = input.expect("invalid file").path(); + + println!("input: {:?}", input); + + let mut f = fs::File::open(input).expect("failed to open"); + let mut input = Vec::new(); + f.read_to_end(&mut input).expect("failed to read file"); + + fuzz_fn(&input); + } +} + +pub fn repro_cases(fuzz_target: &str, fuzz_fn: impl Fn(&[u8]) + panic::RefUnwindSafe) { + let dir = format!( + "{}/target/{fuzz_target}/default/crashes", + env!("CARGO_MANIFEST_DIR"), + ); + + println!("running cases in {dir}"); + + if let Ok(crashes) = fs::read_dir(&dir) { + let mut failed = false; + for crash in crashes { + let crash = crash.expect("invalid file").path(); + + println!("\n-----\nrepro: {:?}", crash); + + let mut f = fs::File::open(crash).expect("failed to open"); + let mut crash = Vec::new(); + f.read_to_end(&mut crash).expect("failed to read file"); + + if let Err(_) = panic::catch_unwind(|| fuzz_fn(&crash)) { + failed = true; + } + + println!("-----"); + } + + if failed { + panic!("some cases failed (see output above for details)"); + } + } +} diff --git a/fuzz/timestamp/in/1 b/fuzz/timestamp/in/1 new file mode 100644 index 000000000..3c0f15c55 --- /dev/null +++ b/fuzz/timestamp/in/1 @@ -0,0 +1 @@ +2026-05-30T22:32:09.000Z \ No newline at end of file diff --git a/fuzz/timestamp/main.rs b/fuzz/timestamp/main.rs new file mode 100644 index 000000000..a82178ad9 --- /dev/null +++ b/fuzz/timestamp/main.rs @@ -0,0 +1,27 @@ +fn main() { + emit_fuzz::main(de); +} + +pub fn de(input: &[u8]) { + // Just make sure we don't panic + let Ok(input) = std::str::from_utf8(input) else { + return; + }; + + let _ = emit::Timestamp::try_from_str(input); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn initial() { + emit_fuzz::initial_cases("timestamp", de); + } + + #[test] + fn repro() { + emit_fuzz::repro_cases("timestamp", de); + } +} From 350d1c11e53dd792c8fb0ad8383077ff5f70f4d1 Mon Sep 17 00:00:00 2001 From: KodrAus Date: Sun, 31 May 2026 15:20:40 +1000 Subject: [PATCH 6/8] run fuzz canary in CI --- .github/workflows/fuzz.yml | 25 +++++++++++++++++++++++++ Cargo.toml | 2 +- fuzz/.gitignore | 6 ++++++ fuzz/Cargo.toml | 7 ++++++- fuzz/canary.sh | 16 ++++++++++++++++ fuzz/quickcheck.sh | 8 -------- fuzz/src/lib.rs | 23 ++++++++++++++++++++--- 7 files changed, 74 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/fuzz.yml create mode 100644 fuzz/.gitignore create mode 100755 fuzz/canary.sh delete mode 100755 fuzz/quickcheck.sh diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 000000000..f9e7ed44a --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,25 @@ +name: fuzz + +on: [push, pull_request] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab + + - name: Install Rust toolchain + run: rustup default nightly + + - name: Install cargo-afl + run: cargo install cargo-afl + + - name: Configure AFL + run: cargo afl config --build --force && cargo afl system-config + + - name: Canary + run: fuzz/canary.sh diff --git a/Cargo.toml b/Cargo.toml index 9f3d4cc00..6a2750c3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ members = [ "examples/trace_zipkin", "examples/metric_prometheus", "examples/high_volume", - "test/ui", "fuzz", + "test/ui", ] [package] diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 000000000..c0e996797 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,6 @@ +.idea +/target +Cargo.lock +/wip +**/*.rs.bk +*.log diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 3019e348f..23aebe2dc 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -4,8 +4,13 @@ version = "0.0.0" edition = "2024" publish = false +[workspace] + +[features] +force = [] + [[bin]] -name = "fuzz_timestamp" +name = "timestamp" path = "timestamp/main.rs" [dependencies.emit] diff --git a/fuzz/canary.sh b/fuzz/canary.sh new file mode 100755 index 000000000..9e3a18845 --- /dev/null +++ b/fuzz/canary.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +# Linux only +# This is just a quick sanity check for CI. For the simple parsers we're fuzzing, crashes tend to be found _very_ quickly + +cargo test --manifest-path fuzz/Cargo.toml +cargo afl build --manifest-path fuzz/Cargo.toml --features afl + +# Add new fuzz cases here +AFL_NO_UI=1 timeout 17s cargo afl fuzz -i fuzz/timestamp/in -o fuzz/target/timestamp fuzz/target/debug/timestamp > /dev/null 2>&1 & + +echo "waiting for fuzz run..." +sleep 20s + +cargo test --manifest-path fuzz/Cargo.toml --features force diff --git a/fuzz/quickcheck.sh b/fuzz/quickcheck.sh deleted file mode 100755 index ff465234b..000000000 --- a/fuzz/quickcheck.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -# Linux only -# This is just a quick sanity check for CI. For the simple parsers we're fuzzing, crashes tend to be found _very_ quickly - -cargo afl build --manifest-path fuzz/Cargo.toml --features afl - -AFL_NO_UI=1 AFL_QUIET=1 timeout 30s cargo afl fuzz -i fuzz/timestamp/in -o target/fuzz_timestamp target/debug/fuzz_timestamp diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index efe4cb645..0c112a091 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -18,7 +18,10 @@ pub fn initial_cases(fuzz_target: &str, fuzz_fn: impl Fn(&[u8]) + panic::RefUnwi println!("running cases in {dir}"); + let mut any = false; for input in fs::read_dir(&dir).expect("failed to read inputs directory") { + any = true; + let input = input.expect("invalid file").path(); println!("input: {:?}", input); @@ -29,27 +32,36 @@ pub fn initial_cases(fuzz_target: &str, fuzz_fn: impl Fn(&[u8]) + panic::RefUnwi fuzz_fn(&input); } + + assert!(any, "no test cases were executed"); } pub fn repro_cases(fuzz_target: &str, fuzz_fn: impl Fn(&[u8]) + panic::RefUnwindSafe) { let dir = format!( - "{}/target/{fuzz_target}/default/crashes", + "{}/target/{fuzz_target}/default", env!("CARGO_MANIFEST_DIR"), ); println!("running cases in {dir}"); - if let Ok(crashes) = fs::read_dir(&dir) { + if let Ok(crashes) = fs::read_dir(format!("{dir}/crashes")) { let mut failed = false; for crash in crashes { let crash = crash.expect("invalid file").path(); - println!("\n-----\nrepro: {:?}", crash); + if let Some("README.txt") = crash.file_name().and_then(|name| name.to_str()) { + continue; + } + + println!("\n-----\nrepro: {crash:?}"); let mut f = fs::File::open(crash).expect("failed to open"); let mut crash = Vec::new(); f.read_to_end(&mut crash).expect("failed to read file"); + println!("repro: {crash:?}"); + println!("repro: {:?}", String::from_utf8_lossy(&crash)); + if let Err(_) = panic::catch_unwind(|| fuzz_fn(&crash)) { failed = true; } @@ -60,5 +72,10 @@ pub fn repro_cases(fuzz_target: &str, fuzz_fn: impl Fn(&[u8]) + panic::RefUnwind if failed { panic!("some cases failed (see output above for details)"); } + } else { + #[cfg(feature = "force")] + { + assert!(fs::exists(&dir).expect("failed to get file info"), "the {fuzz_target} target didn't execute; this probably means the fuzzing harness is broken"); + } } } From 4e12e66b81b9ab282830e896167408be417bb13c Mon Sep 17 00:00:00 2001 From: Ashley Date: Mon, 1 Jun 2026 20:10:38 +1000 Subject: [PATCH 7/8] tighten up Timestamp parser and semantics --- core/src/timestamp.rs | 155 ++++++++++++++++++++++++++++------------- fuzz/README.md | 4 +- fuzz/timestamp/main.rs | 7 +- 3 files changed, 115 insertions(+), 51 deletions(-) diff --git a/core/src/timestamp.rs b/core/src/timestamp.rs index e065c48e9..347622d11 100644 --- a/core/src/timestamp.rs +++ b/core/src/timestamp.rs @@ -75,13 +75,6 @@ pub struct Parts { pub nanos: u32, } -// 2000-03-01 (mod 400 year, immediately after feb29 -const LEAPOCH_SECS: u64 = 946_684_800 + 86400 * (31 + 29); -const DAYS_PER_400Y: i32 = 365 * 400 + 97; -const DAYS_PER_100Y: i32 = 365 * 100 + 24; -const DAYS_PER_4Y: i32 = 365 * 4 + 1; -const DAYS_IN_MONTH: [u8; 12] = [31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29]; - // 1970-01-01T00:00:00.000000000Z const MIN: Duration = Duration::new(0, 0); @@ -170,11 +163,13 @@ impl Timestamp { /** Try get a timestamp from its individual date and time parts. - If the resulting timestamp is within [`Timestamp::MIN`]..=[`Timestamp::MAX`] then this method will return `Some`. Otherwise it will return `None`. - - If any field of `parts` would overflow its maximum value, such as `days: 32`, then it will wrap into the next unit. + If the resulting timestamp is within [`Timestamp::MIN`]..=[`Timestamp::MAX`] then this method will return `Some`. + If it's outside then it will return `None`. If any of the fields in `parts` is out of range for its type, such as `days: 32`, then this method will also return `None`. */ pub fn from_parts(parts: Parts) -> Option { + const DAYS_IN_MONTH: [u8; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; + const LEAP_DAYS_IN_MONTH: [u8; 12] = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let is_leap; let start_of_year; let year = (parts.years as i64) - 1900; @@ -237,6 +232,35 @@ impl Timestamp { + i128::from(leaps * 86400 + 946_684_800 + 86400); } + if parts.months < 1 || parts.months > 12 { + // Invalid month + return None; + } + + if parts.days == 0 + || parts.days + > DAYS_IN_MONTH[usize::from(parts.months - 1)] + + (LEAP_DAYS_IN_MONTH[usize::from(parts.months - 1)] * u8::from(is_leap)) + { + // Invalid days + return None; + } + + if parts.hours > 23 { + // Invalid hour + return None; + } + + if parts.minutes > 59 { + // Invalid minutes + return None; + } + + if parts.seconds > 60 { + // Invalid seconds + return None; + } + let seconds_within_month = 86400 * u32::from(parts.days - 1) + 3600 * u32::from(parts.hours) + 60 * u32::from(parts.minutes) @@ -276,6 +300,13 @@ impl Timestamp { The returned parts are in exactly the form needed to display them. Months and days are both one-based. */ pub fn to_parts(&self) -> Parts { + // 2000-03-01 (mod 400 year, immediately after feb29 + const LEAPOCH_SECS: u64 = 946_684_800 + 86400 * (31 + 29); + const DAYS_PER_400Y: i32 = 365 * 400 + 97; + const DAYS_PER_100Y: i32 = 365 * 100 + 24; + const DAYS_PER_4Y: i32 = 365 * 4 + 1; + const DAYS_IN_MONTH: [u8; 12] = [31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29]; + let dur = self.0; let secs = dur.as_secs(); let nanos = dur.subsec_nanos(); @@ -479,25 +510,51 @@ impl fmt::Display for ParseTimestampError { impl std::error::Error for ParseTimestampError {} fn parse_rfc3339(fmt: &str) -> Result { - if fmt.len() > 30 || fmt.len() < 19 { + fn utf8(fmt: &[u8]) -> Result<&str, ParseTimestampError> { + str::from_utf8(fmt).map_err(|_| ParseTimestampError {}) + } + + let fmt = fmt.as_bytes(); + + if fmt.len() > 30 || fmt.len() < 20 { // Invalid length return Err(ParseTimestampError {}); } - if *fmt.as_bytes().last().unwrap() != b'Z' { + if *fmt.last().unwrap() != b'Z' { // Non-UTC return Err(ParseTimestampError {}); } - let years = u16::from_str_radix(&fmt[0..4], 10).map_err(|_| ParseTimestampError {})?; - let months = u8::from_str_radix(&fmt[5..7], 10).map_err(|_| ParseTimestampError {})?; - let days = u8::from_str_radix(&fmt[8..10], 10).map_err(|_| ParseTimestampError {})?; - let hours = u8::from_str_radix(&fmt[11..13], 10).map_err(|_| ParseTimestampError {})?; - let minutes = u8::from_str_radix(&fmt[14..16], 10).map_err(|_| ParseTimestampError {})?; - let seconds = u8::from_str_radix(&fmt[17..19], 10).map_err(|_| ParseTimestampError {})?; - let nanos = if fmt.len() > 19 { + if [fmt[4], fmt[7], fmt[10], fmt[13], fmt[16]] != [b'-', b'-', b'T', b':', b':'] { + // Invalid format + return Err(ParseTimestampError {}); + } + + if fmt[19] != b'.' && fmt.len() > 20 { + // Invalid format + return Err(ParseTimestampError {}); + } + + let years = u16::from_str_radix(utf8(&fmt[0..4])?, 10).map_err(|_| ParseTimestampError {})?; + + let months = u8::from_str_radix(utf8(&fmt[5..7])?, 10).map_err(|_| ParseTimestampError {})?; + + let days = u8::from_str_radix(utf8(&fmt[8..10])?, 10).map_err(|_| ParseTimestampError {})?; + + let hours = u8::from_str_radix(utf8(&fmt[11..13])?, 10).map_err(|_| ParseTimestampError {})?; + + let minutes = + u8::from_str_radix(utf8(&fmt[14..16])?, 10).map_err(|_| ParseTimestampError {})?; + + let seconds = + u8::from_str_radix(utf8(&fmt[17..19])?, 10).map_err(|_| ParseTimestampError {})?; + + let nanos = if fmt.len() > 20 { let subsecond = &fmt[20..fmt.len() - 1]; - u32::from_str_radix(subsecond, 10).unwrap() * 10u32.pow(9 - subsecond.len() as u32) + + u32::from_str_radix(utf8(subsecond)?, 10).map_err(|_| ParseTimestampError {})? + * 10u32.pow(9 - subsecond.len() as u32) } else { 0 }; @@ -587,18 +644,31 @@ mod tests { #[test] fn roundtrip() { - let ts = Timestamp::from_unix(Duration::new(1691961703, 17532)).unwrap(); - - let fmt = ts.to_string(); - - for parsed in [ - Timestamp::try_from_str(&fmt), - Timestamp::parse(&fmt), - fmt.parse(), - ] { - let parsed = parsed.unwrap(); - - assert_eq!(ts, parsed, "{}", fmt); + for days in 1..=31 { + for months in 1..=12 { + for years in [2019, 2020] { + let Some(ts) = Timestamp::from_parts(Parts { + years, + months, + days, + ..Default::default() + }) else { + continue; + }; + + let fmt = ts.to_string(); + + for parsed in [ + Timestamp::try_from_str(&fmt), + Timestamp::parse(&fmt), + fmt.parse(), + ] { + let parsed = parsed.unwrap(); + + assert_eq!(ts, parsed, "{fmt}"); + } + } + } } } @@ -610,6 +680,10 @@ mod tests { "2024-01-01T00:00:00.00000000000000000000000000Z", "2024-01-01T00:00:00.000+10", "Thursday, September 12, 2024", + "2026-02,71N86;\0ʚ;8,111Z", + "0000-00-00T00:00:00.000000000Z", + "2026-05-30T22:32:09.0A0Z", + "2026-05-30T22:32:0Z", ] { assert!(Timestamp::try_from_str(case).is_err()); assert!(Timestamp::parse(case).is_err()); @@ -650,7 +724,7 @@ mod tests { #[test] fn parts_overflow() { - let ts = Timestamp::from_parts(Parts { + assert!(Timestamp::from_parts(Parts { years: 2000, months: 13, days: 32, @@ -659,20 +733,7 @@ mod tests { seconds: 61, nanos: 1000000000, }) - .unwrap(); - - let expected = Timestamp::from_parts(Parts { - years: 2000, - months: 13, - days: 32, - hours: 25, - minutes: 61, - seconds: 62, - nanos: 0, - }) - .unwrap(); - - assert_eq!(expected, ts); + .is_none(),); } #[test] diff --git a/fuzz/README.md b/fuzz/README.md index 7130b239d..69fe4dfa6 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -32,7 +32,7 @@ cargo afl build --manifest-path fuzz/Cargo.toml --features afl And run one interactively: ```shell -cargo afl fuzz -i fuzz/$TARGET_NAME/in -o target/fuzz_$TARGET_NAME target/debug/fuzz_$TARGET_NAME +cargo afl fuzz -i fuzz/$TARGET_NAME/in -o fuzz/target/$TARGET_NAME fuzz/target/debug/$TARGET_NAME ``` where `$TARGET_NAME` is the name of the fuzz test you want to run. This should show you an AFL TUI. @@ -42,7 +42,7 @@ where `$TARGET_NAME` is the name of the fuzz test you want to run. This should s Crashes are saved into the `target` directory and are re-tested by the regular unit tests on each fuzz case. Just run: ```shell -cargo test -p emit_fuzz +cargo test --manifest-path fuzz/Cargo.toml ``` and any crashes will be re-tested. diff --git a/fuzz/timestamp/main.rs b/fuzz/timestamp/main.rs index a82178ad9..4955248f9 100644 --- a/fuzz/timestamp/main.rs +++ b/fuzz/timestamp/main.rs @@ -3,12 +3,15 @@ fn main() { } pub fn de(input: &[u8]) { - // Just make sure we don't panic let Ok(input) = std::str::from_utf8(input) else { return; }; - let _ = emit::Timestamp::try_from_str(input); + let Ok(ts) = emit::Timestamp::try_from_str(input) else { + return; + }; + + assert_eq!(ts, emit::Timestamp::try_from_str(&ts.to_string()).unwrap()); } #[cfg(test)] From 056c7fa16c1883f24c7cfe698a8f151e3af3f4ac Mon Sep 17 00:00:00 2001 From: Ashley Date: Mon, 1 Jun 2026 21:27:19 +1000 Subject: [PATCH 8/8] add fuzz tests for set types and fix some indexing bugs --- fuzz/Cargo.toml | 8 ++++++++ fuzz/bucket_set/in/1 | 1 + fuzz/bucket_set/in/2 | 1 + fuzz/bucket_set/in/3 | 1 + fuzz/bucket_set/in/4 | 1 + fuzz/bucket_set/main.rs | 30 ++++++++++++++++++++++++++++++ fuzz/canary.sh | 2 ++ fuzz/span_link_set/in/1 | 1 + fuzz/span_link_set/in/2 | 1 + fuzz/span_link_set/in/3 | 1 + fuzz/span_link_set/main.rs | 30 ++++++++++++++++++++++++++++++ src/span.rs | 15 ++++++++------- 12 files changed, 85 insertions(+), 7 deletions(-) create mode 100644 fuzz/bucket_set/in/1 create mode 100644 fuzz/bucket_set/in/2 create mode 100644 fuzz/bucket_set/in/3 create mode 100644 fuzz/bucket_set/in/4 create mode 100644 fuzz/bucket_set/main.rs create mode 100644 fuzz/span_link_set/in/1 create mode 100644 fuzz/span_link_set/in/2 create mode 100644 fuzz/span_link_set/in/3 create mode 100644 fuzz/span_link_set/main.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 23aebe2dc..edda417d2 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -13,6 +13,14 @@ force = [] name = "timestamp" path = "timestamp/main.rs" +[[bin]] +name = "bucket_set" +path = "bucket_set/main.rs" + +[[bin]] +name = "span_link_set" +path = "span_link_set/main.rs" + [dependencies.emit] path = "../" diff --git a/fuzz/bucket_set/in/1 b/fuzz/bucket_set/in/1 new file mode 100644 index 000000000..6382feb84 --- /dev/null +++ b/fuzz/bucket_set/in/1 @@ -0,0 +1 @@ +[[1.23456789, 1], [2, 2]] \ No newline at end of file diff --git a/fuzz/bucket_set/in/2 b/fuzz/bucket_set/in/2 new file mode 100644 index 000000000..fdaac4400 --- /dev/null +++ b/fuzz/bucket_set/in/2 @@ -0,0 +1 @@ +{{1.23456789, 1}, {2.0, 2}} \ No newline at end of file diff --git a/fuzz/bucket_set/in/3 b/fuzz/bucket_set/in/3 new file mode 100644 index 000000000..a3d99a2a0 --- /dev/null +++ b/fuzz/bucket_set/in/3 @@ -0,0 +1 @@ +{1.23456789: 1, 2.0: 2} \ No newline at end of file diff --git a/fuzz/bucket_set/in/4 b/fuzz/bucket_set/in/4 new file mode 100644 index 000000000..5ecf73e96 --- /dev/null +++ b/fuzz/bucket_set/in/4 @@ -0,0 +1 @@ +((1.23456789, 1), (2.0, 2)) \ No newline at end of file diff --git a/fuzz/bucket_set/main.rs b/fuzz/bucket_set/main.rs new file mode 100644 index 000000000..488f5f727 --- /dev/null +++ b/fuzz/bucket_set/main.rs @@ -0,0 +1,30 @@ +fn main() { + emit_fuzz::main(de); +} + +pub fn de(input: &[u8]) { + let Ok(input) = std::str::from_utf8(input) else { + return; + }; + + let Ok(set) = emit::metric::exp::BucketSet::try_from_str(input) else { + return; + }; + + assert_eq!(set, emit::metric::exp::BucketSet::try_from_str(&set.to_string()).unwrap()); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn initial() { + emit_fuzz::initial_cases("bucket_set", de); + } + + #[test] + fn repro() { + emit_fuzz::repro_cases("bucket_set", de); + } +} diff --git a/fuzz/canary.sh b/fuzz/canary.sh index 9e3a18845..717f95a12 100755 --- a/fuzz/canary.sh +++ b/fuzz/canary.sh @@ -9,6 +9,8 @@ cargo afl build --manifest-path fuzz/Cargo.toml --features afl # Add new fuzz cases here AFL_NO_UI=1 timeout 17s cargo afl fuzz -i fuzz/timestamp/in -o fuzz/target/timestamp fuzz/target/debug/timestamp > /dev/null 2>&1 & +AFL_NO_UI=1 timeout 17s cargo afl fuzz -i fuzz/bucket_set/in -o fuzz/target/bucket_set fuzz/target/debug/bucket_set > /dev/null 2>&1 & +AFL_NO_UI=1 timeout 17s cargo afl fuzz -i fuzz/span_link_set/in -o fuzz/target/span_link_set fuzz/target/debug/span_link_set > /dev/null 2>&1 & echo "waiting for fuzz run..." sleep 20s diff --git a/fuzz/span_link_set/in/1 b/fuzz/span_link_set/in/1 new file mode 100644 index 000000000..e6f462621 --- /dev/null +++ b/fuzz/span_link_set/in/1 @@ -0,0 +1 @@ +[0123456789abcdef0123456789abcdef-0123456789abcdef, "fedcba9876543210fedcba9876543210-fedcba9876543210"] \ No newline at end of file diff --git a/fuzz/span_link_set/in/2 b/fuzz/span_link_set/in/2 new file mode 100644 index 000000000..45260866f --- /dev/null +++ b/fuzz/span_link_set/in/2 @@ -0,0 +1 @@ +{0123456789abcdef0123456789abcdef-0123456789abcdef, "fedcba9876543210fedcba9876543210-fedcba9876543210"} \ No newline at end of file diff --git a/fuzz/span_link_set/in/3 b/fuzz/span_link_set/in/3 new file mode 100644 index 000000000..44beabd89 --- /dev/null +++ b/fuzz/span_link_set/in/3 @@ -0,0 +1 @@ +(0123456789abcdef0123456789abcdef-0123456789abcdef, "fedcba9876543210fedcba9876543210-fedcba9876543210") \ No newline at end of file diff --git a/fuzz/span_link_set/main.rs b/fuzz/span_link_set/main.rs new file mode 100644 index 000000000..2b187e005 --- /dev/null +++ b/fuzz/span_link_set/main.rs @@ -0,0 +1,30 @@ +fn main() { + emit_fuzz::main(de); +} + +pub fn de(input: &[u8]) { + let Ok(input) = std::str::from_utf8(input) else { + return; + }; + + let Ok(set) = emit::span::SpanLinkSet::try_from_str(input) else { + return; + }; + + assert_eq!(set, emit::span::SpanLinkSet::try_from_str(&set.to_string()).unwrap()); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn initial() { + emit_fuzz::initial_cases("span_link_set", de); + } + + #[test] + fn repro() { + emit_fuzz::repro_cases("span_link_set", de); + } +} diff --git a/src/span.rs b/src/span.rs index f97608e9c..87663ccb6 100644 --- a/src/span.rs +++ b/src/span.rs @@ -1193,11 +1193,6 @@ pub mod span_link_set { let mut first = true; while s.len() > 1 { - if s.len() < 49 { - // Unexpected EOF parsing link: not enough chars for `$link` - return Err(ParseSpanLinkSetError {}); - } - // Parse each link if !first { if s.first() != Some(&b',') { @@ -1223,6 +1218,11 @@ pub mod span_link_set { link } _ => { + if s.len() < 49 { + // Unexpected EOF parsing link: not enough chars for `$link` + return Err(ParseSpanLinkSetError {}); + } + // Parse an unquoted span link let link = &s[0..49]; s = &s[49..]; @@ -2027,11 +2027,11 @@ pub mod span_link_set { set_with_links(), ), ( - " [ 0123456789abcdef0123456789abcdef-0123456789abcdef , fedcba9876543210fedcba9876543210-fedcba9876543210 ] ".to_string(), + "[ 0123456789abcdef0123456789abcdef-0123456789abcdef , fedcba9876543210fedcba9876543210-fedcba9876543210 ]".to_string(), set_with_links(), ), ( - " [ \"0123456789abcdef0123456789abcdef-0123456789abcdef\" , \"fedcba9876543210fedcba9876543210-fedcba9876543210\" ] ".to_string(), + "[ \"0123456789abcdef0123456789abcdef-0123456789abcdef\" , \"fedcba9876543210fedcba9876543210-fedcba9876543210\" ]".to_string(), set_with_links(), ), ("[]".to_string(), SpanLinkSet::new()), @@ -2132,6 +2132,7 @@ pub mod span_link_set { "{]", "([", "[0123456789abcdef0123456789abcdef-0123456789abcdef,0123456789abcdef0123456789abcdef-0123456789abcdef]", + "{0123456789abcdef0123456789Cbcdef-0123456A89abcdef, fba9876543210fedcba9876543210-fedcba9876543210\"}", ] { assert!(SpanLinkSet::try_from_str(case).is_err(), "{case}"); }