From 72f4f5ee84bfbaae9ca97cf5a0bef708197a83ab Mon Sep 17 00:00:00 2001 From: Arunesh Dwivedi Date: Sat, 6 Jun 2026 06:48:58 +0000 Subject: [PATCH] fix: prevent panic on multi-byte UTF-8 in SQL obfuscation When the SQL tokenizer returns an error location that falls in the middle of a multi-byte UTF-8 character (e.g., Chinese, Japanese), byte_offset would not be at a char boundary, causing a panic at &sql[..byte_offset]. Use str::floor_char_boundary() to round the offset down to the closest char boundary at or before it before slicing. Fixes deepflowio/deepflow#11791 --- agent/src/flow_generator/protocol_logs/sql/sql_obfuscate.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/agent/src/flow_generator/protocol_logs/sql/sql_obfuscate.rs b/agent/src/flow_generator/protocol_logs/sql/sql_obfuscate.rs index 0cfc9b3b8e8..02d0c3d0214 100644 --- a/agent/src/flow_generator/protocol_logs/sql/sql_obfuscate.rs +++ b/agent/src/flow_generator/protocol_logs/sql/sql_obfuscate.rs @@ -94,7 +94,8 @@ impl Obfuscator { .map(|l| l.len()) .sum::<usize>() + (location.column as usize).saturating_sub(1); - let truncated = &sql[..byte_offset.min(sql.len())]; + let byte_offset = byte_offset.min(sql.len()); + let truncated = &sql[..sql.floor_char_boundary(byte_offset)]; let mut tokens = Tokenizer::new(&dialect, truncated) .with_unescape(false) .tokenize()?;