From 130e396f2b033c043cfc5dffef3ec344ae84ddb2 Mon Sep 17 00:00:00 2001
From: Andy Pfister
Date: Mon, 30 Mar 2026 16:53:07 +0200
Subject: [PATCH] Optimise `unescape`

---
 lib/rdf/ntriples/reader.rb | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/lib/rdf/ntriples/reader.rb b/lib/rdf/ntriples/reader.rb
index 8d7df287..5bf0afbd 100644
--- a/lib/rdf/ntriples/reader.rb
+++ b/lib/rdf/ntriples/reader.rb
@@ -178,6 +178,8 @@ def self.parse_literal(input, **options)
     ESCAPE_CHARS_ESCAPED_REGEXP = Regexp.union(
       ESCAPE_CHARS_ESCAPED.keys
     ).freeze
+    # Combined pattern for a single-pass unescape (UCHAR first, then escape chars)
+    UNESCAPE_COMBINED = Regexp.union(UCHAR, ESCAPE_CHARS_ESCAPED_REGEXP).freeze
 
     ##
     # @param [String] string
@@ -190,11 +192,13 @@ def self.unescape(string)
       # greatly reduces the number of allocations and the processing time.
       string = string.dup.force_encoding(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
 
-      string
-        .gsub(UCHAR) do
-          [($1 || $2).hex].pack('U*')
-        end
-        .gsub(ESCAPE_CHARS_ESCAPED_REGEXP, ESCAPE_CHARS_ESCAPED)
+      # Early return when nothing to unescape: avoids string allocation entirely.
+      return string unless string.match?(UNESCAPE_COMBINED)
+
+      # Single pass handles both \uXXXX/\UXXXXXXXX and backslash escape chars.
+      string.gsub(UNESCAPE_COMBINED) do |match|
+        ($1 || $2) ? [($1 || $2).hex].pack('U*') : ESCAPE_CHARS_ESCAPED[match]
+      end
     end
 
     ##