Patch summary (reviewer notes; this text sits ahead of the first `diff --git`
header and is commentary, not part of any hunk).

Files touched: core/String.carp, core/carp_string.h, test/string.carp.

core/String.carp
  * `tail` is now registered as `(Fn [&String] String)` instead of
    `(λ [(Ref String)] String)`.
  * `starts-with?` and `ends-with?` stop being Carp `defn`s built on
    `length`/`prefix`/`suffix` and become `register` declarations of type
    `(Fn [&String &String] Bool)`.
    NOTE(review): presumably these bind to the C functions
    `String_starts_MINUS_with_QMARK_` / `String_ends_MINUS_with_QMARK_` added
    to carp_string.h by this same patch -- confirm the name mapping.
  * New `copy-map`, `copy-filter`, `any?`, `all?`, `reduce`: each calls the
    `Array` function of the same name on `&(chars s)`; `copy-map` and
    `copy-filter` turn the resulting array back into a string with
    `from-chars`.
  * New `replace` is registered as `(Fn [&String &String &String] String)`.
  * The `(doc str ...)` line of `Uint32Extra` is removed.
    NOTE(review): no other hunk in this patch touches Uint32Extra -- confirm
    that dropping this doc line is intentional.

core/carp_string.h
  * `String_starts_MINUS_with_QMARK_(s, sub)`: returns false when
    `strlen(*sub) > strlen(*s)`; otherwise compares the first `lsub` bytes of
    `*s` with `*sub` using `memcmp`. With an empty `sub` the compared length
    is zero, so the result is true.
  * `String_ends_MINUS_with_QMARK_(s, sub)`: same length guard; compares the
    last `lsub` bytes of `*s` (starting at `*s + (ls - lsub)`) with `*sub`.
  * `String_replace(s, needle, replacement)`:
      - empty needle, or needle not found: returns `String_copy(s)`.
      - otherwise pass 1 counts matches with `strstr`, advancing by
        `needle_len` after every hit, so matches are taken left to right and
        never overlap.
      - one `CARP_MALLOC(new_len + 1)` follows; NULL is returned if that call
        yields NULL.
      - pass 2 copies, per match, the skipped gap and then the replacement
        with `memcpy`, and finally appends the rest of the input (terminator
        included) with `strcpy`.
      - all lengths are byte counts obtained from `strlen`.

test/string.carp
  * Adds assertions for `copy-map`, `copy-filter`, `any?` (2), `all?` (2),
    `reduce`, and ten `replace` cases: single-character swap, repeated word,
    replacement longer than the needle, empty needle, empty replacement,
    absent needle, whole-string match, "aa" in "aaaaa" giving "bba", matches
    at both ends, and a multi-byte emoji needle.

diff --git a/core/String.carp b/core/String.carp index 41a595ae9..3c946b504 100644 --- a/core/String.carp +++ b/core/String.carp @@ -50,7 +50,7 @@ (doc from-chars "Creates a string from an array of characters.") (register from-chars (Fn [&(Array Char)] String)) (doc tail "Returns the tail of a string (all characters except the first).") - (register tail (λ [(Ref String)] String)) + (register tail (Fn [&String] String)) (doc format "Formats a string.") (register format (Fn [&String &String] String)) (implements format String.format) @@ -133,15 +133,9 @@ (from-chars &(Array.suffix &(chars s) b))) (doc starts-with? "Check if the string `s` begins with the string `sub`.") - (defn starts-with? [s sub] - (let [ls (length sub)] - (and (>= (length s) ls) (= sub &(prefix s ls))))) - + (register starts-with? (Fn [&String &String] Bool)) (doc ends-with? "Check if the string `s` ends with the string `sub`.") - (defn ends-with? [s sub] - (let [ls (length s) - lsub (length sub)] - (and (>= ls lsub) (= sub &(suffix s (- ls lsub)))))) + (register ends-with? (Fn [&String &String] Bool)) (doc zero "The empty string.") (defn zero [] @"") @@ -240,6 +234,29 @@ (doc lines "splits a string into lines.") (defn lines [s] (split-by s &[\newline])) + + (doc copy-map "Maps a function over the UTF-8 characters (`chars`) of a string, returning a new string.") + (defn copy-map [f s] + (from-chars &(Array.copy-map f &(chars s)))) + + (doc copy-filter "Filters the UTF-8 characters (`chars`) of a string using a predicate, returning a new string.") + (defn copy-filter [f s] + (from-chars &(Array.copy-filter f &(chars s)))) + + (doc any? "Checks if any UTF-8 character (`chars`) in a string satisfies a predicate.") + (defn any? [f s] + (Array.any? f &(chars s))) + + (doc all? "Checks if all UTF-8 characters (`chars`) in a string satisfy a predicate.") + (defn all? [f s] + (Array.all? 
f &(chars s))) + + (doc reduce "Reduces a string using its UTF-8 characters (`chars`), a function and an initial value.") + (defn reduce [f init s] + (Array.reduce f init &(chars s))) + + (doc replace "Replaces all occurrences of a literal substring in a string with another string.") + (register replace (Fn [&String &String &String] String)) ) (defmodule StringCopy @@ -573,7 +590,6 @@ ) (defmodule Uint32Extra - (doc str "Converts a Uint32 reference to a string.") (defn str [a] (Uint32.str @a)) (implements str Uint32Extra.str) (doc prn "Converts a Uint32 reference to a string for printing.") diff --git a/core/carp_string.h b/core/carp_string.h index 631fd088e..02c4fafbc 100644 --- a/core/carp_string.h +++ b/core/carp_string.h @@ -191,6 +191,20 @@ String String_tail(const String* s) { return news; } +bool String_starts_MINUS_with_QMARK_(const String* s, const String* sub) { + size_t ls = strlen(*s); + size_t lsub = strlen(*sub); + if (lsub > ls) return false; + return memcmp(*s, *sub, lsub) == 0; +} + +bool String_ends_MINUS_with_QMARK_(const String* s, const String* sub) { + size_t ls = strlen(*s); + size_t lsub = strlen(*sub); + if (lsub > ls) return false; + return memcmp(*s + (ls - lsub), *sub, lsub) == 0; +} + String String_empty() { String s = CARP_MALLOC(1); s[0] = '\0'; @@ -394,6 +408,49 @@ int String_index_MINUS_of_MINUS_string(const String* s, const String* needle) { return (int)(result - *s); } +String String_replace(const String* s, const String* needle, const String* replacement) { + size_t needle_len = strlen(*needle); + if (needle_len == 0) { + return String_copy(s); + } + + size_t s_len = strlen(*s); + size_t replacement_len = strlen(*replacement); + + // First pass: count occurrences + size_t count = 0; + const char* tmp = *s; + while ((tmp = strstr(tmp, *needle))) { + count++; + tmp += needle_len; + } + + if (count == 0) { + return String_copy(s); + } + + // Allocate once + size_t new_len = s_len + (count * replacement_len) - (count * needle_len); 
String result = CARP_MALLOC(new_len + 1); + if (result == NULL) return NULL; + + // Second pass: fill the buffer + char* insert_ptr = result; + const char* current_ptr = *s; + while (count--) { + const char* next_needle = strstr(current_ptr, *needle); + size_t skipped_len = next_needle - current_ptr; + memcpy(insert_ptr, current_ptr, skipped_len); + insert_ptr += skipped_len; + memcpy(insert_ptr, *replacement, replacement_len); + insert_ptr += replacement_len; + current_ptr = next_needle + needle_len; + } + strcpy(insert_ptr, current_ptr); + + return result; +} + String Pointer_strp(void* in) { int size = snprintf(NULL, 0, "%p", in) + 1; String buffer = CARP_MALLOC(size); diff --git a/test/string.carp b/test/string.carp index 47c14ae04..f9f2ddbbb 100644 --- a/test/string.carp +++ b/test/string.carp @@ -400,4 +400,69 @@ 3 (Dynamic.length (Dynamic.String.to-array "abc")) "Dynamic.String.to-array works as expected (check length)") + + (assert-equal test + "bcde" + &(copy-map &(fn [c] (Char.from-int (Int.inc (Char.to-int @c)))) "abcd") + "copy-map works as expected") + (assert-equal test + "bd" + &(copy-filter &(fn [c] (or (= @c \b) (= @c \d))) "abcd") + "copy-filter works as expected") + (assert-true test + (any? &(fn [c] (= @c \c)) "abcd") + "any? works as expected I") + (assert-false test + (any? &(fn [c] (= @c \z)) "abcd") + "any? works as expected II") + (assert-true test + (all? &(fn [c] (not (= @c \z))) "abcd") + "all? works as expected I") + (assert-false test + (all? &(fn [c] (= @c \a)) "abcd") + "all? 
works as expected II") + (assert-equal test + 4 + (reduce &(fn [acc c] (Int.inc acc)) 0 "abcd") + "reduce works as expected (counting characters)") + (assert-equal test + "hEllo world" + &(replace "hello world" "e" "E") + "replace works as expected I") + (assert-equal test + "hi there hi" + &(replace "hello there hello" "hello" "hi") + "replace works as expected II") + (assert-equal test + "aaaaa" + &(replace "aba" "b" "aaa") + "replace works as expected III") + (assert-equal test + "xyz" + &(replace "xyz" "" "abc") + "replace with empty needle returns original string") + (assert-equal test + "" + &(replace "aaaaa" "a" "") + "replace with empty replacement works") + (assert-equal test + "hello world" + &(replace "hello world" "universe" "planet") + "replace with non-existent needle returns copy") + (assert-equal test + "abc" + &(replace "xyz" "xyz" "abc") + "replace whole string works") + (assert-equal test + "bba" + &(replace "aaaaa" "aa" "b") + "replace multiple overlapping-style patterns works (non-overlapping)") + (assert-equal test + "!!!hello!!!" + &(replace " hello " " " "!!!") + "replace at start and end works") + (assert-equal test + "hello 🪐 world" + &(replace "hello 🌏 world" "🌏" "🪐") + "replace works with multi-byte Unicode characters") )