|
| 1 | +<?php |
| 2 | + |
| 3 | +/* |
| 4 | + * SPDX-FileCopyrightText: (c) Respect Project Contributors |
| 5 | + * SPDX-License-Identifier: ISC |
| 6 | + * SPDX-FileContributor: Alexandre Gomes Gaigalas <alganet@gmail.com> |
| 7 | + */ |
| 8 | + |
| 9 | +declare(strict_types=1); |
| 10 | + |
| 11 | +namespace Respect\StringFormatter\Internal; |
| 12 | + |
| 13 | +use Respect\StringFormatter\InvalidFormatterException; |
| 14 | + |
| 15 | +use function array_keys; |
| 16 | +use function count; |
| 17 | +use function implode; |
| 18 | +use function mb_strtolower; |
| 19 | +use function mb_strtoupper; |
| 20 | +use function mb_substr; |
| 21 | +use function preg_match; |
| 22 | +use function preg_match_all; |
| 23 | +use function sprintf; |
| 24 | +use function str_starts_with; |
| 25 | +use function strtolower; |
| 26 | +use function substr; |
| 27 | + |
| 28 | +use const PREG_OFFSET_CAPTURE; |
| 29 | + |
/**
 * Compiles a formatting pattern into the pieces needed to apply it: a search
 * regex with one capture group per filter, a replacement template that refers
 * to those groups, and the per-group filter/transform instructions.
 *
 * Pattern syntax handled by {@see self::compile()}:
 *  - filter characters (keys of {@see self::FILTERS}), optionally followed by
 *    a quantifier (`*`, `+`, `{n}`, `{n,}`, `{n,m}`, `{,m}`);
 *  - `\l`, `\u`, `\i`: transform applied to the next filter only;
 *  - `\L`, `\U`, `\I`: transform applied to every following filter until `\E`;
 *  - `\d`: a group whose captured text is deleted;
 *  - `\x` for any other character `x`: the literal character `x`;
 *  - anything else: literal text copied to the replacement template.
 *
 * Compiled patterns and quantifiers are memoised in static caches.
 */
final class CompiledPattern
{
    /**
     * Filter character => PCRE fragment matching ONE accepted character.
     *
     * Every fragment is spliced after `.*?` inside a group, so it must be a
     * single atom: `W` is a character class rather than `\p{L}|\p{N}`, whose
     * top-level alternation would split the surrounding group in two.
     */
    private const array FILTERS = [
        '#' => '.',
        '0' => '\p{N}',
        'A' => '\p{Lu}',
        'a' => '\p{Ll}',
        'C' => '\p{L}',
        'W' => '[\p{L}\p{N}]',
    ];

    /** Escape letter => transform name; the uppercase letter selects the persistent variant. */
    private const array TRANSFORM_MAP = ['l' => 'lower', 'u' => 'upper', 'i' => 'invert'];

    /** Matches a token that is, as a whole, a quantifier (`{...}`, `*`, `+` or `?`). */
    private const string QUANTIFIER_TOKEN = '/^(?:\{[^}]*\}|[*+?])$/u';

    /** @var array<string, CompiledPattern> */
    private static array $compiledPatterns = [];

    /** @var array<string, string> */
    private static array $compiledQualifiers = [];

    /** @param array<int, array{filter: string, transform: string|null}> $instructions Keyed by capture group number, starting at 1 */
    private function __construct(
        private(set) readonly string $pattern,
        private(set) readonly string $search,
        private(set) readonly string $replacement,
        private(set) readonly array $instructions,
    ) {
    }

    /**
     * Compiles (or returns the cached compilation of) a pattern.
     *
     * @throws InvalidFormatterException When the pattern is empty, cannot be tokenised,
     *                                   ends in a lone backslash, or contains a misplaced
     *                                   or malformed quantifier
     */
    public static function compile(string $pattern): self
    {
        if (isset(self::$compiledPatterns[$pattern])) {
            return self::$compiledPatterns[$pattern];
        }

        if ($pattern === '') {
            throw new InvalidFormatterException('Pattern cannot be empty');
        }

        $search = '';
        $replacement = '';
        $instructions = [];
        $groupIndex = 1;

        // Persistent transform (\L, \U, \I ... \E) and one-shot transform (\l, \u, \i).
        $transformState = null;
        $nextTransform = null;

        // Token alternatives, in order: escape pair, filter character, quantifier,
        // run of literal text, any single leftover character.
        $tokenized = preg_match_all(sprintf(
            '/(?:\\\\.|[%1$s]|(?:\{[^}]*\}|[*+?])|[^\\\%1$s{}+*?]+|.)/u',
            implode('', array_keys(self::FILTERS)),
        ), $pattern, $tokens, PREG_OFFSET_CAPTURE);

        // With the /u modifier PCRE refuses subjects that are not valid UTF-8;
        // report that instead of carrying on with an empty token list.
        if ($tokenized === false) {
            throw new InvalidFormatterException('Pattern could not be tokenized (is it valid UTF-8?)');
        }

        $tokenList = $tokens[0];
        $count = count($tokenList);

        // Index-based loop: a filter consumes the quantifier token that follows it.
        for ($i = 0; $i < $count; $i++) {
            // $offset is the byte offset reported by PREG_OFFSET_CAPTURE.
            [$tokenText, $offset] = $tokenList[$i];

            if (str_starts_with($tokenText, '\\')) {
                // A backslash with nothing after it only matches the catch-all alternative.
                if ($tokenText === '\\') {
                    throw new InvalidFormatterException('Incomplete escape sequence at end of pattern');
                }

                $char = mb_substr($tokenText, 1);

                if ($char === 'd') {
                    // \d: capture one character and mark the group for deletion.
                    $inner = '.';
                    $search .= sprintf('((?:.*?%s){0,1})', $inner);
                    $replacement .= sprintf('%%%d$', $groupIndex);
                    $instructions[$groupIndex] = ['filter' => sprintf('/%s/u', $inner), 'transform' => 'delete'];
                    $groupIndex++;
                    continue;
                }

                if ($char === 'E') {
                    $transformState = null;
                    continue;
                }

                if (isset(self::TRANSFORM_MAP[$char])) {
                    $nextTransform = self::TRANSFORM_MAP[$char];
                    continue;
                }

                // Uppercase variant of a transform letter: stays active until \E.
                $lowerChar = strtolower($char);
                if (isset(self::TRANSFORM_MAP[$lowerChar]) && $char !== $lowerChar) {
                    $transformState = self::TRANSFORM_MAP[$lowerChar];
                    continue;
                }

                // Any other escaped character is taken literally.
                $replacement .= $char;
                continue;
            }

            if (isset(self::FILTERS[$tokenText])) {
                $filterChar = $tokenText;
                $regexQuantifier = '{0,1}';

                if (isset($tokenList[$i + 1]) && preg_match(self::QUANTIFIER_TOKEN, $tokenList[$i + 1][0])) {
                    $i++;
                    $regexQuantifier = self::compileQualifier($tokenList[$i][0], $tokenList[$i][1]);
                }

                $inner = self::FILTERS[$filterChar];
                // `.*?` lets the group skip over characters the filter does not accept.
                $search .= sprintf('((?:.*?%s)%s)', $inner, $regexQuantifier);

                $replacement .= sprintf('%%%d$', $groupIndex);
                $instructions[$groupIndex] = [
                    'filter' => sprintf('/%s/u', $inner),
                    // The one-shot transform wins over the persistent one.
                    'transform' => $nextTransform ?? $transformState,
                ];

                $groupIndex++;
                $nextTransform = null;
                continue;
            }

            // A quantifier reaching this point was not consumed by a preceding filter.
            if (preg_match(self::QUANTIFIER_TOKEN, $tokenText)) {
                throw new InvalidFormatterException(
                    sprintf('Quantifier "%s" must follow a filter pattern at position %d', $tokenText[0], $offset),
                );
            }

            // An opening brace without a closing one.
            if (str_starts_with($tokenText, '{')) {
                throw new InvalidFormatterException(
                    sprintf('Invalid or malformed quantifier at position %d', $offset),
                );
            }

            $replacement .= $tokenText;
        }

        return self::$compiledPatterns[$pattern] = new self(
            $pattern,
            '/^' . $search . '/us',
            $replacement,
            $instructions,
        );
    }

    /**
     * Applies a named transform to a value.
     *
     * @param string|null $transform 'delete', 'lower', 'upper' or 'invert'; any other
     *                               value (including null) returns $val unchanged
     */
    public static function transform(string $val, string|null $transform): string
    {
        return match ($transform) {
            'delete' => '',
            'lower' => mb_strtolower($val),
            'upper' => mb_strtoupper($val),
            'invert' => self::invertCase($val),
            default => $val,
        };
    }

    /**
     * Swaps the case of every character, one code point at a time.
     *
     * Working per code point keeps the result valid UTF-8 even when a case
     * mapping changes the encoded length; a byte-wise XOR of the lower/upper
     * variants does not. Case mapping is therefore context-free here.
     */
    private static function invertCase(string $val): string
    {
        // Split into code points with PCRE; false means $val is not valid UTF-8,
        // in which case it is returned untouched rather than mangled.
        if (preg_match_all('/./us', $val, $chars) === false) {
            return $val;
        }

        $inverted = '';
        foreach ($chars[0] as $char) {
            $lower = mb_strtolower($char);
            // Unchanged by lowercasing: already lowercase (or caseless), so use the uppercase form.
            $inverted .= $lower === $char ? mb_strtoupper($char) : $lower;
        }

        return $inverted;
    }

    /**
     * Translates a pattern quantifier token into the regex quantifier used in the search expression.
     *
     * Only the upper bound is kept: `{n}` and `{n,m}` become `{0,n}` / `{0,m}`,
     * `{n,}` becomes `*`. `+` becomes `{1,}` and `*` stays `*`.
     *
     * NOTE(review): `?` is tokenised as a quantifier but is not handled here, so it
     * is rejected as malformed; confirm whether that is intended.
     *
     * @param int $offset Offset of the token in the pattern, used in the error message
     *
     * @throws InvalidFormatterException When the token is not a supported quantifier
     */
    private static function compileQualifier(string $token, int $offset): string
    {
        if (isset(self::$compiledQualifiers[$token])) {
            return self::$compiledQualifiers[$token];
        }

        if ($token === '*') {
            return '*';
        }

        if ($token === '+') {
            return '{1,}';
        }

        // Text between the braces (empty for a single-character token such as `?`).
        $content = substr($token, 1, -1);
        // `D` anchors `$` at the very end, so a trailing newline is not accepted.
        if (preg_match('/^(?:\d+(?:,\d*)?|,\d+)$/D', $content) !== 1) {
            throw new InvalidFormatterException(sprintf('Invalid or malformed quantifier at position %d', $offset));
        }

        preg_match('/^\{(\d*)(?:,(\d*))?\}$/', $token, $m);
        // With a comma the upper bound is what follows it; otherwise the single number.
        $max = $m[2] ?? $m[1];

        return self::$compiledQualifiers[$token] = $max === '' ? '*' : sprintf('{0,%s}', $max);
    }
}
0 commit comments