Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ lazy val main = (project in file("sjsonnet"))
"org.scala-lang.modules" %% "scala-collection-compat" % "2.11.0",
"org.tukaani" % "xz" % "1.8",
"org.yaml" % "snakeyaml" % "1.33",
"com.google.re2j" % "re2j" % "1.7",
),
libraryDependencies ++= Seq(
"com.lihaoyi" %% "utest" % "0.8.2",
Expand Down
3 changes: 2 additions & 1 deletion build.sc
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ object sjsonnet extends Module {
ivy"org.json:json:20240303",
ivy"org.tukaani:xz::1.10",
ivy"org.lz4:lz4-java::1.8.0",
ivy"org.yaml:snakeyaml::1.33"
ivy"org.yaml:snakeyaml::1.33",
ivy"com.google.re2j:re2j:1.7",
)
def scalacOptions = Seq("-opt:l:inline", "-opt-inline-from:sjsonnet.**")

Expand Down
10 changes: 10 additions & 0 deletions sjsonnet/src-js/sjsonnet/Platform.scala
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package sjsonnet

import java.io.File
import java.util
import java.util.regex.Pattern


object Platform {
def gzipBytes(s: Array[Byte]): String = {
throw new Exception("GZip not implemented in Scala.js")
Expand Down Expand Up @@ -34,4 +39,9 @@ object Platform {
def hashFile(file: File): String = {
throw new Exception("hashFile not implemented in Scala.js")
}

// Compiled patterns keyed by their source text, so repeated lookups reuse the compiled instance.
private val regexCache = new util.concurrent.ConcurrentHashMap[String, Pattern]

/** Returns the compiled `Pattern` for `pat`, compiling it and storing it in the cache when absent. */
def getPatternFromCache(pat: String): Pattern =
  regexCache.computeIfAbsent(pat, (source: String) => Pattern.compile(source))

/** Delegates to `Pattern.quote` to produce a pattern string for `s`. */
def regexQuote(s: String): String =
  Pattern.quote(s)
}
8 changes: 8 additions & 0 deletions sjsonnet/src-jvm/sjsonnet/Platform.scala
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
package sjsonnet

import java.io.{BufferedInputStream, ByteArrayOutputStream, File, FileInputStream}
import java.util
import java.util.Base64
import java.util.zip.GZIPOutputStream
import com.google.re2j.Pattern
import net.jpountz.xxhash.{StreamingXXHash64, XXHashFactory}
import org.json.{JSONArray, JSONObject}
import org.tukaani.xz.LZMA2Options
import org.tukaani.xz.XZOutputStream
import org.yaml.snakeyaml.{LoaderOptions, Yaml}
import org.yaml.snakeyaml.constructor.SafeConstructor

import scala.jdk.CollectionConverters._

object Platform {
Expand Down Expand Up @@ -107,4 +110,9 @@ object Platform {

hash.getValue.toString
}

// Compiled patterns keyed by their source text, so repeated lookups reuse the compiled instance.
private val regexCache = new util.concurrent.ConcurrentHashMap[String, Pattern]

/** Returns the compiled `Pattern` for `pat`, compiling it and storing it in the cache when absent. */
def getPatternFromCache(pat: String): Pattern =
  regexCache.computeIfAbsent(pat, (source: String) => Pattern.compile(source))

/** Delegates to `Pattern.quote` to produce a pattern string for `s`. */
def regexQuote(s: String): String =
  Pattern.quote(s)
}
7 changes: 7 additions & 0 deletions sjsonnet/src-native/sjsonnet/Platform.scala
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package sjsonnet

import java.io.{ByteArrayOutputStream, File}
import java.util
import java.util.Base64
import java.util.zip.GZIPOutputStream
import java.util.regex.Pattern

object Platform {
def gzipBytes(b: Array[Byte]): String = {
Expand Down Expand Up @@ -50,4 +52,9 @@ object Platform {
// File hashes in Scala Native are just the file content
scala.io.Source.fromFile(file).mkString
}

// Compiled patterns keyed by their source text, so repeated lookups reuse the compiled instance.
private val regexCache = new util.concurrent.ConcurrentHashMap[String, Pattern]

/** Returns the compiled `Pattern` for `pat`, compiling it and storing it in the cache when absent. */
def getPatternFromCache(pat: String): Pattern =
  regexCache.computeIfAbsent(pat, (source: String) => Pattern.compile(source))

/** Delegates to `Pattern.quote` to produce a pattern string for `s`. */
def regexQuote(s: String): String =
  Pattern.quote(s)
}
4 changes: 1 addition & 3 deletions sjsonnet/src/sjsonnet/PrettyYamlRenderer.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
package sjsonnet

import java.io.{StringWriter, Writer}
import java.util.regex.Pattern

import upickle.core.{ArrVisitor, ObjVisitor}
import fastparse.IndexedParserInput

import scala.collection.mutable
/**
Expand Down Expand Up @@ -240,7 +238,7 @@ object PrettyYamlRenderer{
*/
def writeBlockString(str: String, out: Writer, depth: Int, indent: Int, lineComment: String) = {
val len = str.length()
val splits = YamlRenderer.newlinePattern.split(str, -1)
val splits = Platform.getPatternFromCache("\n").split(str, -1)
val blockOffsetNumeral = if (str.charAt(0) != ' ') "" else indent
val (blockStyle, dropRight) =
(str.charAt(len - 1), if (len > 2) Some(str.charAt(len - 2)) else None) match{
Expand Down
28 changes: 12 additions & 16 deletions sjsonnet/src/sjsonnet/Std.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@ import java.io.StringWriter
import java.nio.charset.StandardCharsets.UTF_8
import java.util.Base64
import java.util
import java.util.regex.Pattern
import sjsonnet.Expr.Member.Visibility

import scala.collection.Searching._
import scala.collection.mutable
import scala.util.matching.Regex

/**
* The Jsonnet standard library, `std`, with each builtin function implemented
Expand All @@ -19,8 +17,8 @@ import scala.util.matching.Regex
class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.empty) {
private val dummyPos: Position = new Position(null, 0)
private val emptyLazyArray = new Array[Lazy](0)
private val leadingWhiteSpacePattern = Pattern.compile("^[ \t\n\f\r\u0085\u00A0']+")
private val trailingWhiteSpacePattern = Pattern.compile("[ \t\n\f\r\u0085\u00A0']+$")
private val leadingWhiteSpacePattern = Platform.getPatternFromCache("^[ \t\n\f\r\u0085\u00A0']+")
private val trailingWhiteSpacePattern = Platform.getPatternFromCache("[ \t\n\f\r\u0085\u00A0']+$")
private val oldNativeFunctions = Map(
builtin("gzip", "v"){ (_, _, v: Val) =>
v match{
Expand Down Expand Up @@ -48,7 +46,7 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
},
)
require(oldNativeFunctions.forall(k => !additionalNativeFunctions.contains(k._1)), "Conflicting native functions")
private val nativeFunctions = oldNativeFunctions ++ additionalNativeFunctions
private val nativeFunctions = oldNativeFunctions ++ additionalNativeFunctions ++ StdRegex.functions

private object AssertEqual extends Val.Builtin2("assertEqual", "a", "b") {
def evalRhs(v1: Val, v2: Val, ev: EvalScope, pos: Position): Val = {
Expand Down Expand Up @@ -474,26 +472,24 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
Val.Str(pos, str.asString.replaceAll(from.asString, to.asString))
override def specialize(args: Array[Expr]) = args match {
case Array(str, from: Val.Str, to) =>
try { (new SpecFrom(Pattern.compile(from.value)), Array(str, to)) } catch { case _: Exception => null }
try { (new SpecFrom(from.value), Array(str, to)) } catch { case _: Exception => null }
case _ => null
}
private class SpecFrom(from: Pattern) extends Val.Builtin2("strReplaceAll", "str", "to") {
private class SpecFrom(from: String) extends Val.Builtin2("strReplaceAll", "str", "to") {
def evalRhs(str: Val, to: Val, ev: EvalScope, pos: Position): Val =
Val.Str(pos, from.matcher(str.asString).replaceAll(to.asString))
Val.Str(pos, Platform.getPatternFromCache(from).matcher(str.asString).replaceAll(to.asString))
}
}

private object StripUtils {
private def getLeadingPattern(chars: String): Pattern =
Pattern.compile("^[" + Regex.quote(chars) + "]+")
private def getLeadingPattern(chars: String): String = "^[" + Platform.regexQuote(chars) + "]+"

private def getTrailingPattern(chars: String): Pattern =
Pattern.compile("[" + Regex.quote(chars) + "]+$")
private def getTrailingPattern(chars: String): String = "[" + Platform.regexQuote(chars) + "]+$"

def unspecializedStrip(str: String, chars: String, left: Boolean, right: Boolean): String = {
var s = str
if (right) s = getTrailingPattern(chars).matcher(s).replaceAll("")
if (left) s = getLeadingPattern(chars).matcher(s).replaceAll("")
if (right) s = Platform.getPatternFromCache(getTrailingPattern(chars)).matcher(s).replaceAll("")
if (left) s = Platform.getPatternFromCache(getLeadingPattern(chars)).matcher(s).replaceAll("")
s
}

Expand All @@ -503,8 +499,8 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
right: Boolean,
functionName: String
) extends Val.Builtin1(functionName, "str") {
private[this] val leftPattern = getLeadingPattern(chars)
private[this] val rightPattern = getTrailingPattern(chars)
private[this] val leftPattern = Platform.getPatternFromCache(getLeadingPattern(chars))
private[this] val rightPattern = Platform.getPatternFromCache(getTrailingPattern(chars))

def evalRhs(str: Val, ev: EvalScope, pos: Position): Val = {
var s = str.asString
Expand Down
89 changes: 89 additions & 0 deletions sjsonnet/src/sjsonnet/StdRegex.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package sjsonnet

import sjsonnet.Expr.Member.Visibility
import sjsonnet.Val.Obj

/**
 * Regular-expression builtins, keyed by name in [[functions]].
 *
 * The match functions return an object with two fields: `string` (the matched
 * text, or `null` when there is no match) and `captures` (an array of capture
 * group values). Patterns are obtained through `Platform.getPatternFromCache`.
 */
object StdRegex {
  /** Converts a regex group to a Jsonnet value; a `null` group becomes Jsonnet `null`. */
  private def groupToVal(group: String, pos: Position): Val =
    if (group == null) Val.Null(pos) else Val.Str(pos, group)

  def functions: Map[String, Val.Builtin] = Map(
    // Searches `str` for every occurrence of `pattern`. `string` is the first
    // occurrence; `captures` concatenates groups 1..n of every occurrence.
    "regexPartialMatch" -> new Val.Builtin2("regexPartialMatch", "pattern", "str") {
      override def evalRhs(pattern: Val, str: Val, ev: EvalScope, pos: Position): Val = {
        val noOffset = pos.noOffset
        val matcher = Platform.getPatternFromCache(pattern.asString).matcher(str.asString)
        val groupCount = matcher.groupCount()
        val captures = Array.newBuilder[Val]
        // Stays null until the first successful find(); groupToVal never returns null.
        var firstMatch: Val = null
        while (matcher.find()) {
          if (firstMatch == null) firstMatch = groupToVal(matcher.group(0), noOffset)
          for (i <- 1 to groupCount) captures += groupToVal(matcher.group(i), noOffset)
        }
        val matched: Val = if (firstMatch == null) Val.Null(noOffset) else firstMatch
        Val.Obj.mk(noOffset,
          "string" -> new Obj.ConstMember(true, Visibility.Normal, matched),
          "captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(noOffset, captures.result()))
        )
      }
    },
    // Requires `pattern` to match the whole of `str`. On failure `string` is
    // null and `captures` is empty; on success `captures` holds groups 1..n.
    "regexFullMatch" -> new Val.Builtin2("regexFullMatch", "pattern", "str") {
      override def evalRhs(pattern: Val, str: Val, ev: EvalScope, pos: Position): Val = {
        val noOffset = pos.noOffset
        val matcher = Platform.getPatternFromCache(pattern.asString).matcher(str.asString)
        if (!matcher.matches()) {
          Val.Obj.mk(noOffset,
            "string" -> new Obj.ConstMember(true, Visibility.Normal, Val.Null(noOffset)),
            "captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(noOffset, Array.empty[Lazy]))
          )
        } else {
          val captures = Array.newBuilder[Val]
          for (i <- 1 to matcher.groupCount()) captures += groupToVal(matcher.group(i), noOffset)
          Val.Obj.mk(noOffset,
            "string" -> new Obj.ConstMember(true, Visibility.Normal, groupToVal(matcher.group(0), noOffset)),
            "captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(noOffset, captures.result()))
          )
        }
      }
    },
    // Replaces every occurrence of `pattern` in `str` with `to`.
    "regexGlobalReplace" -> new Val.Builtin3("regexGlobalReplace", "str", "pattern", "to") {
      override def evalRhs(str: Val, pattern: Val, to: Val, ev: EvalScope, pos: Position): Val = {
        val matcher = Platform.getPatternFromCache(pattern.asString).matcher(str.asString)
        Val.Str(pos.noOffset, matcher.replaceAll(to.asString))
      }
    },
    // Replaces only the first occurrence of `pattern` in `str` with `to`.
    // The builtin name matches its registration key, like the sibling entries
    // (it was previously mislabelled "regexGlobalReplace").
    "regexReplace" -> new Val.Builtin3("regexReplace", "str", "pattern", "to") {
      override def evalRhs(str: Val, pattern: Val, to: Val, ev: EvalScope, pos: Position): Val = {
        val matcher = Platform.getPatternFromCache(pattern.asString).matcher(str.asString)
        Val.Str(pos.noOffset, matcher.replaceFirst(to.asString))
      }
    },
    // Quotes `str` via `Platform.regexQuote`.
    "regexQuoteMeta" -> new Val.Builtin1("regexQuoteMeta", "str") {
      override def evalRhs(str: Val, ev: EvalScope, pos: Position): Val =
        Val.Str(pos.noOffset, Platform.regexQuote(str.asString))
    }
  )
}
3 changes: 1 addition & 2 deletions sjsonnet/src/sjsonnet/TomlRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package sjsonnet
import upickle.core.{ArrVisitor, CharBuilder, ObjVisitor, SimpleVisitor, Visitor}

import java.io.StringWriter
import java.util.regex.Pattern


class TomlRenderer(out: StringWriter = new java.io.StringWriter(), cumulatedIndent: String, indent: String) extends SimpleVisitor[StringWriter, StringWriter]{
Expand Down Expand Up @@ -117,7 +116,7 @@ class TomlRenderer(out: StringWriter = new java.io.StringWriter(), cumulatedInde
}

object TomlRenderer {
private val bareAllowed = Pattern.compile("[A-Za-z0-9_-]+")
private val bareAllowed = Platform.getPatternFromCache("[A-Za-z0-9_-]+")
def escapeKey(key: String): String = if (bareAllowed.matcher(key).matches()) key else {
val out = new StringWriter()
BaseRenderer.escape(out, key, unicode = true)
Expand Down
19 changes: 7 additions & 12 deletions sjsonnet/src/sjsonnet/YamlRenderer.scala
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
package sjsonnet

import java.io.StringWriter
import java.util.regex.Pattern
import upickle.core.{ArrVisitor, ObjVisitor, SimpleVisitor, Visitor}

import scala.util.Try



class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayInObject: Boolean = false,
quoteKeys: Boolean = true, indent: Int = 2) extends BaseCharRenderer(_out, indent){
Expand Down Expand Up @@ -52,7 +48,7 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
elemBuilder.append('"')
elemBuilder.append('"')
} else if (s.charAt(len - 1) == '\n') {
val splits = YamlRenderer.newlinePattern.split(s)
val splits = Platform.getPatternFromCache("\n").split(s.toString)
elemBuilder.append('|')
depth += 1
splits.foreach { split =>
Expand Down Expand Up @@ -174,15 +170,14 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
}
}
object YamlRenderer{
val newlinePattern: Pattern = Pattern.compile("\n")
private val safeYamlKeyPattern = Pattern.compile("^[a-zA-Z0-9/._-]+$")
private val safeYamlKeyPattern = Platform.getPatternFromCache("^[a-zA-Z0-9/._-]+$")
private val yamlReserved = Set("true", "false", "null", "yes", "no", "on", "off", "y", "n", ".nan",
"+.inf", "-.inf", ".inf", "null", "-", "---", "''")
private val yamlTimestampPattern = Pattern.compile("^(?:[0-9]*-){2}[0-9]*$")
private val yamlBinaryPattern = Pattern.compile("^[-+]?0b[0-1_]+$")
private val yamlHexPattern = Pattern.compile("[-+]?0x[0-9a-fA-F_]+")
private val yamlFloatPattern = Pattern.compile( "^-?([0-9_]*)*(\\.[0-9_]*)?(e[-+][0-9_]+)?$" )
private val yamlIntPattern = Pattern.compile("^[-+]?[0-9_]+$")
private val yamlTimestampPattern = Platform.getPatternFromCache("^(?:[0-9]*-){2}[0-9]*$")
private val yamlBinaryPattern = Platform.getPatternFromCache("^[-+]?0b[0-1_]+$")
private val yamlHexPattern = Platform.getPatternFromCache("[-+]?0x[0-9a-fA-F_]+")
private val yamlFloatPattern = Platform.getPatternFromCache( "^-?([0-9_]*)*(\\.[0-9_]*)?(e[-+][0-9_]+)?$" )
private val yamlIntPattern = Platform.getPatternFromCache("^[-+]?[0-9_]+$")

private def isSafeBareKey(k: String) = {
val l = k.toLowerCase
Expand Down
2 changes: 1 addition & 1 deletion sjsonnet/test/src/sjsonnet/OldYamlRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class OldYamlRenderer(out: StringWriter = new java.io.StringWriter(), indentArra
val len = s.length()
if (len == 0) out.append("\"\"")
else if (s.charAt(len - 1) == '\n') {
val splits = YamlRenderer.newlinePattern.split(s)
val splits = Platform.getPatternFromCache("\n").split(s.toString)
out.append('|')
depth += 1
splits.foreach { split =>
Expand Down
38 changes: 38 additions & 0 deletions sjsonnet/test/src/sjsonnet/StdRegexTests.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package sjsonnet

import sjsonnet.TestUtils.eval
import utest._

/** Exercises the regex builtins through `std.native(...)`. */
object StdRegexTests extends TestSuite {
  def tests: Tests = Tests {
    test("std.native - regex") {
      // Expected results shared by several assertions below.
      val oneMatch = ujson.Obj(
        "string" -> "abc",
        "captures" -> ujson.Arr("b")
      )
      val noMatch = ujson.Obj(
        "string" -> ujson.Null,
        "captures" -> ujson.Arr()
      )
      val twoMatches = ujson.Obj(
        "string" -> "abc",
        "captures" -> ujson.Arr("b", "b")
      )

      // Partial match: found anywhere in the input; captures accumulate per occurrence.
      eval("""std.native("regexPartialMatch")("a(b)c", "cabc")""") ==> oneMatch
      eval("""std.native("regexPartialMatch")("a(b)c", "def")""") ==> noMatch
      eval("""std.native("regexPartialMatch")("a(b)c", "abcabc")""") ==> twoMatches

      // Full match: the pattern must cover the entire input.
      eval("""std.native("regexFullMatch")("a(b)c", "abc")""") ==> oneMatch
      eval("""std.native("regexFullMatch")("a(b)c", "cabc")""") ==> noMatch
      eval("""std.native("regexFullMatch")("a(b)c", "def")""") ==> noMatch

      // Replacement (all occurrences vs. first only) and quoting.
      eval("""std.native("regexGlobalReplace")("abcbbb", "b", "d")""") ==> ujson.Str("adcddd")
      eval("""std.native("regexReplace")("abcbbb", "b", "d")""") ==> ujson.Str("adcbbb")
      eval("""std.native("regexQuoteMeta")("a.b")""") ==> ujson.Str(Platform.regexQuote("a.b"))
    }
  }
}