diff --git a/CHANGELOG.md b/CHANGELOG.md index d621a9fa..887367fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +#### 2.2.0 Milestone Release + +- New API to specify resource guardrails on expressions (PR #795) +- Fix ISO8601 regex pattern (PR #793) +- Prevent $lookup from accessing object prototype members (PR #794) +- Enable OIDC publishing to NPM (PR #792) +- Publish step to be triggered by new version tag (PR #796) + #### 2.1.1 Maintenance Release - Fix picture string parsing for $formatNumber (PR #788) diff --git a/package.json b/package.json index f3976807..871d5497 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "jsonata", - "version": "2.1.1", + "version": "2.2.0", "description": "JSON query and transformation language", "module": "jsonata.js", "main": "jsonata.js", diff --git a/website/sidebars.json b/website/sidebars.json index 498b93e4..df211192 100644 --- a/website/sidebars.json +++ b/website/sidebars.json @@ -4,6 +4,6 @@ "Language Guide": ["simple", "predicate", "expressions", "construction", "composition", "sorting-grouping", "processing", "programming", "regex", "date-time"], "Operators": ["path-operators", "numeric-operators", "comparison-operators", "boolean-operators", "other-operators"], "Function Library": ["string-functions", "numeric-functions", "aggregation-functions", "boolean-functions", "array-functions", "object-functions", "date-time-functions", "higher-order-functions"], - "Extending JSONata": ["embedding-extending", "contributing"] + "Extending JSONata": ["embedding-extending", "guardrails", "contributing"] } } diff --git a/website/versioned_docs/version-2.2.0/embedding-extending.md b/website/versioned_docs/version-2.2.0/embedding-extending.md new file mode 100644 index 00000000..ac97fdfc --- /dev/null +++ b/website/versioned_docs/version-2.2.0/embedding-extending.md @@ -0,0 +1,177 @@ +--- +id: version-2.2.0-embedding-extending +title: Embedding and Extending JSONata +sidebar_label: Embedding and 
Extending JSONata +original_id: embedding-extending +--- + +## API + +### jsonata(str[, options]) + +Parse a string `str` as a JSONata expression and return a compiled JSONata expression object. + +`options`, if present, is used to control certain aspects of the evaluator, and can be used to protect the server from expressions that take longer to execute than expected. See [Configuring Guardrails](guardrails) for more details. + +```javascript +var expression = jsonata("$sum(example.value)"); +``` + +If the expression is not valid JSONata, an `Error` is thrown containing information about the nature of the syntax error, for example: + +``` +{ + code: "S0202", + stack: "...", + position: 16, + token: "}", + value: "]", + message: "Syntax error: expected ']' got '}'" +} +``` + +`expression` has three methods: + +### expression.evaluate(input[, bindings[, callback]]) + +Run the compiled JSONata expression against object `input` and return the result as a new object. + +```javascript +var result = await expression.evaluate({example: [{value: 4}, {value: 7}, {value: 13}]}); +``` + +`input` should be a JavaScript value such as would be returned from `JSON.parse()`. If `input` could not have been parsed from a JSON string (is circular, contains functions, ...), `evaluate`'s behaviour is not defined. `result` is a new JavaScript value suitable for `JSON.stringify()`ing. 
+ +`bindings`, if present, contain variable names and values (including functions) to be bound: + +```javascript +await jsonata("$a + $b()").evaluate({}, {a: 4, b: () => 78}); +// returns 82 +``` + +`expression.evaluate()` may throw a run-time `Error`: + +```javascript +var expression = jsonata("$notafunction()"); // OK, valid JSONata +await expression.evaluate({}); // Throws +``` + +The `Error` contains information about the nature of the run-time error, for example: + +``` +{ + code: "T1006", + stack: "...", + position: 14, + token: "notafunction", + message: "Attempted to invoke a non-function" +} +``` + +If `callback(err, value)` is supplied, `expression.evaluate()` returns `undefined`, the expression is run asynchronously and the `Error` or result is passed to `callback`. + +```javascript +await jsonata("7 + 12").evaluate({}, {}, (error, result) => { + if(error) { + console.error(error); + return; + } + console.log("Finished with", result); +}); +console.log("Started"); + +// Prints "Started", then "Finished with 19" +``` + +### expression.assign(name, value) + +Permanently binds a value to a name in the expression, similar to how `bindings` worked above. Modifies `expression` in place and returns `undefined`. Useful in a JSONata expression factory. + +```javascript +var expression = jsonata("$a + $b()"); +expression.assign("a", 4); +expression.assign("b", () => 1); + +await expression.evaluate({}); // 5 +``` + +Note that the `bindings` argument in the `expression.evaluate()` call clobbers these values: + +```javascript +await expression.evaluate({}, {a: 109}); // 110 +``` + +### expression.registerFunction(name, implementation[, signature]) + +Permanently binds a function to a name in the expression. 
+ +```javascript +var expression = jsonata("$greet()"); +expression.registerFunction("greet", () => "Hello world"); + +await expression.evaluate({}); // "Hello world" +``` + +You can do this using `expression.assign` or `bindings` in `expression.evaluate`, but `expression.registerFunction` allows you to specify a function `signature`. This is a terse string which tells JSONata the expected input argument types and return value type of the function. JSONata raises a run-time error if the actual input argument types do not match (the return value type is not checked yet). + +```javascript +var expression = jsonata("$add(61, 10005)"); +expression.registerFunction("add", (a, b) => a + b, "<nn:n>"); + +await expression.evaluate({}); // 10066 +``` + +### Function signature syntax + +A function signature is a string of the form `<params:return>`. `params` is a sequence of type symbols, each one representing an input argument's type. `return` is a single type symbol representing the return value type. + +Type symbols work as follows: + +Simple types: + +- `b` - Boolean +- `n` - number +- `s` - string +- `l` - `null` + +Complex types: + +- `a` - array +- `o` - object +- `f` - function + +Union types: + +- `(sao)` - string, array or object +- `(o)` - same as `o` +- `u` - equivalent to `(bnsl)` i.e. Boolean, number, string or `null` +- `j` - any JSON type. Equivalent to `(bnsloa)` i.e. Boolean, number, string, `null`, object or array, but not function +- `x` - any type. Equivalent to `(bnsloaf)` + +Parametrised types: + +- `a<s>` - array of strings +- `a<x>` - array of values of any type + +Some examples of signatures of built-in JSONata functions: + +- `$count` has signature `<a:n>`; it accepts an array and returns a number. +- `$append` has signature `<aa:a>`; it accepts two arrays and returns an array. +- `$sum` has signature `<a<n>:n>`; it accepts an array of numbers and returns a number. 
+- `$reduce` has signature `<fa<j>:j>`; it accepts a reducer function `f` and an `a<j>` (array of JSON objects) and returns a JSON object. + +Each type symbol may also have *options* applied. + +- `+` - one or more arguments of this type + - E.g. `$zip` has signature `<a+>`; it accepts one array, or two arrays, or three arrays, or... +- `?` - optional argument + - E.g. `$join` has signature `<a<s>s?:s>`; it accepts an array of strings and an optional joiner string which defaults to the empty string. It returns a string. +- `-` - if this argument is missing, use the context value ("focus"). + - E.g. `$length` has signature `<s-:n>`; it can be called as `$length(OrderID)` (one argument) but equivalently as `OrderID.$length()`. + +### Writing higher-order function extensions + +It is possible to write an extension function that takes one or more functions in its list of arguments and/or returns + a function as its return value. + + diff --git a/website/versioned_docs/version-2.2.0/guardrails.md b/website/versioned_docs/version-2.2.0/guardrails.md new file mode 100644 index 00000000..d11eeb0c --- /dev/null +++ b/website/versioned_docs/version-2.2.0/guardrails.md @@ -0,0 +1,160 @@ +--- +id: version-2.2.0-guardrails +title: Configuring Guardrails +sidebar_label: Configuring Guardrails +original_id: guardrails +--- + +## Guardrails + +This page contains information relating to the JavaScript [reference implementation](https://github.com/jsonata-js/jsonata) of JSONata, and not the JSONata expression language itself. + +JSONata is a Turing-complete expression language, and as such, it is possible to write unbounded or infinite loops. This can be a potential problem if an application using JSONata is exposing the ability for client users to input expressions that are evaluated on the server. A user could accidentally or maliciously provide an expression that, if evaluated unchecked, could cause a denial of service situation. 
+ +This JSONata library provides a set of configurable 'guardrails' that limit the compute and memory resources that a single expression can consume. If this library is being used in a hosted environment to allow end users to provide their own expressions, then it would be prudent to set constraints. The following sections describe each of the guardrails and how to configure them. This page does not provide recommended values or defaults. + +### Stack overflow + +In common with other functional languages, JSONata supports looping by writing [recursive functions](https://en.wikipedia.org/wiki/Functional_programming#Recursion). The JSONata evaluator processes an expression using a set of mutually recursive functions (eval-apply cycle). When a function is invoked (by itself or by another function), the call stack in the host JavaScript runtime will grow. If this stack grows too deep, the evaluator could exhaust the memory of the host process, causing it to crash. + +The JSONata evaluator can be configured with a maximum stack[^stack] limit to prevent an expression from doing this by specifying the `stack` option. Error `D1011` will be thrown if the expression grows the stack beyond the specified limit. + +```javascript +const jsonata = require('jsonata'); + +const data = {JSON: data}; +const options = { + stack: 500 +}; + +(async () => { + const expression = jsonata('', options); + const result = await expression.evaluate(data); +})() +``` + + +As an example, the [Ackermann function](https://en.wikipedia.org/wiki/Ackermann_function) could be implemented in JSONata using: + +``` +( + $ack := function($m, $n) { + $m = 0 ? $n + 1 : + $n = 0 ? $ack($m - 1, 1) : + $ack($m - 1, $ack($m, $n - 1)) + }; + + $ack(3, 4) +) +``` + +Invoked as `$ack(3, 4)`, this would quickly evaluate to `125`. However, `$ack(4, 3)`, although theoretically computable, will readily hit the configured stack guardrail before causing any problems to the host server. 
+ +[^stack]: The term 'stack' is a slight misnomer here; it actually limits the number of times round the eval-apply cycle, which is related to the JavaScript stack depth. + +### Excessive execution time + +It's possible (and desirable) to write [tail recursive](programming#tail-call-optimization-tail-recursion) functions that don't grow the stack at all. For these types of functions, a [stack guardrail](#stack-overflow) would not be sufficient to protect against unbounded loops. + +The JSONata evaluator can be configured with a maximum time limit to protect against runaway expressions by specifying the `timeout` option. Error `D1012` will be thrown if the expression runs for longer than the specified timeout (in milliseconds). + +It's good practice to specify both `stack` and `timeout`. + +```javascript +const jsonata = require('jsonata'); + +const data = {JSON: data}; +const options = { + stack: 500, + timeout: 1000 // in milliseconds +}; + +(async () => { + const expression = jsonata('', options); + const result = await expression.evaluate(data); +})() +``` + +As an example, an infinite loop could be written in JSONata: + +``` +( + $inf := function() { + $inf() + }; + + $inf() +) +``` + +This is tail recursive, and would run forever without the timeout guardrail. + +### Excessive sequence length + +It's possible to write expressions that result in excessively long result sequences. This could ultimately lead to memory exhaustion in the host server. The `sequence` option can be set to specify the maximum sequence length that can be created by an expression, including any intermediate sequences created by sub-expressions. Error `D2015` will be thrown if, during the evaluation of an expression, the evaluator attempts to generate a sequence exceeding this upper limit. 
+ + +```javascript +const jsonata = require('jsonata'); + +const data = {JSON: data}; +const options = { + sequence: 1e6 // maximum of one million items in a sequence +}; + +(async () => { + const expression = jsonata('', options); + const result = await expression.evaluate(data); +})() +``` + +As an example, the following JSONata expression attempts to generate a sequence of 100 million numbers. The guardrail configured above would prevent this. + +``` +[1..10000].([1..10000]) +``` + +### Rogue regular expressions + +A number of functions use [regular expressions](regex) to process strings. Alongside the power and flexibility that regexes provide, there are situations whereby badly crafted or malicious expressions could cause the processing engine to take an [excessive amount of time](https://en.wikipedia.org/wiki/ReDoS) (exponential in the input string length). Since the regex processing is not implemented in the core JSONata (eval-apply) evaluator, the `timeout` guardrail cannot protect against this. + +It is possible to specify which regex processor is invoked by the JSONata evaluator. This is configured using the `RegexEngine` option. When this is not set, the evaluator will use the default JavaScript [RegExp](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp) class. + +The [packaged version of JSONata](https://www.npmjs.com/package/jsonata) has no runtime dependencies on other packages, but it is possible to use the `RegexEngine` option to invoke a third-party ReDoS detection library whenever a regular expression is encountered in a JSONata expression. 
+ +The following code shows how this is done using the [redos-detector](https://github.com/tjenkinson/redos-detector) module: + +```javascript +const jsonata = require('jsonata'); +const redos = require('redos-detector'); + +// Simple wrapper that invokes redos-detector before delegating +// to built-in RegExp class +const SafeRegExp = function(regex) { + if (!redos.isSafe(regex).safe) { + throw { + code: 'U1001', + stack: (new Error()).stack, + value: regex, + message: 'Rejecting regex (potential ReDoS): ' + regex + }; + } + this.regex = regex; + }; + +SafeRegExp.prototype.exec = function(str) { + return this.regex.exec(str); +} + +const data = {JSON: data}; +const options = { + RegexEngine: SafeRegExp +}; + +(async () => { + const expression = jsonata('', options); + const result = await expression.evaluate(data); +})() +``` + +Other similar libraries are available. This is not an endorsement of any particular one. The developer should choose one according to their requirements. diff --git a/website/versioned_docs/version-2.2.0/overview.md b/website/versioned_docs/version-2.2.0/overview.md new file mode 100644 index 00000000..64629783 --- /dev/null +++ b/website/versioned_docs/version-2.2.0/overview.md @@ -0,0 +1,42 @@ +--- +id: version-2.2.0-overview +title: JSONata Documentation +sidebar_label: Overview +original_id: overview +--- + +## Introduction + +JSONata is a lightweight query and transformation language for JSON data. Inspired by the 'location path' semantics of XPath 3.1, it allows sophisticated queries to be expressed in a compact and intuitive notation. A rich complement of built in operators and functions is provided for manipulating and combining extracted data, and the results of queries can be formatted into any JSON output structure using familiar JSON object and array syntax. Coupled with the facility to create user defined functions, advanced expressions can be built to tackle any JSON query and transformation task. + +

+ +## Get JSONata + +* Try it out at [try.jsonata.org](http://try.jsonata.org/) +* Install the module from [NPM](https://www.npmjs.com/package/jsonata) +* Fork the repo on [GitHub](https://github.com/jsonata-js/jsonata) + +## Implementations of JSONata + +The following are known implementations of JSONata in addition to the primary implementation in JavaScript in the above repo. + +|Language|Link|Notes|JSONata version| +|---|---|---|---| +|C|https://github.com/qlyoung/jsonata-c|Runs JSONata in embedded JS engine|1.8.3| +|C++|https://github.com/rayokota/jsonata-cpp|Native port of reference|2.1.0| +|Go|https://github.com/jsonata-go/jsonata|Native implementation|2.0.6| +|Go|https://github.com/RecoLabs/gnata|Native implementation|2.x| +|Go|https://github.com/blues/jsonata-go|Native implementation|1.5.4| +|Go|https://github.com/yxuco/gojsonata|Native implementation| | +|Java|https://github.com/IBM/JSONata4Java|Native implementation| | +|Java|https://github.com/dashjoin/jsonata-java|Native port of reference|2.1.0| +|.NET|https://github.com/mikhail-barg/jsonata.net.native|Native implementation|2.1.0| +|Python|https://github.com/qlyoung/pyjsonata|API bindings based on C bindings|1.8.3| +|Python|https://github.com/rayokota/jsonata-python|Native port of reference|2.1.0| +|Rust|https://github.com/johanventer/jsonata-rust|Implementation work in progress| | +|Rust|https://github.com/Stedi/jsonata-rs|Actively-developed fork of jsonata-rust| | + +## Find out more + +* Introduction at [London Node User Group meetup](https://www.youtube.com/watch?v=TDWf6R8aqDo) diff --git a/website/versioned_sidebars/version-2.2.0-sidebars.json b/website/versioned_sidebars/version-2.2.0-sidebars.json new file mode 100644 index 00000000..74f9bfb5 --- /dev/null +++ b/website/versioned_sidebars/version-2.2.0-sidebars.json @@ -0,0 +1,43 @@ +{ + "version-2.2.0-docs": { + "Getting Started": [ + "version-2.2.0-overview", + "version-2.2.0-using-nodejs", + "version-2.2.0-using-browser" + ], + "Language 
Guide": [ + "version-2.2.0-simple", + "version-2.2.0-predicate", + "version-2.2.0-expressions", + "version-2.2.0-construction", + "version-2.2.0-composition", + "version-2.2.0-sorting-grouping", + "version-2.2.0-processing", + "version-2.2.0-programming", + "version-2.2.0-regex", + "version-2.2.0-date-time" + ], + "Operators": [ + "version-2.2.0-path-operators", + "version-2.2.0-numeric-operators", + "version-2.2.0-comparison-operators", + "version-2.2.0-boolean-operators", + "version-2.2.0-other-operators" + ], + "Function Library": [ + "version-2.2.0-string-functions", + "version-2.2.0-numeric-functions", + "version-2.2.0-aggregation-functions", + "version-2.2.0-boolean-functions", + "version-2.2.0-array-functions", + "version-2.2.0-object-functions", + "version-2.2.0-date-time-functions", + "version-2.2.0-higher-order-functions" + ], + "Extending JSONata": [ + "version-2.2.0-embedding-extending", + "version-2.2.0-guardrails", + "version-2.2.0-contributing" + ] + } +} diff --git a/website/versions.json b/website/versions.json index b50b4055..08a7a1b5 100644 --- a/website/versions.json +++ b/website/versions.json @@ -1,4 +1,5 @@ [ + "2.2.0", "2.1.0", "2.0.0", "1.8.0",