From 3d017f42226c14e4520ee8dcf7fd68a7841a0bd5 Mon Sep 17 00:00:00 2001 From: "mikhail.barg" Date: Mon, 30 Mar 2026 20:25:45 +0300 Subject: [PATCH 1/2] Attempt to properly implement `+` signature semantics --- src/signature.js | 15 ++++++++++++--- .../groups/function-signatures/case035.json | 4 ++-- .../groups/function-signatures/case040.json | 2 +- .../groups/function-signatures/case041.json | 6 ++++++ .../groups/function-signatures/case042.json | 6 ++++++ .../groups/function-signatures/case043.json | 9 +++++++++ .../groups/function-signatures/case044.json | 6 ++++++ 7 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 test/test-suite/groups/function-signatures/case041.json create mode 100644 test/test-suite/groups/function-signatures/case042.json create mode 100644 test/test-suite/groups/function-signatures/case043.json create mode 100644 test/test-suite/groups/function-signatures/case044.json diff --git a/src/signature.js b/src/signature.js index d0eb4e41..69066d49 100644 --- a/src/signature.js +++ b/src/signature.js @@ -248,11 +248,20 @@ const signature = (() => { }; } } else { - validatedArgs.push(arg); - argIndex++; + //validatedArgs.push(arg); + //argIndex++; + validatedArgs.push(undefined); } + } else if (match.length > 1 && param.type !== 'a') { // may have matched multiple args (if the regex ends with a '+') + //the signature should have been like 'n+' - we still pack multiple args into array + var argsAsArray = []; + match.split('').forEach(function () { + arg = args[argIndex]; + argsAsArray.push(arg); + argIndex++; + }); + validatedArgs.push(argsAsArray); } else { - // may have matched multiple args (if the regex ends with a '+') // split into single tokens match.split('').forEach(function (single) { arg = args[argIndex]; diff --git a/test/test-suite/groups/function-signatures/case035.json b/test/test-suite/groups/function-signatures/case035.json index d2a1a5c8..c0a03b74 100644 --- a/test/test-suite/groups/function-signatures/case035.json 
+++ b/test/test-suite/groups/function-signatures/case035.json @@ -3,7 +3,7 @@ "dataset": null, "bindings": {}, "result": { - "$arg1": 1, - "$arg2": 2 + "$arg1": [1, 2], + "$arg2": 3 } } \ No newline at end of file diff --git a/test/test-suite/groups/function-signatures/case040.json b/test/test-suite/groups/function-signatures/case040.json index 25199bd4..208d9e6f 100644 --- a/test/test-suite/groups/function-signatures/case040.json +++ b/test/test-suite/groups/function-signatures/case040.json @@ -2,5 +2,5 @@ "expr": "λ($arg1, $arg2)>{[$arg1, $arg2]}(1, 2, 3)", "dataset": null, "bindings": {}, - "result": [1, 2] + "result": [1, 2, 3] } \ No newline at end of file diff --git a/test/test-suite/groups/function-signatures/case041.json b/test/test-suite/groups/function-signatures/case041.json new file mode 100644 index 00000000..119dc92e --- /dev/null +++ b/test/test-suite/groups/function-signatures/case041.json @@ -0,0 +1,6 @@ +{ + "expr": "λ($arg1, $arg2)<s?n+>{{\"arg1\": $arg1, \"arg2\": $arg2}}(1, 2, 3)", + "dataset": null, + "bindings": {}, + "result": {"arg2": [1, 2, 3]} +} \ No newline at end of file diff --git a/test/test-suite/groups/function-signatures/case042.json b/test/test-suite/groups/function-signatures/case042.json new file mode 100644 index 00000000..3577fc41 --- /dev/null +++ b/test/test-suite/groups/function-signatures/case042.json @@ -0,0 +1,6 @@ +{ + "expr": "λ($arg1, $arg2)>{[$arg1, $arg2]}(\"1\", 2, 3)", + "dataset": null, + "bindings": {}, + "result": ["1", 2, 3] +} \ No newline at end of file diff --git a/test/test-suite/groups/function-signatures/case043.json b/test/test-suite/groups/function-signatures/case043.json new file mode 100644 index 00000000..4c55d42f --- /dev/null +++ b/test/test-suite/groups/function-signatures/case043.json @@ -0,0 +1,9 @@ +{ + "expr": "λ($arg1, $arg2)<s?n+>{{\"arg1\": $arg1, \"arg2\": $arg2}}(\"1\", 2, 3)", + "dataset": null, + "bindings": {}, + "result": { + "arg1": "1", + "arg2": [2, 3] + } +} \ No newline at end of file diff 
--git a/test/test-suite/groups/function-signatures/case044.json b/test/test-suite/groups/function-signatures/case044.json new file mode 100644 index 00000000..3259763c --- /dev/null +++ b/test/test-suite/groups/function-signatures/case044.json @@ -0,0 +1,6 @@ +{ + "expr": "λ($arg1, $arg2)<s?n?>{{\"arg1\": $arg1, \"arg2\": $arg2}}(2)", + "dataset": null, + "bindings": {}, + "result": {"arg2": 2} +} \ No newline at end of file From c127bc01b9f2856432720b5766de198216a4b0ce Mon Sep 17 00:00:00 2001 From: "mikhail.barg" Date: Sun, 26 Apr 2026 14:23:40 +0300 Subject: [PATCH 2/2] - added signature test cases from https://try.jsonata.org/99bbwUgg0 - approached signature2 handling [WIP] --- package.json | 1 + src/signature2.js | 364 ++++++++++++++++++ test/signature2-tests.js | 49 +++ .../groups/function-signatures/case045.json | 6 + .../groups/function-signatures/case046.json | 6 + .../groups/function-signatures/case047.json | 6 + 6 files changed, 432 insertions(+) create mode 100644 src/signature2.js create mode 100644 test/signature2-tests.js create mode 100644 test/test-suite/groups/function-signatures/case045.json create mode 100644 test/test-suite/groups/function-signatures/case046.json create mode 100644 test/test-suite/groups/function-signatures/case047.json diff --git a/package.json b/package.json index 120d8e56..19466af9 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "scripts": { "pretest": "npm run lint", "mocha": "nyc ./node_modules/mocha/bin/_mocha -- \"test/**/*.js\"", + "mocha2": "nyc ./node_modules/mocha/bin/_mocha -- \"test/signature2-tests.js\"", "test": "npm run mocha", "posttest": "npm run check-coverage && npm run browserify && npm run minify && npm run build-es5", "build-es5": "npm run mkdir-dist && npm run regenerator && npm run browserify-es5 && npm run minify-es5", diff --git a/src/signature2.js b/src/signature2.js new file mode 100644 index 00000000..eb121034 --- /dev/null +++ b/src/signature2.js @@ -0,0 +1,364 @@ +var utils = 
require('./utils'); + +const signature = (() => { + 'use strict'; + + /** + * Parses a function signature definition and returns a validation function + * @param {string} signature - the signature between the + * @returns {Function} validation function + */ + function parseSignature(signature) { + // https://swtch.com/~rsc/regexp/regexp1.html + + const context_index = -1; //fake index for context value to use as `source_arg_index` in match.add_matched_arg + const undefined_index = -2; //fake index for undefined value to use as `source_arg_index` in match.add_matched_arg + + class AutomataState { + constructor() {} + push(current_match, match_info) { + throw "should not happen"; + } + link(to_state) { + this.next_state = to_state; + } + } + + class EndState extends AutomataState { + constructor() { + super(); + } + push(current_match, match_info) { + current_match.success = true; + match_info.check_match(current_match); + } + } + + class MatchContextState extends AutomataState { + constructor(matcher, target_arg_index) { + super(); + this.matcher = matcher; + this.target_arg_index = target_arg_index; + } + push(current_match, match_info) { + if (match_info.context === undefined) { + // no context to match against -> fail + match_info.check_match(current_match); + } else if (this.matcher.match(match_info.context)) { + // success + current_match.add_matched_arg(this.target_arg_index, context_index); + current_match.uses_context = true; + // proceed + this.next_state.push(current_match, match_info); + } else { + // failed to match + match_info.check_match(current_match); + } + } + } + + class EmptyMatchState extends AutomataState { + constructor(target_arg_index) { + super(); + this.target_arg_index = target_arg_index; + } + push(current_match, match_info) { + current_match.add_matched_arg(this.target_arg_index, undefined_index); + // proceed + this.next_state.push(current_match, match_info); + } + } + + class MatchArgState extends AutomataState { + 
constructor(matcher, target_arg_index) { + super(); + this.matcher = matcher; + this.target_arg_index = target_arg_index; + } + push(current_match, match_info) { + if (current_match.source_arg_index >= match_info.args.length) { + //no source arg to match -> fail + match_info.check_match(current_match); + return; + } + const value = match_info.args[current_match.source_arg_index] + if (this.matcher.match(value)) { + // success + current_match.add_matched_arg(this.target_arg_index, current_match.source_arg_index); + current_match.source_arg_index++; + // proceed + this.next_state.push(current_match, match_info); + } else { + // failed to match + match_info.check_match(current_match); + } + } + } + + class SplitState extends AutomataState { + constructor(alternative_state) { + super(); + this.alternative_state = alternative_state; + } + push(current_match, match_info) { + const alternative_match = current_match.clone(); + this.next_state.push(current_match, match_info); + this.alternative_state.push(alternative_match, match_info); + } + } + + class AutomataFragment { + constructor(state) { + this.start_state = state; + this.end_states = [state]; + } + link(to_fragment) { + this.end_states.forEach((end_state) => { + end_state.link(to_fragment.start_state); + }) + } + } + + function getSymbol(value) { + let symbol; + if (utils.isFunction(value)) { + symbol = 'f'; + } else { + const type = typeof value; + switch (type) { + case 'string': + symbol = 's'; + break; + case 'number': + symbol = 'n'; + break; + case 'boolean': + symbol = 'b'; + break; + case 'object': + if (value === null) { + symbol = 'l'; + } else if (Array.isArray(value)) { + symbol = 'a'; + } else { + symbol = 'o'; + } + break; + case 'undefined': + default: + // any value can be undefined, but should be allowed to match + symbol = 'm'; // m for missing + } + } + return symbol; + }; + + function createMatcher(char) { + let chars = ''; + switch (char) { + case 'b': + case 'n': + case 's': + case 'l': + case 
'o': + chars = char; + break; + case 'a': + case 'f': + //TODO: sub signatures! + chars = char; + break; + case 'u': + chars = 'bnsl'; + break; + case 'j': + chars = 'bnsloa'; + break; + case 'x': + chars = 'bnsloaf'; + } + // any value can be undefined, but should be allowed to match + chars = `${chars}m`; // m for missing + return { + chars: chars, + match: function(value) { + const symbol = getSymbol(value); + return chars.includes(symbol); + const type = typeof val; + switch (type) { + case 'number': + return true; + default: + return false; + } + } + }; + } + + + + let fragments = []; + let target_arg_index = 0; + for (let position = 0; position < signature.length; position++) { + let symbol = signature.charAt(position); + if (symbol === ':') { + // TODO figure out what to do with the return type + // ignore it for now + break; + } + switch (symbol) { + case '+': + { + if (fragments.length === 0) { + throw "empty '+'"; + } + const last_fragment = fragments.at(-1); + const split = new SplitState(last_fragment.start_state); + last_fragment.end_states.forEach((end_state) => { + end_state.link(split); + }); + last_fragment.end_states = [split]; + } + break; + case '-': + { + if (fragments.length === 0) { + throw "empty '-'"; + } + const last_fragment = fragments.at(-1); + if (last_fragment.end_states.length !== 1 + || last_fragment.start_state !== last_fragment.end_states[0] + || !(last_fragment.start_state instanceof MatchArgState) + ) { + throw "applying '-' to non-matcher state"; + } + const old_state = last_fragment.start_state; + const split = new SplitState(old_state); + const new_state = new MatchContextState(old_state.matcher, old_state.target_arg_index); + split.link(new_state); + last_fragment.end_states.push(new_state); + last_fragment.start_state = split; + } + break; + case '?': + { + if (fragments.length === 0) { + throw "empty '?'"; + } + const last_fragment = fragments.at(-1); + const split = new SplitState(last_fragment.start_state); + const 
new_state = new EmptyMatchState(target_arg_index - 1); //for previous arg + split.link(new_state); + last_fragment.end_states.push(new_state); + last_fragment.start_state = split; + } + break; + default: + { + fragments.push( + new AutomataFragment( + new MatchArgState(createMatcher(symbol), target_arg_index) + ) + ); + target_arg_index++; + } + break; + } + } + fragments.push(new AutomataFragment(new EndState())); + + for (let fragment_index = 0; fragment_index < fragments.length - 1; fragment_index++) { + fragments[fragment_index].link(fragments[fragment_index + 1]); + } + + var automata_root_state = fragments[0].start_state; + + class Match { + constructor() { + this.success = false; + this.matched_args = []; + this.source_arg_index = 0; + this.uses_context = false; + } + add_matched_arg(match_arg_index, source_arg_index) { + if (this.matched_args[match_arg_index] === undefined) { + //no value yet -> set it + this.matched_args[match_arg_index] = source_arg_index; + } else if (Array.isArray(this.matched_args[match_arg_index])) { + //already have array of values -> append to it (support for '+') + this.matched_args[match_arg_index].push(source_arg_index); + } else { + //just a single value -> convert to array (support for '+') + this.matched_args[match_arg_index] = [this.matched_args[match_arg_index], source_arg_index]; + } + } + clone() { + var result = new Match(); + result.success = this.success; + result.matched_args = structuredClone(this.matched_args); + result.source_arg_index = this.source_arg_index; + result.uses_context = this.uses_context; + return result; + }; + } + + return { + definition: signature, + validate: function (args, context) { + + var match_info = { + args: args, + context: context, + best_match: new Match(), + check_match: function(match) { + if (this.best_match.success && !match.success) { + //nothing to do + } else if (!this.best_match.success && match.success) { + this.best_match = match; + } else if (match.source_arg_index > 
this.best_match.source_arg_index) { + this.best_match = match; + } else if (match.source_arg_index == this.best_match.source_arg_index + && match.uses_context + && !this.best_match.uses_context + ) { + this.best_match = match; + } + } + }; + + //actual match happens here + automata_root_state.push(new Match(), match_info); + + if (match_info.best_match.success) { + var result = []; + match_info.best_match.matched_args.forEach(arg_index => { + if (Array.isArray(arg_index)) { + var nested = []; + arg_index.forEach(sub_index => { + nested.push(match_info.args[sub_index]); + }); + result.push(nested); + } else if (arg_index === context_index) { + result.push(match_info.context); + } else if (arg_index === undefined_index) { + result.push(undefined); + } else { + result.push(match_info.args[arg_index]); + } + }); + return result; + } + throw { + code: "T0410", + stack: (new Error()).stack, + value: match_info.args[match_info.best_match.source_arg_index], + index: match_info.best_match.source_arg_index + 1 + }; + } + }; + } + + return parseSignature; +})(); + +module.exports = signature; diff --git a/test/signature2-tests.js b/test/signature2-tests.js new file mode 100644 index 00000000..90d42802 --- /dev/null +++ b/test/signature2-tests.js @@ -0,0 +1,49 @@ +"use strict"; + +var chai = require("chai"); +var chaiAsPromised = require("chai-as-promised"); +chai.use(chaiAsPromised); +var expect = chai.expect; +var parseSignature = require("../src/signature2"); + + +describe("Signature2 tests", () => { + describe("Should parse tests", function() { + const testCases = [ + { sig: 'n', args:[10], res:[10], descr:"Simple number" }, + { sig: 'nn', args:[10, 20], res:[10, 20], descr:"Simple 2 numbers" }, + { sig: 'ns', args:[10, "a"], res:[10, "a"], descr:"Simple number-string" }, + { sig: 'ns-', args:[10, "a"], res:[10, "a"], descr:"Number-string (may context)" }, + { sig: 'ns-', args:[10], context:"ctx", res:[10, "ctx"], descr:"Number-string (from context)" }, + { sig: 'ns-', 
args:[10, "a"], context:"ctx", res:[10, "a"], descr:"Number-string (not from context)" }, + { sig: 'ns?', args:[10, "a"], res:[10, "a"], descr:"Number-string optional" }, + { sig: 'ns?', args:[10], res:[10, undefined], descr:"Number-string undefined" }, + { sig: 's?n', args:["a", 10], res:["a", 10], descr:"String-number" }, + { sig: 's?n', args:[10], res:[undefined, 10], descr:"String-number undefined" }, + { sig: 's?s?', args:["a", "b"], res:["a", "b"], descr:"String-string" }, + { sig: 's?s?', args:["a"], res:[undefined, "a"], descr:"String-string ambiguous -- implementation specific" }, + { sig: 's?s?', args:[], res:[undefined, undefined], descr:"String-string ambiguous -- implementation specific" }, + { sig: 's?s', args:["a", "b"], res:["a", "b"], descr:"Leading optional - full" }, + { sig: 's?s', args:["b"], res:[undefined, "b"], descr:"Leading optional - undefined" }, + { sig: 'n+', args:[10], res:[10], descr:"One or more: single" }, + { sig: 'n+', args:[10, 20], res:[[10, 20]], descr:"One or more: array pack" }, + { sig: 'n+s', args:[10, 20, "s"], res:[[10, 20], "s"], descr:"One or more: array pack, string" }, + { sig: 'n+n', args:[10, 20, 30], res:[[10, 20], 30], descr:"One or more: array pack properly (035)" }, + { sig: 'ns', args:[10, "s", 30], res:[10, "s"], descr:"Excessive args are skipped" }, + { sig: 's?n+', args:[10, 20], res:[undefined, [10, 20]], descr:"(041)" }, + { sig: 's?n+', args:["s", 10, 20], res:["s", [10, 20]], descr:"(042)" }, + { sig: 's?n?', args:[20], res:[undefined, 20], descr:"(044)" }, + { sig: 'n?s-', args:[], context:"b", res:[undefined, "b"], descr:"(045)" }, + { sig: 's?s-', args:[], context:"b", res:[undefined, "b"], descr:"(046)" }, + { sig: 's?s-', args:["a"], context:"b", res:["a", "b"], descr:"(047)" }, + + ]; + testCases.forEach((c) => { + it(`${c.sig} ${c.args} -> ${c.res} : ${c.descr}`, function() { + const signature = parseSignature(c.sig); + const args = signature.validate(c.args, c.context); + 
expect(args).to.deep.equal(c.res); + }) + }); + }); +}); diff --git a/test/test-suite/groups/function-signatures/case045.json b/test/test-suite/groups/function-signatures/case045.json new file mode 100644 index 00000000..ad55a033 --- /dev/null +++ b/test/test-suite/groups/function-signatures/case045.json @@ -0,0 +1,6 @@ +{ + "expr": "($numStr := λ($a, $b) <n?s-> {{'num1': $a,'str2': $b}}; 'str2'~> $numStr; )", + "dataset": null, + "bindings": {}, + "result": {"str2": "str2"} +} \ No newline at end of file diff --git a/test/test-suite/groups/function-signatures/case046.json b/test/test-suite/groups/function-signatures/case046.json new file mode 100644 index 00000000..45a75bd6 --- /dev/null +++ b/test/test-suite/groups/function-signatures/case046.json @@ -0,0 +1,6 @@ +{ + "expr": "( $strStr := λ($a, $b) <s?s-> { { 'str1': $a, 'str2': $b } }; 'str2' ~> $strStr; )", + "dataset": null, + "bindings": {}, + "result": {"str2": "str2"} +} \ No newline at end of file diff --git a/test/test-suite/groups/function-signatures/case047.json b/test/test-suite/groups/function-signatures/case047.json new file mode 100644 index 00000000..c3548911 --- /dev/null +++ b/test/test-suite/groups/function-signatures/case047.json @@ -0,0 +1,6 @@ +{ + "expr": "( $strStr := λ($a, $b) <s?s-> { { 'str1': $a, 'str2': $b } }; 'str2' ~> $strStr('str1'); )", + "dataset": null, + "bindings": {}, + "result": {"str1": "str1", "str2": "str2"} +} \ No newline at end of file