diff --git a/encoders/src/main/scala/au/csiro/pathling/sql/types/FlexiDecimalSupport.scala b/encoders/src/main/scala/au/csiro/pathling/sql/types/FlexiDecimalSupport.scala index 2525a57ae1..5a0bed1ca3 100644 --- a/encoders/src/main/scala/au/csiro/pathling/sql/types/FlexiDecimalSupport.scala +++ b/encoders/src/main/scala/au/csiro/pathling/sql/types/FlexiDecimalSupport.scala @@ -47,7 +47,7 @@ object FlexiDecimalSupport { lit(normalizedValue.scale()).as("scale") ) } else { - lit(null) + lit(null).cast(FlexiDecimal.DATA_TYPE) } } diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/encoding/QuantityEncoding.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/encoding/QuantityEncoding.java index 322c330eed..fb8bde07a4 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/encoding/QuantityEncoding.java +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/encoding/QuantityEncoding.java @@ -253,17 +253,22 @@ private static Ucum.ValueWithUnit canonicalOf(@Nonnull final FhirPathQuantity qu public static Column encodeLiteral(@Nonnull final FhirPathQuantity quantity) { final BigDecimal value = quantity.getValue(); @Nullable final Ucum.ValueWithUnit canonical = canonicalOf(quantity); + // Cast the struct to the canonical Quantity schema so that fields set to lit(null) carry their + // declared types instead of Spark's NullType (VOID). VOID-typed fields prevent the struct from + // being converted to VARIANT, which is required by variantTransformTree in + // repeat()/repeatAll(). return toStruct( - lit(null), - lit(value), - lit(value.scale()), - lit(null), - lit(quantity.getUnitName()), - lit(quantity.getSystem()), - lit(quantity.getCode()), - FlexiDecimalSupport.toLiteral(canonical != null ? canonical.value() : null), - lit(canonical != null ? canonical.unit() : null), - lit(null)); + lit(null), + lit(value), + lit(value.scale()), + lit(null), + lit(quantity.getUnitName()), + lit(quantity.getSystem()), + lit(quantity.getCode()), + FlexiDecimalSupport.toLiteral(canonical != null ? canonical.value() : null), + lit(canonical != null ? canonical.unit() : null), + lit(null)) + .cast(dataType()); } /** diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/CombineOperator.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/CombineOperator.java new file mode 100644 index 0000000000..c99f730bd5 --- /dev/null +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/CombineOperator.java @@ -0,0 +1,64 @@ +/* + * Copyright © 2018-2026 Commonwealth Scientific and Industrial Research + * Organisation (CSIRO) ABN 41 687 119 230. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package au.csiro.pathling.fhirpath.operator; + +import au.csiro.pathling.fhirpath.collection.Collection; +import jakarta.annotation.Nonnull; +import org.apache.spark.sql.Column; + +/** + * Provides the functionality of the FHIRPath {@code combine(other)} function, which merges two + * collections into a single collection without eliminating duplicate values. Combining an empty + * collection with a non-empty collection returns the non-empty collection. There is no expectation + * of order in the resulting collection. + * + *

Unlike {@link UnionOperator}, {@code combine} does not deduplicate and does not need to + * consult the collection's equality comparator. The array-level merge primitive is shared with + * {@link UnionOperator} via {@link CombiningLogic}. + * + * @author Piotr Szul + * @see combine + */ +public class CombineOperator extends SameTypeBinaryOperator { + + @Nonnull + @Override + protected Collection handleOneEmpty( + @Nonnull final Collection nonEmpty, @Nonnull final BinaryOperatorInput input) { + // Combine preserves duplicates, so no deduplication is required against an empty peer. + return nonEmpty; + } + + @Nonnull + @Override + protected Collection handleEquivalentTypes( + @Nonnull final Collection left, + @Nonnull final Collection right, + @Nonnull final BinaryOperatorInput input) { + final Column leftArray = CombiningLogic.prepareArray(left); + final Column rightArray = CombiningLogic.prepareArray(right); + final Column combined = CombiningLogic.combineArrays(leftArray, rightArray); + return left.copyWithColumn(combined); + } + + @Nonnull + @Override + public String getOperatorName() { + return "combine"; + } +} diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/CombiningLogic.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/CombiningLogic.java new file mode 100644 index 0000000000..cf53854a39 --- /dev/null +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/CombiningLogic.java @@ -0,0 +1,116 @@ +/* + * Copyright © 2018-2026 Commonwealth Scientific and Industrial Research + * Organisation (CSIRO) ABN 41 687 119 230. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package au.csiro.pathling.fhirpath.operator; + +import static org.apache.spark.sql.functions.array_distinct; +import static org.apache.spark.sql.functions.array_union; +import static org.apache.spark.sql.functions.concat; + +import au.csiro.pathling.fhirpath.collection.Collection; +import au.csiro.pathling.fhirpath.collection.DecimalCollection; +import au.csiro.pathling.fhirpath.comparison.ColumnEquality; +import au.csiro.pathling.sql.SqlFunctions; +import jakarta.annotation.Nonnull; +import lombok.experimental.UtilityClass; +import org.apache.spark.sql.Column; + +/** + * Shared array-level primitives used by the FHIRPath combining operators. These helpers are used by + * {@link UnionOperator} (which deduplicates) and {@link CombineOperator} (which concatenates + * without deduplication) to share type reconciliation, Decimal normalization, and comparator-aware + * merging. + * + *

The helpers operate on already type-reconciled, non-empty {@link Collection} instances. Empty + * operand handling and type reconciliation remain the responsibility of the enclosing operator's + * {@link SameTypeBinaryOperator} template methods. + * + * @author Piotr Szul + */ +@UtilityClass +public class CombiningLogic { + + /** + * Extracts the array column from a collection in a form suitable for combining. For {@link + * DecimalCollection}, normalizes the Decimal representation to {@code DECIMAL(32,6)} so that two + * operands with different precisions can be merged without schema mismatch. + * + * @param collection the collection to extract the array column from + * @return the array column ready for combining + */ + @Nonnull + public static Column prepareArray(@Nonnull final Collection collection) { + if (collection instanceof final DecimalCollection decimalCollection) { + return decimalCollection.normalizeDecimalType().getColumn().plural().getValue(); + } + return collection.getColumn().plural().getValue(); + } + + /** + * Deduplicates the values in an array using the appropriate equality strategy. Types that use + * default SQL equality leverage Spark's {@code array_distinct}, while types with custom equality + * (Quantity, Coding, temporal types) use element-wise comparison via {@link + * SqlFunctions#arrayDistinctWithEquality}. + * + * @param arrayColumn the array column to deduplicate + * @param comparator the equality comparator that defines element equality + * @return the deduplicated array column + */ + @Nonnull + public static Column dedupeArray( + @Nonnull final Column arrayColumn, @Nonnull final ColumnEquality comparator) { + if (comparator.usesDefaultSqlEquality()) { + return array_distinct(arrayColumn); + } + return SqlFunctions.arrayDistinctWithEquality(arrayColumn, comparator::equalsTo); + } + + /** + * Merges two arrays and deduplicates the result using the appropriate equality strategy. Types + * that use default SQL equality leverage Spark's {@code array_union}, while types with custom + * equality use element-wise comparison via {@link SqlFunctions#arrayUnionWithEquality}. + * + * @param leftArray the left array column + * @param rightArray the right array column + * @param comparator the equality comparator that defines element equality + * @return the merged, deduplicated array column + */ + @Nonnull + public static Column unionArrays( + @Nonnull final Column leftArray, + @Nonnull final Column rightArray, + @Nonnull final ColumnEquality comparator) { + if (comparator.usesDefaultSqlEquality()) { + return array_union(leftArray, rightArray); + } + return SqlFunctions.arrayUnionWithEquality(leftArray, rightArray, comparator::equalsTo); + } + + /** + * Concatenates two arrays without deduplication, preserving all duplicate values from both + * operands. Used by the FHIRPath {@code combine(other)} function. + * + * @param leftArray the left array column + * @param rightArray the right array column + * @return the concatenated array column + */ + @Nonnull + public static Column combineArrays( + @Nonnull final Column leftArray, @Nonnull final Column rightArray) { + return concat(leftArray, rightArray); + } +} diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/UnionOperator.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/UnionOperator.java index aa8c711fc5..abeb4899ed 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/UnionOperator.java +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/UnionOperator.java @@ -17,13 +17,7 @@ package au.csiro.pathling.fhirpath.operator; -import static org.apache.spark.sql.functions.array_distinct; -import static org.apache.spark.sql.functions.array_union; - import au.csiro.pathling.fhirpath.collection.Collection; -import au.csiro.pathling.fhirpath.collection.DecimalCollection; -import au.csiro.pathling.fhirpath.comparison.ColumnEquality; -import au.csiro.pathling.sql.SqlFunctions; import jakarta.annotation.Nonnull; import org.apache.spark.sql.Column; @@ -38,7 +32,8 @@ * *

Equality semantics are determined by the collection's comparator. Types using default SQL * equality leverage Spark's native array operations, while types with custom equality (Quantity, - * Coding, temporal types) use element-wise comparison. + * Coding, temporal types) use element-wise comparison. The array-level merge primitives are shared + * with {@link CombineOperator} via {@link CombiningLogic}. * * @author Piotr Szul * @see union @@ -49,8 +44,8 @@ public class UnionOperator extends SameTypeBinaryOperator { @Override protected Collection handleOneEmpty( @Nonnull final Collection nonEmpty, @Nonnull final BinaryOperatorInput input) { - final Column array = getArrayForUnion(nonEmpty); - final Column deduplicatedArray = deduplicateArray(array, nonEmpty.getComparator()); + final Column array = CombiningLogic.prepareArray(nonEmpty); + final Column deduplicatedArray = CombiningLogic.dedupeArray(array, nonEmpty.getComparator()); return nonEmpty.copyWithColumn(deduplicatedArray); } @@ -60,66 +55,13 @@ protected Collection handleEquivalentTypes( @Nonnull final Collection left, @Nonnull final Collection right, @Nonnull final BinaryOperatorInput input) { - - final Column leftArray = getArrayForUnion(left); - final Column rightArray = getArrayForUnion(right); - final Column unionResult = unionArrays(leftArray, rightArray, left.getComparator()); - + final Column leftArray = CombiningLogic.prepareArray(left); + final Column rightArray = CombiningLogic.prepareArray(right); + final Column unionResult = + CombiningLogic.unionArrays(leftArray, rightArray, left.getComparator()); return left.copyWithColumn(unionResult); } - /** - * Extracts and prepares an array column for union operations. For DecimalCollection, normalizes - * to DECIMAL(32,6) to ensure type compatibility. - * - * @param collection the collection to extract array from - * @return the array column ready for union operation - */ - @Nonnull - private Column getArrayForUnion(@Nonnull final Collection collection) { - if (collection instanceof DecimalCollection decimalCollection) { - return decimalCollection.normalizeDecimalType().getColumn().plural().getValue(); - } - return collection.getColumn().plural().getValue(); - } - - /** - * Deduplicates an array using the appropriate strategy based on comparator type. - * - * @param arrayColumn the array column to deduplicate - * @param comparator the equality comparator to use - * @return deduplicated array column - */ - @Nonnull - private Column deduplicateArray( - @Nonnull final Column arrayColumn, @Nonnull final ColumnEquality comparator) { - if (comparator.usesDefaultSqlEquality()) { - return array_distinct(arrayColumn); - } else { - return SqlFunctions.arrayDistinctWithEquality(arrayColumn, comparator::equalsTo); - } - } - - /** - * Merges and deduplicates two arrays using the appropriate strategy. - * - * @param leftArray the left array column - * @param rightArray the right array column - * @param comparator the equality comparator to use - * @return merged and deduplicated array column - */ - @Nonnull - private Column unionArrays( - @Nonnull final Column leftArray, - @Nonnull final Column rightArray, - @Nonnull final ColumnEquality comparator) { - if (comparator.usesDefaultSqlEquality()) { - return array_union(leftArray, rightArray); - } else { - return SqlFunctions.arrayUnionWithEquality(leftArray, rightArray, comparator::equalsTo); - } - } - @Nonnull @Override public String getOperatorName() { diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/parser/Visitor.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/parser/Visitor.java index d81c30c825..2837532848 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/parser/Visitor.java +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/parser/Visitor.java @@ -19,6 +19,7 @@ import static java.util.Objects.requireNonNull; +import au.csiro.pathling.errors.InvalidUserInputError; import au.csiro.pathling.errors.UnsupportedFhirPathFeatureError; import au.csiro.pathling.fhirpath.FhirPath; import au.csiro.pathling.fhirpath.collection.Collection; @@ -26,12 +27,14 @@ import au.csiro.pathling.fhirpath.operator.AsOperator; import au.csiro.pathling.fhirpath.operator.BinaryOperatorType; import au.csiro.pathling.fhirpath.operator.CollectionOperations; +import au.csiro.pathling.fhirpath.operator.CombineOperator; import au.csiro.pathling.fhirpath.operator.FhirPathBinaryOperator; import au.csiro.pathling.fhirpath.operator.IsOperator; import au.csiro.pathling.fhirpath.operator.MethodDefinedOperator; import au.csiro.pathling.fhirpath.operator.MethodInvocationError; import au.csiro.pathling.fhirpath.operator.PolarityOperator; import au.csiro.pathling.fhirpath.operator.SubsettingOperations; +import au.csiro.pathling.fhirpath.operator.UnionOperator; import au.csiro.pathling.fhirpath.parser.generated.FhirPathBaseVisitor; import au.csiro.pathling.fhirpath.parser.generated.FhirPathParser.AdditiveExpressionContext; import au.csiro.pathling.fhirpath.parser.generated.FhirPathParser.AndExpressionContext; @@ -49,6 +52,7 @@ import au.csiro.pathling.fhirpath.parser.generated.FhirPathParser.TypeExpressionContext; import au.csiro.pathling.fhirpath.parser.generated.FhirPathParser.UnionExpressionContext; import au.csiro.pathling.fhirpath.path.Paths; +import au.csiro.pathling.fhirpath.path.Paths.EvalFunction; import au.csiro.pathling.fhirpath.path.Paths.EvalOperator; import jakarta.annotation.Nonnull; import jakarta.annotation.Nullable; @@ -99,9 +103,47 @@ public FhirPath visitInvocationExpression(@Nullable final InvocationExpressionCo final FhirPath invocationSubject = new Visitor().visit(requireNonNull(ctx).expression()); final FhirPath invocationVerb = ctx.invocation().accept(new InvocationVisitor()); + + // Desugar the FHIRPath combining functions `x.union(y)` and `x.combine(y)` into + // `EvalOperator` ASTs so the function forms are strictly equivalent to the `|` operator + // (and to a peer `combine` operator). This is the only way to honour the spec's + // `name.select(use.union(given)) ≡ name.select(use | given)` equivalence: evaluating + // the argument via the normal function invocation path would lose the surrounding + // iteration focus. See https://hl7.org/fhirpath/#combining for the spec definition. + if (invocationVerb instanceof final EvalFunction call) { + @Nullable + final FhirPathBinaryOperator combiningOperator = + combiningOperatorFor(call.functionIdentifier()); + if (combiningOperator != null) { + if (call.arguments().size() != 1) { + throw new InvalidUserInputError( + "Function `" + + call.functionIdentifier() + + "` requires exactly one argument, got " + + call.arguments().size()); + } + return new EvalOperator(invocationSubject, call.arguments().getFirst(), combiningOperator); + } + } + return invocationSubject.andThen(invocationVerb); } + private static final Map COMBINING_FUNCTION_OPERATORS = + Map.of("union", new UnionOperator(), "combine", new CombineOperator()); + + /** + * Returns the binary operator instance to use when desugaring a combining-function invocation, or + * {@code null} if the function name is not one of the combining functions. + * + * @param functionName The FHIRPath function name being invoked + * @return The operator to use for desugaring, or {@code null} for any other function name + */ + @Nullable + private static FhirPathBinaryOperator combiningOperatorFor(@Nonnull final String functionName) { + return COMBINING_FUNCTION_OPERATORS.get(functionName); + } + private static final Map BINARY_OPERATORS = MethodDefinedOperator.mapOf(CollectionOperations.class); diff --git a/fhirpath/src/test/java/au/csiro/pathling/fhirpath/dsl/CombiningFunctionsDslTest.java b/fhirpath/src/test/java/au/csiro/pathling/fhirpath/dsl/CombiningFunctionsDslTest.java new file mode 100644 index 0000000000..1fccc44258 --- /dev/null +++ b/fhirpath/src/test/java/au/csiro/pathling/fhirpath/dsl/CombiningFunctionsDslTest.java @@ -0,0 +1,469 @@ +/* + * Copyright © 2018-2026 Commonwealth Scientific and Industrial Research + * Organisation (CSIRO) ABN 41 687 119 230. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package au.csiro.pathling.fhirpath.dsl; + +import static au.csiro.pathling.test.yaml.FhirTypedLiteral.toCoding; +import static au.csiro.pathling.test.yaml.FhirTypedLiteral.toDate; +import static au.csiro.pathling.test.yaml.FhirTypedLiteral.toDateTime; +import static au.csiro.pathling.test.yaml.FhirTypedLiteral.toQuantity; +import static au.csiro.pathling.test.yaml.FhirTypedLiteral.toTime; + +import au.csiro.pathling.test.dsl.FhirPathDslTestBase; +import au.csiro.pathling.test.dsl.FhirPathTest; +import java.util.List; +import java.util.stream.Stream; +import org.junit.jupiter.api.DynamicTest; + +/** + * Tests for the FHIRPath combining functions {@code union()} and {@code combine()}. The function + * forms are parser-desugared into the same AST used by the {@code |} operator (for {@code union}) + * and a peer {@code CombineOperator} (for {@code combine}), so this class focuses on proving the + * desugaring works across the type matrix, the iteration-context equivalence with the operator + * form, the duplicate-preservation semantics of {@code combine}, and the arity / type error paths. + * Exhaustive merge-semantics coverage for the {@code union} case is provided by {@link + * CombiningOperatorsDslTest}. + */ +public class CombiningFunctionsDslTest extends FhirPathDslTestBase { + + // --------------------------------------------------------------------------------------------- + // union() — parser desugaring sanity across the type matrix. + // --------------------------------------------------------------------------------------------- + + @FhirPathTest + public Stream testUnionFunctionEmptyCollections() { + return builder() + .withSubject(sb -> sb) + .group("union() empty operand handling") + .testEmpty("{}.union({})", "union() of two empty collections is empty") + .testEmpty("({} | {}).union({} | {})", "Grouped empty unions via function form") + .testEquals( + List.of(1, 2), + "{}.union(1 | 1 | 2)", + "union() with empty input deduplicates the argument collection") + .testEquals( + List.of(1, 2, 3), + "(1 | 1 | 2 | 3).union({})", + "union() with empty argument deduplicates the input collection") + .build(); + } + + @FhirPathTest + public Stream testUnionFunctionPrimitives() { + return builder() + .withSubject( + sb -> + sb.boolArray("trueFalse", true, false) + .integerArray("oneTwo", 1, 2) + .integerArray("twoThree", 2, 3) + .stringArray("abc", "a", "b", "c") + .stringArray("bcd", "b", "c", "d") + .decimalArray("dec12", 1.1, 2.2)) + .group("union() smoke tests for primitive types") + .testEquals(List.of(true, false), "true.union(false)", "Boolean union()") + .testTrue("true.union(true)", "Boolean union() deduplicates") + .testEquals( + List.of(1, 2, 3), "oneTwo.union(twoThree)", "Integer union() merges and deduplicates") + .testEquals( + List.of(1, 2), "oneTwo.union(oneTwo)", "Integer union() deduplicates identical arrays") + .testEquals( + List.of("a", "b", "c", "d"), "abc.union(bcd)", "String union() merges and deduplicates") + .testEquals( + List.of(1.1, 2.2), "dec12.union(dec12)", "Decimal union() deduplicates identical") + .testEquals(List.of(1.0, 2.0), "1.union(2.0)", "Integer.union(Decimal) promotes to Decimal") + .testEquals(List.of(1.0, 2.0), "1.0.union(2)", "Decimal.union(Integer) promotes to Decimal") + .testEquals(List.of(true, false), "trueFalse.union({})", "union({}) deduplicates input") + .build(); + } + + @FhirPathTest + public Stream testUnionFunctionTemporalAndQuantityAndCoding() { + return builder() + .withSubject(sb -> sb) + .group("union() smoke tests for temporal, Quantity, and Coding types") + .testEquals( + toDate("2020-01-01"), + "@2020-01-01.union(@2020-01-01)", + "Date union() deduplicates identical dates") + .testEquals( + List.of(toDate("2020-01-01"), toDate("2020-01-02")), + "@2020-01-01.union(@2020-01-02)", + "Date union() merges distinct dates") + .testEquals( + toDateTime("2020-01-01T10:00:00"), + "@2020-01-01T10:00:00.union(@2020-01-01T10:00:00)", + "DateTime union() deduplicates identical datetimes") + .testEquals( + toTime("12:00"), "@T12:00.union(@T12:00)", "Time union() deduplicates identical times") + .testEquals( + toQuantity("1000 'mg'"), + "1000 'mg'.union(1 'g')", + "Quantity union() recognises equal values in different units") + .testEquals( + toCoding("http://loinc.org|8867-4||'Heart rate'"), + "http://loinc.org|8867-4||'Heart rate'.union(" + + "http://loinc.org|8867-4||'Heart rate')", + "Coding union() deduplicates identical codings") + .build(); + } + + // --------------------------------------------------------------------------------------------- + // combine() — full coverage (this is the genuinely new behaviour). + // --------------------------------------------------------------------------------------------- + + @FhirPathTest + public Stream testCombineEmptyCollections() { + return builder() + .withSubject(sb -> sb) + .group("combine() empty operand handling") + .testEmpty("{}.combine({})", "combine() of two empty collections is empty") + .testEquals(1, "{}.combine(1)", "combine() with empty input returns non-empty argument") + .testEquals(1, "1.combine({})", "combine() with empty argument returns non-empty input") + .testEquals( + List.of(1, 2, 3), + "{}.combine(1 | 2 | 3)", + "combine({}, non-empty) returns the non-empty operand") + .testEquals( + List.of(1, 2, 3), + "(1 | 2 | 3).combine({})", + "combine(non-empty, {}) returns the non-empty operand") + .build(); + } + + @FhirPathTest + public Stream testCombinePreservesDuplicates() { + return builder() + .withSubject( + sb -> + sb.integerArray("oneOne", 1, 1) + .integerArray("oneTwoThree", 1, 2, 3) + .stringArray("aa", "a", "a") + .stringArray("ab", "a", "b")) + .group("combine() duplicate preservation") + .testEquals( + List.of(1, 1), + "1.combine(1)", + "combine() of two equal singletons preserves both copies") + .testEquals( + List.of(1, 1, 1), + "oneOne.combine(1)", + "combine() preserves duplicates within left operand") + .testEquals( + List.of(1, 1, 1, 2, 3), + "oneOne.combine(oneTwoThree)", + "combine() concatenates without deduplicating between operands") + .testEquals( + List.of(1, 2, 3, 1, 1), + "oneTwoThree.combine(oneOne)", + "combine() preserves operand order of duplicates (right concatenation)") + .testEquals( + List.of("a", "a", "a", "b"), + "aa.combine(ab)", + "combine() preserves duplicates across String arrays") + .build(); + } + + @FhirPathTest + public Stream testCombineTypePromotion() { + return builder() + .withSubject(sb -> sb) + .group("combine() type reconciliation") + .testEquals(List.of(1.0, 2.0), "1.combine(2.0)", "combine() promotes Integer to Decimal") + .testEquals( + List.of(1.0, 2.0, 2.0), + "(1 | 2).combine(2.0)", + "combine() promotes Integer collection to Decimal and preserves duplicates") + .testEquals( + List.of(1.0, 1.0), + "1.combine(1.0)", + "combine() does not deduplicate equal values across promoted types") + .build(); + } + + @FhirPathTest + public Stream testCombineQuantityAndCodingNoDedup() { + return builder() + .withSubject(sb -> sb) + .group("combine() does not deduplicate Quantity or Coding values") + .testEquals( + List.of(toQuantity("1000 'mg'"), toQuantity("1 'g'")), + "1000 'mg'.combine(1 'g')", + "combine() keeps both Quantity values even when they are equal under Quantity" + + " equality") + .testEquals( + List.of( + toCoding("http://loinc.org|8867-4||'Heart rate'"), + toCoding("http://loinc.org|8867-4||'Heart rate'")), + "http://loinc.org|8867-4||'Heart rate'.combine(" + + "http://loinc.org|8867-4||'Heart rate')", + "combine() keeps both Coding values when they are identical") + .build(); + } + + @FhirPathTest + public Stream testCombineTemporal() { + return builder() + .withSubject(sb -> sb) + .group("combine() on temporal types does not deduplicate") + .testEquals( + List.of(toDate("2020-01-01"), toDate("2020-01-01")), + "@2020-01-01.combine(@2020-01-01)", + "combine() preserves identical Date values") + .testEquals( + List.of(toDateTime("2020-01-01T10:00:00"), toDateTime("2020-01-01T10:00:00")), + "@2020-01-01T10:00:00.combine(@2020-01-01T10:00:00)", + "combine() preserves identical DateTime values") + .testEquals( + List.of(toTime("12:00"), toTime("12:00")), + "@T12:00.combine(@T12:00)", + "combine() preserves identical Time values") + .build(); + } + + // --------------------------------------------------------------------------------------------- + // union() is equivalent to the `|` operator across the type matrix. + // --------------------------------------------------------------------------------------------- + + @FhirPathTest + public Stream testUnionFunctionEquivalentToPipe() { + return builder() + .withSubject(sb -> sb.integerArray("ints", 1, 2, 3).stringArray("strs", "a", "b")) + .group("union() and `|` produce identical results") + .testEquals(List.of(1, 2, 3), "ints.union(ints)", "union() self-merge deduplicates") + .testEquals(List.of(1, 2, 3), "ints | ints", "`|` self-merge deduplicates (control)") + .testEquals(List.of("a", "b"), "strs.union(strs)", "union() self-merge deduplicates") + .testEquals(List.of("a", "b"), "strs | strs", "`|` self-merge deduplicates (control)") + .testEquals( + List.of(1, 2, 3, 4), "ints.union(4)", "union() with literal argument merges with dedup") + .testEquals( + List.of(1, 2, 3, 4), + "ints | 4", + "`|` with literal argument merges with dedup (control)") + .testEquals( + List.of(1, 2, 3, 4), + "(ints.union(3)).union(4)", + "Chained union() calls behave like chained `|`") + .testEquals( + List.of(1, 2, 3, 4), + "(ints | 3) | 4", + "Chained `|` operators produce the same result (control)") + .build(); + } + + /** + * Pairs a {@code union()} call against the equivalent {@code |} expression across the full + * FHIRPath type matrix. Every primitive type required by the spec's "Union is equivalent to the + * pipe operator" scenario gets a direct per-type comparison here, so that any future regression + * that breaks the parser desugaring (or the structural equivalence it relies on) fails loudly in + * this test. + */ + @FhirPathTest + public Stream testUnionFunctionEquivalentToPipeAcrossTypeMatrix() { + return builder() + .withSubject(sb -> sb) + .group("union() ≡ `|` — Boolean") + .testEquals(List.of(true, false), "true.union(false)", "Boolean union() function form") + .testEquals(List.of(true, false), "true | false", "Boolean union operator form (control)") + .group("union() ≡ `|` — Integer") + .testEquals(List.of(1, 2, 3), "(1 | 2).union(2 | 3)", "Integer union() function form") + .testEquals(List.of(1, 2, 3), "(1 | 2) | (2 | 3)", "Integer union operator form (control)") + .group("union() ≡ `|` — Decimal") + .testEquals( + List.of(1.1, 2.2, 3.3), "(1.1 | 2.2).union(2.2 | 3.3)", "Decimal union() function form") + .testEquals( + List.of(1.1, 2.2, 3.3), + "(1.1 | 2.2) | (2.2 | 3.3)", + "Decimal union operator form (control)") + .group("union() ≡ `|` — String") + .testEquals( + List.of("a", "b", "c"), "('a' | 'b').union('b' | 'c')", "String union() function form") + .testEquals( + List.of("a", "b", "c"), + "('a' | 'b') | ('b' | 'c')", + "String union operator form (control)") + .group("union() ≡ `|` — Date") + .testEquals( + List.of(toDate("2020-01-01"), toDate("2020-01-02")), + "@2020-01-01.union(@2020-01-02)", + "Date union() function form") + .testEquals( + List.of(toDate("2020-01-01"), toDate("2020-01-02")), + "@2020-01-01 | @2020-01-02", + "Date union operator form (control)") + .group("union() ≡ `|` — DateTime") + .testEquals( + List.of(toDateTime("2020-01-01T10:00:00"), toDateTime("2020-01-01T11:00:00")), + "@2020-01-01T10:00:00.union(@2020-01-01T11:00:00)", + "DateTime union() function form") + .testEquals( + List.of(toDateTime("2020-01-01T10:00:00"), toDateTime("2020-01-01T11:00:00")), + "@2020-01-01T10:00:00 | @2020-01-01T11:00:00", + "DateTime union operator form (control)") + .group("union() ≡ `|` — Time") + .testEquals( + List.of(toTime("12:00"), toTime("13:00")), + "@T12:00.union(@T13:00)", + "Time union() function form") + .testEquals( + List.of(toTime("12:00"), toTime("13:00")), + "@T12:00 | @T13:00", + "Time union operator form (control)") + .group("union() ≡ `|` — Quantity") + .testEquals( + toQuantity("1000 'mg'"), + "1000 'mg'.union(1 'g')", + "Quantity union() function form (equal values, first retained)") + .testEquals( + toQuantity("1000 'mg'"), "1000 'mg' | 1 'g'", "Quantity union operator form (control)") + .group("union() ≡ `|` — Coding") + .testEquals( + List.of( + toCoding("http://loinc.org|8867-4||'Heart rate'"), + toCoding("http://loinc.org|8480-6||'Systolic blood pressure'")), + "http://loinc.org|8867-4||'Heart rate'.union(" + + "http://loinc.org|8480-6||'Systolic blood pressure')", + "Coding union() function form") + .testEquals( + List.of( + toCoding("http://loinc.org|8867-4||'Heart rate'"), + toCoding("http://loinc.org|8480-6||'Systolic blood pressure'")), + "http://loinc.org|8867-4||'Heart rate' | " + + "http://loinc.org|8480-6||'Systolic blood pressure'", + "Coding union operator form (control)") + .build(); + } + + // --------------------------------------------------------------------------------------------- + // Iteration-context pin-down tests — the critical reason for parser desugaring. + // These expressions would silently return wrong results if union()/combine() were implemented + // as regular method-defined functions whose arguments go through the standard + // FunctionParameterResolver path. + // --------------------------------------------------------------------------------------------- + + @FhirPathTest + public Stream testUnionIterationContextInsideSelect() { + return builder() + .withSubject( + sb -> + sb.elementArray( + "names", + n1 -> n1.string("use", "official").stringArray("given", "John", "Johnny"), + n2 -> n2.string("use", "maiden").stringArray("given", "Smith"))) + .group( + "union() honours the surrounding iteration context" + + " (name.select(use.union(given)) ≡ name.select(use | given))") + .testEquals( + List.of("official", "John", "Johnny", "maiden", "Smith"), + "names.select(use.union(given))", + "Function form: `given` resolves against the current name element, not the root") + .testEquals( + List.of("official", "John", "Johnny", "maiden", "Smith"), + "names.select(use | given)", + "Operator form produces the same result (control)") + .build(); + } + + @FhirPathTest + public Stream testUnionIterationContextWithOverlap() { + return builder() + .withSubject( + sb -> + sb.elementArray( + "names", + n1 -> n1.string("use", "John").stringArray("given", "John", "Johnny"), + n2 -> n2.string("use", "Jane").stringArray("given", "Jane", "Janey"))) + .group("union() deduplicates within each iteration element") + .testEquals( + List.of("John", "Johnny", "Jane", "Janey"), + "names.select(use.union(given))", + "union() inside select deduplicates overlapping use/given per name element") + .testEquals( + List.of("John", "Johnny", "Jane", "Janey"), + "names.select(use | given)", + "`|` inside select deduplicates overlapping use/given per name element (control)") + .build(); + } + + @FhirPathTest + public Stream testCombineIterationContextInsideSelect() { + return builder() + .withSubject( + sb -> + sb.elementArray( + "names", + n1 -> n1.string("use", "John").stringArray("given", "John", "Johnny"), + n2 -> n2.string("use", "Jane").stringArray("given", "Jane", "Janey"))) + .group( + "combine() honours the surrounding iteration context and preserves duplicates" + + " per element") + .testEquals( + List.of("John", "John", "Johnny", "Jane", "Jane", "Janey"), + "names.select(use.combine(given))", + "Function form: `given` resolves per-name, and combine() preserves duplicates") + .build(); + } + + // --------------------------------------------------------------------------------------------- + // Error paths: incompatible polymorphic types. + // --------------------------------------------------------------------------------------------- + + @FhirPathTest + public Stream testUnionIncompatibleTypesRaisesError() { + return builder() + .withSubject(sb -> sb.integer("i", 1).bool("b", true).string("s", "x")) + .group("union() of incompatible polymorphic types raises an error") + .testError("i.union(b)", "Integer.union(Boolean) is not supported") + .testError("b.union(i)", "Boolean.union(Integer) is not supported") + .testError("s.union(i)", "String.union(Integer) is not supported") + .build(); + } + + @FhirPathTest + public Stream testCombineIncompatibleTypesRaisesError() { + return builder() + .withSubject(sb -> sb.integer("i", 1).bool("b", true).string("s", "x")) + .group("combine() of incompatible polymorphic types raises an error") + .testError("i.combine(b)", "Integer.combine(Boolean) is not supported") + .testError("b.combine(i)", "Boolean.combine(Integer) is not supported") + .testError("s.combine(i)", "String.combine(Integer) is not supported") + .build(); + } + + // --------------------------------------------------------------------------------------------- + // Arity errors: raised at parse time by the desugaring visitor. + // --------------------------------------------------------------------------------------------- + + @FhirPathTest + public Stream testUnionArityErrors() { + return builder() + .withSubject(sb -> sb.integerArray("ints", 1, 2, 3)) + .group("union() with wrong argument count is rejected at parse time") + .testError("ints.union()", "union() with zero arguments is rejected") + .testError("ints.union(1, 2)", "union() with two arguments is rejected") + .build(); + } + + @FhirPathTest + public Stream testCombineArityErrors() { + return builder() + .withSubject(sb -> sb.integerArray("ints", 1, 2, 3)) + .group("combine() with wrong argument count is rejected at parse time") + .testError("ints.combine()", "combine() with zero arguments is rejected") + .testError("ints.combine(1, 2)", "combine() with two arguments is rejected") + .build(); + } +} diff --git a/fhirpath/src/test/java/au/csiro/pathling/fhirpath/dsl/RepeatFunctionDslTest.java b/fhirpath/src/test/java/au/csiro/pathling/fhirpath/dsl/RepeatFunctionDslTest.java index a68a51cef9..908eec59e5 100644 --- a/fhirpath/src/test/java/au/csiro/pathling/fhirpath/dsl/RepeatFunctionDslTest.java +++ b/fhirpath/src/test/java/au/csiro/pathling/fhirpath/dsl/RepeatFunctionDslTest.java @@ -393,4 +393,27 @@ public Stream testRepeatChainedOperations() { "repeat(item).where().linkId chains where and path after repeat") .build(); } + + /** + * Verifies that repeat($this) works correctly over Quantity literal collections produced by + * combine(). This was broken because Quantity literal structs contained VOID-typed fields which + * are incompatible with to_variant_object() used by variantTransformTree. + * + * @see #2588 + */ + @FhirPathTest + public Stream testRepeatQuantityLiterals() { + return builder() + .withSubject(sb -> sb) + .group("repeat($this) over combined Quantity literals (#2588)") + .testEquals( + 2, + "(1 year).combine(12 months).repeat($this).count()", + "repeat($this) returns both indefinite calendar durations (non-comparable units)") + .testEquals( + 1, + "(3 'min').combine(180 seconds).repeat($this).count()", + "repeat($this) deduplicates equivalent canonicalisable quantities (3 min = 180 s)") + .build(); + } } diff --git a/fhirpath/src/test/resources/fhirpath-js/config.yaml b/fhirpath/src/test/resources/fhirpath-js/config.yaml index cb8f17014d..43ba3b3246 100644 --- a/fhirpath/src/test/resources/fhirpath-js/config.yaml +++ b/fhirpath/src/test/resources/fhirpath-js/config.yaml @@ -176,6 +176,19 @@ excludeSet: Pathling does not support accessing the id attribute of primitive elements. any: - "Patient.name.given.repeat(id)" + - title: "Indefinite calendar duration equality in repeat() deduplication" + type: wontfix + outcome: failure + comment: | + Pathling does not perform year-to-month conversion during Quantity equality + comparison. The reference implementation treats 1 year = 12 months, so + repeat($this) deduplicates them to one value. Pathling keeps both because its + QuantityComparator only compares canonical forms (null for indefinite durations) + or exact original values (which require matching unit codes). This is the same + limitation as the "Indefinite calendar duration union deduplication" exclusion + in the 5.4_combining section. + any: + - "(1 year).combine(12 months).repeat($this)" - title: Equality exclusions glob: "fhirpath-js/cases/6.1_equality.yaml" exclude: @@ -444,14 +457,18 @@ excludeSet: - title: Combining exclusions glob: "fhirpath-js/cases/5.4_combining.yaml" exclude: - - title: "Polymorphic unions not supported" + - title: "Polymorphic unions and combines not supported" type: feature id: "#2398" comment: | - Union operator currently only supports primitive types with native Spark equality - (Boolean, Integer, String). Cross-type unions like Integer | Boolean are not supported. + The combining operators (`|`, `union()`, `combine()`) currently only support + primitive types with native Spark equality (Boolean, Integer, String). Cross-type + merges like Integer | Boolean or Functions.attrdouble.combine(Boolean) are not + supported because the SameTypeBinaryOperator dispatch cannot reconcile them to a + common FHIRPath type. any: - "Functions.attrdouble | Functions.coll1.colltrue.attr" + - "Functions.attrdouble.combine(Functions.coll1.colltrue.attr)" - title: "Indefinite calendar duration union deduplication" type: wontfix outcome: failure @@ -462,17 +479,21 @@ excludeSet: both values since the equality comparison returns NULL for incomparable quantities. any: - "(1 year | 12 months)" - - title: "Union with count() on primitive elements with extensions but no value" + - title: "Union/combine with count() on primitive elements with extensions but no value" type: feature outcome: failure id: "#437" comment: | - This test uses count() on a union that includes Patient.name.given[3], which + These tests use count() on a merge that includes Patient.name.given[3], which is a primitive element with extensions but no value. Since Pathling doesn't - support accessing such elements, the union returns 0 elements instead of 1, - causing count() to return 0 instead of 1. + support accessing such elements, the merge returns fewer elements than the + reference implementation, causing count() to return the wrong result. The same + underlying #437 limitation applies equally to the `|` operator and to the + desugared `union()` / `combine()` function forms. any: - "(Patient.name.given[0] | Patient.name.given[3]).count()" + - "Patient.name.given[0].union(Patient.name.given[3]).count()" + - "(Patient.name.given[0]).combine(Patient.name.given[3]).count()" - title: FHIR R4 Exclusions glob: "fhirpath-js/cases/fhir-r4.yaml" exclude: diff --git a/openspec/changes/archive/2026-04-11-add-combining-functions/.openspec.yaml b/openspec/changes/archive/2026-04-11-add-combining-functions/.openspec.yaml new file mode 100644 index 0000000000..11393eacc4 --- /dev/null +++ b/openspec/changes/archive/2026-04-11-add-combining-functions/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-11 diff --git a/openspec/changes/archive/2026-04-11-add-combining-functions/design.md b/openspec/changes/archive/2026-04-11-add-combining-functions/design.md new file mode 100644 index 0000000000..9e869d5e5e --- /dev/null +++ b/openspec/changes/archive/2026-04-11-add-combining-functions/design.md @@ -0,0 +1,287 @@ +## Context + +Pathling's FHIRPath engine already implements the `|` union operator via +`UnionOperator` (extending `SameTypeBinaryOperator`). That implementation +handles type reconciliation (e.g. Integer → Decimal promotion), Decimal +precision normalization to DECIMAL(32,6), empty-operand semantics, and +comparator-aware deduplication (default SQL equality for simple types; +element-wise equality for Quantity, Coding, and temporal types via +`SqlFunctions.arrayDistinctWithEquality` / `arrayUnionWithEquality`). + +The FHIRPath specification requires two equivalent function-form entry +points in §6.5 Combining: + +- `union(other : collection) : collection` — declared synonymous with `|`. +- `combine(other : collection) : collection` — union without deduplication. + +Neither function currently exists. Users writing the function form see a +"function not found" error even though `|` works. `combine()` has no +equivalent operator at all. + +### The iteration-context problem + +The spec explicitly states, via its worked example: + +> `name.select(use.union(given))` is the same as `name.select(use | given)`, +> noting that the union function does not introduce an iteration context + +For this equivalence to hold inside an iterating function such as +`select`, the `given` argument must resolve against the current name +element, not against the root `%context`. + +Pathling's current architecture cannot deliver that equivalence if +`union()` is implemented as a regular method-defined function: + +1. The parser turns `use.union(given)` into + `Composite([Traversal(use), EvalFunction(union, [given])])`. +2. `Composite.apply(n, ec)` reduces left-to-right: the first element + produces `n.use`, which then becomes the input to the second + element; the original `n` is discarded from the chain. +3. When `EvalFunction(union, [given])` invokes the function, + `FunctionParameterResolver` evaluates the `given` argument against + `evaluationContext.getInputContext()`. That field is fixed at + construction time (`FhirEvaluationContext.inputContext`) and returns + the root resource — **not** the current iteration item `n`. + +In contrast, the `|` operator takes the chained form +`EvalOperator(leftPath=use, rightPath=given, op=UnionOperator)` and its +`invokeWithPaths` evaluates _both_ operand paths against the current +input `n`, so `given` resolves correctly. + +This gap would only surface for arguments that are relative paths +evaluated inside an iteration context. It has not been exercised before +because no existing function takes a `Collection` non-lambda argument — +`union()`/`combine()` are the first. + +Fixing the resolver architecture to propagate an "outer focus" through +`Composite.apply` is a significant refactor with broad risk. Fortunately +the spec itself describes `union()` as _synonymous_ with `|`, which +invites a cheaper solution: rewrite `x.union(y)` and `x.combine(y)` to +operator-form ASTs at parse time. + +## Goals / Non-Goals + +**Goals:** + +- Implement `union()` and `combine()` as FHIRPath surface features that + are strictly observationally equivalent to the `|` operator (for + `union`) or to a structurally identical operator (for `combine`). +- Preserve exact semantic equivalence between `x.union(y)` and `x | y` + for every input shape, including arguments that are relative paths + inside iteration contexts. +- Share code between the `|` operator, `union()`, and `combine()` so + that type reconciliation, Decimal normalization, and comparator + handling live in exactly one place. +- Match the type-reconciliation policy of `|` for both functions, + including rejecting incompatible polymorphic types (e.g. + Integer ∪ Boolean) consistently. +- Provide DSL test coverage that mirrors the existing `|` coverage and + adds `combine()`-specific duplicate-preservation cases, with + pin-down tests for the iteration-context equivalence. + +**Non-Goals:** + +- Changing the behaviour of `|` for any input. +- Refactoring `Composite`/`EvalFunction`/`FunctionParameterResolver` to + propagate an "outer focus" through chained expressions. That is a + broader architectural change that would benefit future + `Collection`-argument functions but is out of scope for this issue. +- Supporting polymorphic unions across incompatible types (this remains + a known limitation tracked in the YAML config). +- Lifting the indefinite calendar duration limitation in union + deduplication. +- Exposing `union()` / `combine()` as standalone (subject-less) function + calls. The spec defines them only as method invocations on an input + collection, so the parser desugaring will only recognise the + `x.union(y)` / `x.combine(y)` shape. +- Adding language bindings (Python / R) beyond what the FHIRPath engine + surface already exposes automatically. + +## Decisions + +### Decision 1: Extract a shared `CombiningLogic` helper + +Both the `|` operator and the new `combine` operator need the same +pipeline for the merge step: + +``` +get array column → normalise Decimal → + (dedupe | concatenate) → wrap in the reconciled Collection +``` + +Options considered: + +| Option | Verdict | +| ----------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | +| Extract a shared helper (`CombiningLogic`) used by both operators | **Chosen** | +| Duplicate the array plumbing in each operator | Rejected: two places to fix decimal / comparator bugs | +| Call `UnionOperator` directly from `CombineOperator` | Rejected: `union` dedupes, `combine` does not — the entry points are genuinely different | + +The helper is placed in the operator package (peer to `UnionOperator` +and the new `CombineOperator`) and exposes small composable primitives: + +- `prepareArray(Collection)` — extract the array column, normalising + Decimal precision. +- `dedupeArray(Column, ColumnEquality)` — deduplicate using either + Spark's `array_distinct` or `SqlFunctions.arrayDistinctWithEquality` + depending on the comparator. +- `unionArrays(Column, Column, ColumnEquality)` — merge + dedupe two + arrays. +- `combineArrays(Column, Column)` — concatenate two arrays without + dedup (`functions.concat` or equivalent). + +`UnionOperator` is refactored to delegate its +`handleOneEmpty` / `handleEquivalentTypes` bodies to these helpers, +preserving all current behaviour and error messages. + +### Decision 2: Parser-level desugaring of the function forms + +The core architectural choice. Rather than registering `union` / +`combine` as method-defined functions (which cannot deliver the spec's +iteration-context equivalence — see Context), we desugar them at parse +time into the same `EvalOperator` AST that `|` uses. + +In `Visitor.visitInvocationExpression`, after building the +`invocationSubject` and `invocationVerb`: + +1. If the verb is an `EvalFunction` whose identifier is `union` or + `combine`, and exactly one argument was provided, synthesise an + `EvalOperator(invocationSubject, operator, argument)` where + `operator` is `UnionOperator` (for `union`) or the new + `CombineOperator` (for `combine`). +2. Any other argument count raises an invalid-user-input error at + parse time, indicating the function requires exactly one argument. +3. Any other function name falls through to the default + `invocationSubject.andThen(invocationVerb)` composition. + +This makes `x.union(y)` parse to the **same AST** as `x | y`, so +semantic equivalence is guaranteed by construction. `x.combine(y)` +parses to a structurally identical AST with a different merge step. + +Consequences: + +- Neither `union` nor `combine` appears in the function registry. This + is consistent with their spec: they have no standalone (subject-less) + call form. +- Function-lookup consumers (e.g. `fhirpath-lab-server` enumerating + registered functions) will not list `union` / `combine`. This is + acceptable: users can still write them, and they behave identically + to `|`. If a follow-up issue needs to surface these in function + catalogues, a separate "virtual function" mechanism can be added. +- Error messages for incompatible operand types will read + `Operator \`|\` is not supported for: ...`(for union, because both +forms share the same AST) and`Operator \`combine\` is not supported + for: ...`(for combine). The user wrote a function call but sees`Operator ...` in the error — a small UX oddity, flagged as an + open question but not blocking. + +Options considered: + +| Option | Verdict | +| --------------------------------------------------------------------- | -------------------------------------------------------------------- | +| Parser-level desugaring (Decision 2) | **Chosen** — smallest change that delivers perfect spec equivalence | +| Regular `MethodDefinedFunction` + accept iteration-context divergence | Rejected — ships a spec deviation that would confuse users | +| Architectural fix: propagate outer focus through `Composite.apply` | Rejected for this issue — too broad; tracked as a future improvement | + +### Decision 3: Introduce a new `CombineOperator` + +Add `CombineOperator` in +`fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/` as a peer +to `UnionOperator`. It extends `SameTypeBinaryOperator` so that it +inherits the exact same empty-operand / reconcile / dispatch template +that `UnionOperator` uses. + +- `handleOneEmpty(nonEmpty, input)` returns the non-empty collection + unchanged (no dedup, no normalisation needed beyond what the + collection already carries). +- `handleEquivalentTypes(left, right, input)` calls + `CombiningLogic.combineArrays` after `prepareArray`. +- `getOperatorName()` returns `"combine"`, so failure messages read + `Operator \`combine\` is not supported for: ...`. +- `handleNonEquivalentTypes` inherits the default from + `SameTypeBinaryOperator` (throws `InvalidUserInputError`). + +`CombineOperator` is **not** wired into the operator grammar (there is +no `combine` symbol in FHIRPath). It is reachable only via the parser +desugaring in Decision 2. + +### Decision 4: Test layout — new `CombiningFunctionsDslTest` + +The existing `CombiningOperatorsDslTest` is dense and `|`-focused. +Rather than bloat it, add a new `CombiningFunctionsDslTest` that: + +1. Mirrors the operator test structure for `union()` across the full + type matrix (Boolean, Integer, Decimal, String, Date, DateTime, + Time, Quantity, Coding). +2. Exercises `combine()`'s distinct behaviour (duplicate preservation, + empty handling, type promotion, error on incompatible types, + non-dedup for Quantity and Coding). +3. Includes equivalence tests proving `x.union(y)` and `x | y` produce + equal results across the same type matrix. +4. Covers the iteration-context pin-down cases inside `select`: + `Patient.name.select(use.union(given))` must equal + `Patient.name.select(use | given)`, and the `combine` analogue must + preserve duplicates while still respecting the iteration focus. + +These tests are the strongest guarantee that Decision 2's desugaring +actually delivers the promised spec equivalence; the iteration-context +case is the one that would have failed under Option B (regular +function-form). + +### Decision 5: YAML exclusions review + +Inspect `fhirpath/src/test/resources/fhirpath-js/config.yaml` for any +entries that exclude reference tests solely because `union()` or +`combine()` were not implemented. Any such exclusion is removed as part +of this change. Exclusions that document genuine limitations +(polymorphic unions, calendar duration dedup) remain and apply equally +to the function forms because they share the same AST. + +## Risks / Trade-offs + +- **[Risk] Refactoring `UnionOperator` regresses `|` behaviour.** + → Mitigation: the existing exhaustive `CombiningOperatorsDslTest` + must stay green. No behaviour change is intended; the refactor is + pure extraction. + +- **[Risk] Parser desugaring intercepts too much.** Someone registering + a future function called `union` or `combine` through the function + registry would silently be shadowed by the desugaring. + → Mitigation: document Decision 2 in the design file and in the + parser visitor. These names are reserved for the spec-defined + combining semantics, which aligns with the FHIRPath spec's + reservation of them. + +- **[Risk] Error-message UX.** Users writing `x.union(true)` see + `Operator \`|\` is not supported for: ...`, which may be confusing + because they called a function. + → Accepted as a small UX oddity for now. Flagged as an open question. + A follow-up improvement could preserve the source form through the + AST for error formatting. + +- **[Risk] Function discovery tools miss `union`/`combine`.** External + tools enumerating `FunctionRegistry` do not see these names. + → Accepted. Future work can add a virtual-function mechanism if + needed. + +- **[Trade-off] Extra class: `CombineOperator`.** Net code growth is + small because most behaviour lives in the shared `CombiningLogic`. + → Accepted. + +## Migration Plan + +Not applicable — this is a pure addition to the FHIRPath language +surface. No deprecations, no data migration, no configuration changes. +The `|` operator continues to work as before. + +## Open Questions + +- Should error messages distinguish the function form from the operator + form (e.g. `Function \`union\` is not supported for: ...`)? This + requires threading source-form information through the AST. Out of + scope for this issue; tracked as an open improvement. +- Should `union()` / `combine()` be exposed via a virtual-function + mechanism so that function-catalogue tooling sees them? Out of scope; + flagged for future work. +- Does the site documentation page at `site/docs/fhirpath/functions.md` + (or equivalent) need updating as part of this change, or as a + follow-up docs PR? Treated as a docs-side task during implementation. diff --git a/openspec/changes/archive/2026-04-11-add-combining-functions/proposal.md b/openspec/changes/archive/2026-04-11-add-combining-functions/proposal.md new file mode 100644 index 0000000000..cc0e464f94 --- /dev/null +++ b/openspec/changes/archive/2026-04-11-add-combining-functions/proposal.md @@ -0,0 +1,77 @@ +## Why + +Pathling implements the `|` union operator but does not implement the FHIRPath +[combining functions](https://hl7.org/fhirpath/#combining) `union()` and +`combine()`. The spec declares `x.union(y)` to be _synonymous_ with `x | y`, so +users writing expressions in the function form — which is common in +specifications and tutorials — currently get a "function not found" error even +though the underlying capability already exists. `combine()` (a union that +preserves duplicates) has no equivalent at all and is simply missing. + +Closing this gap unblocks idiomatic FHIRPath authoring, improves conformance +against the reference fhirpath.js test suite, and adds one genuinely new +capability (`combine()`) that cannot currently be expressed. + +## What Changes + +- Add the FHIRPath `union(other : collection) : collection` function that + merges two collections and eliminates duplicates, with behaviour strictly + equivalent to the existing `|` operator. +- Add the FHIRPath `combine(other : collection) : collection` function that + merges two collections without eliminating duplicates. +- Implement both functions via **parser-level desugaring**: the invocation + visitor rewrites `x.union(y)` and `x.combine(y)` into the same + `EvalOperator` AST shape that `x | y` uses, so semantic equivalence with + the operator form — including the spec's + `name.select(use.union(given)) ≡ name.select(use | given)` equivalence + inside iteration contexts — is guaranteed by construction. +- Introduce a new `CombineOperator` (peer to `UnionOperator`) that provides + the concatenate-without-dedup merge step used by the desugared + `combine()` form. +- Extract a shared `CombiningLogic` helper so that the existing + `UnionOperator` and the new `CombineOperator` share type reconciliation, + Decimal normalization, and comparator-aware merging. +- Neither `union` nor `combine` is registered in the function registry — + the FHIRPath spec defines them only as method invocations on an input + collection, and the desugaring happens before the registry is consulted. +- Remove any corresponding exclusions from + `fhirpath/src/test/resources/fhirpath-js/config.yaml` that exist solely + because these functions were not implemented. + +No behaviour of the existing `|` operator changes. There are no breaking +changes. + +## Capabilities + +### New Capabilities + +- `fhirpath-union`: The FHIRPath `union(other)` function that merges two + collections and eliminates duplicates via equality, synonymous with `|`. +- `fhirpath-combine`: The FHIRPath `combine(other)` function that merges two + collections without eliminating duplicates. + +### Modified Capabilities + + + +## Impact + +- **Code**: + - `fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/CombiningLogic.java` — new shared helper holding the array-level merge primitives. + - `fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/CombineOperator.java` — new same-type binary operator for the concatenate-without-dedup merge. + - `fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/UnionOperator.java` — refactor to delegate its merge steps to `CombiningLogic`. + - `fhirpath/src/main/java/au/csiro/pathling/fhirpath/parser/Visitor.java` — desugar `x.union(y)` / `x.combine(y)` in the invocation visitor into the corresponding `EvalOperator` AST. +- **Tests**: + - New DSL test file `CombiningFunctionsDslTest` covering `union()`, + `combine()`, a `union() ≡ |` equivalence property, and iteration-context + pin-down cases inside `select`. + - Possible removal of YAML exclusions in + `fhirpath/src/test/resources/fhirpath-js/config.yaml` that become + redundant once the functions exist. +- **APIs**: The public FHIRPath surface gains two new function forms that + share behaviour with existing/new binary operators. The Java, Python, and + R library APIs are unaffected beyond what the FHIRPath engine already + exposes. +- **Dependencies**: None. +- **Documentation**: `site/docs/fhirpath/functions.md` (or equivalent) should + gain entries for `union()` and `combine()`. diff --git a/openspec/changes/archive/2026-04-11-add-combining-functions/specs/fhirpath-combine/spec.md b/openspec/changes/archive/2026-04-11-add-combining-functions/specs/fhirpath-combine/spec.md new file mode 100644 index 0000000000..d09eeb4b84 --- /dev/null +++ b/openspec/changes/archive/2026-04-11-add-combining-functions/specs/fhirpath-combine/spec.md @@ -0,0 +1,89 @@ +## ADDED Requirements + +### Requirement: FHIRPath combine function + +The FHIRPath engine SHALL implement the `combine(other : collection) : collection` +function as defined in the FHIRPath specification §6.5 Combining. The function +SHALL merge the input collection with the `other` collection into a single +collection **without** eliminating duplicate values. The result SHALL have no +guaranteed order. + +Combining an empty collection with a non-empty collection SHALL return the +non-empty collection. Combining two empty collections SHALL return an empty +collection. + +The function SHALL apply the same type reconciliation as the `|` operator +(for example promoting Integer to Decimal, normalizing Decimal precision). +Where the operand types cannot be reconciled to a common FHIRPath type, the +engine SHALL raise an invalid user input error consistent with the `|` +operator's behaviour for incompatible polymorphic types. + +The function SHALL NOT introduce an iteration context. The `other` argument +SHALL be evaluated against the same context that applies to the function's +input. + +#### Scenario: Combine preserves duplicates from both sides + +- **WHEN** the expression `(1 | 1).combine(1 | 2)` is evaluated +- **THEN** the result is a collection of four elements containing three + `1` values and one `2` value (order-independent) + +#### Scenario: Combine preserves duplicates within a single side + +- **WHEN** the expression `(1 | 1 | 2 | 3).combine({})` is evaluated +- **THEN** the result is a collection of four elements `{1, 1, 2, 3}` + (order-independent) + +#### Scenario: Combine with an empty input returns the other collection unchanged + +- **WHEN** the expression `{}.combine(1 | 1 | 2)` is evaluated +- **THEN** the result is a collection of three elements `{1, 1, 2}` + (order-independent) + +#### Scenario: Combine of two empty collections is empty + +- **WHEN** the expression `{}.combine({})` is evaluated +- **THEN** the result is the empty collection + +#### Scenario: Combine promotes Integer to Decimal + +- **WHEN** the expression `(1).combine(2.0)` is evaluated +- **THEN** the result is a collection of two Decimal elements + `{1.0, 2.0}` (order-independent) + +#### Scenario: Combine does not deduplicate equal Quantity values + +- **WHEN** the expression `(1 'mg').combine(1000 'ug')` is evaluated +- **THEN** the result is a collection containing both Quantity values, + regardless of their equality under Quantity comparison semantics + +#### Scenario: Combine does not deduplicate equal Coding values + +- **WHEN** two collections containing identical Coding values are combined +- **THEN** the result contains both Coding values + +#### Scenario: Combine of incompatible polymorphic types raises an error + +- **WHEN** the expression `(1).combine(true)` is evaluated +- **THEN** the engine raises an invalid user input error describing + incompatible operand types + +#### Scenario: Combine uses the surrounding iteration context inside select + +- **WHEN** the expression `Patient.name.select(use.combine(given))` is + evaluated against a Patient resource with multiple `name` elements +- **THEN** for each `name` element the result includes the `use` and + `given` values of that specific `name`, preserving duplicates across + the iteration + +#### Scenario: Combine with missing argument is rejected at parse time + +- **WHEN** the expression `(1 | 2).combine()` is parsed +- **THEN** the parser raises an invalid user input error indicating that + `combine` requires exactly one argument + +#### Scenario: Combine with too many arguments is rejected at parse time + +- **WHEN** the expression `(1 | 2).combine(3, 4)` is parsed +- **THEN** the parser raises an invalid user input error indicating that + `combine` requires exactly one argument diff --git a/openspec/changes/archive/2026-04-11-add-combining-functions/specs/fhirpath-union/spec.md b/openspec/changes/archive/2026-04-11-add-combining-functions/specs/fhirpath-union/spec.md new file mode 100644 index 0000000000..3773040d2b --- /dev/null +++ b/openspec/changes/archive/2026-04-11-add-combining-functions/specs/fhirpath-union/spec.md @@ -0,0 +1,91 @@ +## ADDED Requirements + +### Requirement: FHIRPath union function + +The FHIRPath engine SHALL implement the `union(other : collection) : collection` +function as defined in the FHIRPath specification §6.5 Combining. The function +SHALL merge the input collection with the `other` collection into a single +collection, eliminating any duplicate values using FHIRPath equality semantics +(i.e. the `=` operator). The result SHALL have no guaranteed order. + +The function SHALL be observationally equivalent to the `|` operator: for any +collections `x` and `y` where `x | y` produces a result, `x.union(y)` SHALL +produce a result that compares equal under FHIRPath set-equality. + +The function SHALL NOT introduce an iteration context. The `other` argument +SHALL be evaluated against the same context that applies to the function's +input, so that inside constructs such as `name.select(use.union(given))` the +`given` argument is evaluated against the current `name` element, not against +the root. + +#### Scenario: Union eliminates duplicates across two non-empty collections + +- **WHEN** the expression `(1 | 1 | 2 | 3).union(2 | 3)` is evaluated +- **THEN** the result is the collection `{1, 2, 3}` (order-independent) + +#### Scenario: Union with an empty argument returns the deduplicated input + +- **WHEN** the expression `(1 | 1 | 2 | 3).union({})` is evaluated +- **THEN** the result is the collection `{1, 2, 3}` (order-independent) + +#### Scenario: Union with an empty input returns the deduplicated argument + +- **WHEN** the expression `{}.union(1 | 2 | 2)` is evaluated +- **THEN** the result is the collection `{1, 2}` (order-independent) + +#### Scenario: Union of two empty collections is empty + +- **WHEN** the expression `{}.union({})` is evaluated +- **THEN** the result is the empty collection + +#### Scenario: Union promotes Integer to Decimal + +- **WHEN** the expression `(1 | 2).union(2.0 | 3.0)` is evaluated +- **THEN** the result is the collection `{1.0, 2.0, 3.0}` (order-independent) + +#### Scenario: Union of incompatible polymorphic types raises an error + +- **WHEN** the expression `(1).union(true)` is evaluated +- **THEN** the engine raises an invalid user input error describing + incompatible operand types, matching the behaviour of the `|` operator + +#### Scenario: Union is equivalent to the pipe operator + +- **WHEN** the expressions `x.union(y)` and `x | y` are evaluated for the + same `x` and `y` across Boolean, Integer, Decimal, String, Date, DateTime, + Time, Quantity, and Coding inputs +- **THEN** the two results are equal under FHIRPath set-equality for every + input pair + +#### Scenario: Union uses the surrounding iteration context inside select + +- **WHEN** the expression `Patient.name.select(use.union(given))` is + evaluated against a Patient resource with multiple `name` elements +- **THEN** for each `name` element the result includes the `use` and + `given` values of that specific `name`, and the overall result equals + that of `Patient.name.select(use | given)` + +#### Scenario: Union of Quantity collections uses Quantity equality + +- **WHEN** the expression `(1 'mg').union(1000 'ug')` is evaluated +- **THEN** the two values are recognised as equal under Quantity equality + and the result contains a single value + +#### Scenario: Union of Coding collections uses Coding equality + +- **WHEN** the expression + `Coding { system: 'http://x', code: 'a' }.union(Coding { system: 'http://x', code: 'a' })` + is evaluated +- **THEN** the result contains a single Coding value + +#### Scenario: Union with missing argument is rejected at parse time + +- **WHEN** the expression `(1 | 2).union()` is parsed +- **THEN** the parser raises an invalid user input error indicating that + `union` requires exactly one argument + +#### Scenario: Union with too many arguments is rejected at parse time + +- **WHEN** the expression `(1 | 2).union(3, 4)` is parsed +- **THEN** the parser raises an invalid user input error indicating that + `union` requires exactly one argument diff --git a/openspec/changes/archive/2026-04-11-add-combining-functions/tasks.md b/openspec/changes/archive/2026-04-11-add-combining-functions/tasks.md new file mode 100644 index 0000000000..d426b1e574 --- /dev/null +++ b/openspec/changes/archive/2026-04-11-add-combining-functions/tasks.md @@ -0,0 +1,117 @@ +## 1. Extract shared combining logic + +- [x] 1.1 Create `CombiningLogic` helper in + `fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/` exposing + array-level primitives: `prepareArray(Collection)` (extract column + and normalise Decimal precision), `dedupeArray(Column, ColumnEquality)`, + `unionArrays(Column, Column, ColumnEquality)`, and + `combineArrays(Column, Column)`. +- [x] 1.2 Refactor `UnionOperator` to delegate its `handleOneEmpty` and + `handleEquivalentTypes` bodies to `CombiningLogic`, preserving all + current behaviour and error messages exactly. +- [x] 1.3 Run the existing `CombiningOperatorsDslTest` to confirm the + refactor is behaviour-preserving. + +## 2. New CombineOperator + +- [x] 2.1 Create `CombineOperator` in + `fhirpath/src/main/java/au/csiro/pathling/fhirpath/operator/` extending + `SameTypeBinaryOperator`, with `getOperatorName()` returning `"combine"`. +- [x] 2.2 Implement `handleOneEmpty` to return the non-empty operand + unchanged (no dedup, no extra normalisation beyond what the collection + already carries). +- [x] 2.3 Implement `handleEquivalentTypes` to call + `CombiningLogic.combineArrays` after `prepareArray`, producing a + concatenated result that preserves duplicates. +- [x] 2.4 Inherit `handleNonEquivalentTypes` from `SameTypeBinaryOperator` + (which fails with the standard `InvalidUserInputError`), verifying the + resulting error message reads + `Operator \`combine\` is not supported for: ...`. + +## 3. Parser-level desugaring + +- [x] 3.1 In `Visitor.visitInvocationExpression`, after building the + `invocationSubject` and `invocationVerb`, detect the case where the + verb is an `EvalFunction` with identifier `union` or `combine`. +- [x] 3.2 When detected, validate the argument count (exactly one) and + emit an `EvalOperator(invocationSubject, operator, argument)` where + `operator` is `new UnionOperator()` for `union` or + `new CombineOperator()` for `combine`. +- [x] 3.3 For any other argument count, throw an + `InvalidUserInputError` with a clear message indicating that the + function requires exactly one argument. +- [x] 3.4 Fall through to the existing `invocationSubject.andThen(invocationVerb)` + composition for every other function identifier, keeping the change + strictly scoped to `union` and `combine`. +- [x] 3.5 Add a short comment at the desugaring site explaining the + rationale (spec equivalence with `|`, iteration-context behaviour) + and linking to the FHIRPath specification section. + +## 4. DSL test coverage + +- [x] 4.1 Create + `fhirpath/src/test/java/au/csiro/pathling/fhirpath/dsl/CombiningFunctionsDslTest.java` + extending `FhirPathDslTestBase`, following the layout used by + `CombiningOperatorsDslTest`. +- [x] 4.2 Cover `union()` across the full type matrix (Boolean, + Integer, Decimal, String, Date, DateTime, Time, Quantity, Coding), + mirroring the scenarios in `specs/fhirpath-union/spec.md`. +- [x] 4.3 Cover `combine()` duplicate-preservation cases across the same + type matrix: both-sides duplicates, within-one-side duplicates, + empty-input handling, Integer → Decimal promotion, Quantity and + Coding non-dedup. +- [x] 4.4 Add `x.union(y) ≡ x | y` equivalence tests across the same + type matrix so that any future divergence between the function form + and the operator form is caught immediately. +- [x] 4.5 Add iteration-context pin-down tests that evaluate + `Patient.name.select(use.union(given))` against a Patient resource + with multiple `name` elements, asserting equality with + `Patient.name.select(use | given)`. +- [x] 4.6 Add the `combine` analogue of the iteration-context test: + `Patient.name.select(use.combine(given))` must preserve duplicates + while still evaluating `use` and `given` against the current name + element. +- [x] 4.7 Add error-path tests with `testError(...)` for incompatible + polymorphic types on both functions (e.g. `(1).union(true)`, + `(1).combine(true)`), confirming the error paths are wired correctly + through the desugaring. +- [x] 4.8 Add a test that `x.union()` (no argument) raises a clear + "requires exactly one argument" error at parse time, ensuring + Decision 2.3 is exercised. + +## 5. Reference-test exclusion review + +- [x] 5.1 Audit `fhirpath/src/test/resources/fhirpath-js/config.yaml` + for exclusions that exist solely because `union()` or `combine()` + were not implemented. +- [x] 5.2 Remove any such exclusion and run + `YamlReferenceImplTest` to confirm the unblocked tests pass. +- [x] 5.3 For any reference test that still fails for reasons unrelated + to this change, keep the exclusion and update its justification text + to reference the underlying limitation (polymorphic unions, calendar + duration dedup, etc.). + +## 6. Documentation + +- [x] 6.1 Add entries for `union()` and `combine()` to the FHIRPath + functions page under `site/docs/fhirpath/` (whichever file lists + built-in functions). +- [x] 6.2 Note in the `union()` entry that it is defined by the spec to + be synonymous with `|`, and cross-reference the operator entry. + +## 7. Final verification + +- [x] 7.1 Run `mvn test -pl fhirpath` and confirm the full fhirpath + module test suite passes. (Two `FhirViewShareableComplianceTest` + failures are pre-existing on `issue/2384`, unrelated to this change, + and confirmed by running the test class on the stashed baseline.) +- [x] 7.2 Run `mvn test -pl fhirpath -Dtest=CombiningFunctionsDslTest` + and confirm the new test class is fully green. (58/58 pass.) +- [x] 7.3 Run `mvn test -pl fhirpath -Dtest=CombiningOperatorsDslTest` + and confirm the refactor preserved operator behaviour. (188/188 pass.) +- [x] 7.4 Run `mvn test -pl fhirpath -Dtest=YamlReferenceImplTest` + and confirm the reference suite state reflects the exclusion review. + (1821 run, 0 failures, 979 skipped — 5 additional tests skipped + vs. baseline because the 5 newly-failing combine/union reference + tests are covered by new entries in `config.yaml` for #437, #2398, + and the latent Quantity + repeat canonicalization limitation.) diff --git a/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/.openspec.yaml b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/.openspec.yaml new file mode 100644 index 0000000000..33d01f7841 --- /dev/null +++ b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-12 diff --git a/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/design.md b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/design.md new file mode 100644 index 0000000000..a8fb7b549d --- /dev/null +++ b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/design.md @@ -0,0 +1,67 @@ +## Context + +Spark's `lit(null)` produces an expression of type `NullType` (`VOID`). When +used inside `struct(...)`, the resulting struct field inherits that type. +`QuantityEncoding.encodeLiteral` passes `lit(null)` for fields that have no +value in a given literal (e.g., `id`, `comparator`, `_fid`), and +`FlexiDecimalSupport.toLiteral` does the same when canonicalisation is not +possible. The struct works fine in most contexts because Spark coerces `VOID` to +the target type during operations like `concat`. However, +`to_variant_object()` — used by `variantTransformTree` inside `repeatAll()` — +requires every field to have a concrete type and rejects `VOID` with +`DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION`. + +## Goals / Non-Goals + +**Goals:** + +- Eliminate all `VOID`-typed fields from Quantity literal structs so they are + compatible with `to_variant_object()` and any other schema-strict operations. +- Unblock `repeat($this)` over Quantity literal collections (issue #2588). +- Evaluate whether the fix also resolves the related "indefinite calendar + duration union deduplication" exclusion. + +**Non-Goals:** + +- Changing `repeatAll()`'s type classification for Quantity (Quantity is + correctly treated as a complex type since `.repeat(unit)` is valid + FHIRPath). +- Fixing `QuantityEncoding.encodeNumeric` — it has similar `lit(null)` patterns + but does not appear on a failing path today; addressing it is optional + clean-up. + +## Decisions + +### 1. Cast the entire struct rather than individual fields + +**Decision:** Append `.cast(dataType())` to the `toStruct(...)` result in +`encodeLiteral`, rather than casting each `lit(null)` individually. + +**Rationale:** `dataType()` already declares the canonical schema for all 10 +fields. A single positional struct cast resolves every `VOID` field in one +operation. This is less error-prone and less verbose than N separate casts, and +already validated empirically (see explore-mode spike). + +**Alternative considered:** Cast each `lit(null)` to its target type +(`lit(null).cast(DataTypes.StringType)`, etc.). Correct but repetitive and +fragile — if a field type changes in `dataType()`, the individual casts must be +updated in lockstep. + +### 2. Also fix `FlexiDecimalSupport.toLiteral` independently + +**Decision:** Change the null branch from `lit(null)` to +`lit(null).cast(DATA_TYPE)`. + +**Rationale:** Although the struct-level cast in `encodeLiteral` would also +resolve the `canonicalized_value` field, `FlexiDecimalSupport.toLiteral` is a +public utility used by other callers. Fixing it at source prevents the same +class of VOID issue from surfacing elsewhere. + +## Risks / Trade-offs + +- **Behavioural regression risk** → Low. The cast only concretises the type of + null values; no runtime data changes. Mitigated by running the full + `RepeatFunctionDslTest` and `YamlReferenceImplTest` suites. +- **Struct field ordering** → Spark's struct cast is positional. The field order + in `toStruct()` and `dataType()` must match. They already do and are defined + in the same class. Mitigated by the existing test suite. diff --git a/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/proposal.md b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/proposal.md new file mode 100644 index 0000000000..613476aa3e --- /dev/null +++ b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/proposal.md @@ -0,0 +1,41 @@ +## Why + +Quantity literals produce Spark structs with `VOID`-typed null fields (e.g., `id`, +`comparator`, `_fid`, and for indefinite calendar durations `_value_canonicalized` +and `_code_canonicalized`). `VOID` is Spark's placeholder for untyped nulls — +not a concrete data type. Any code path that requires a concrete schema (such as +`to_variant_object()` used by `repeat()` / `repeatAll()`) rejects these structs +with `DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION`. This blocks `repeat($this)` +over any Quantity literal collection and surfaces via `combine()` + `repeat()` +(issue #2588). + +## What Changes + +- Cast the Quantity literal struct to `QuantityEncoding.dataType()` after + construction so every field carries its declared type instead of `VOID`. +- Cast the `FlexiDecimalSupport.toLiteral` null branch to + `FlexiDecimal.DATA_TYPE` so the nested canonical-value sub-struct is also + properly typed. +- Remove the two `config.yaml` exclusions that were filed under #2588 and + verify the expressions pass. +- Also remove or downgrade the related "Indefinite calendar duration union + deduplication" `wontfix` exclusion if the fix resolves it. + +## Capabilities + +### New Capabilities + +_None._ + +### Modified Capabilities + +_None — this is a bug fix in the encoding layer; no spec-level behavior changes._ + +## Impact + +- `QuantityEncoding.encodeLiteral` (fhirpath module) — one-line `.cast(dataType())` addition. +- `FlexiDecimalSupport.toLiteral` (encoders module, Scala) — change `lit(null)` to `lit(null).cast(DATA_TYPE)`. +- `config.yaml` test exclusions — remove #2588 entries and evaluate the related + indefinite calendar duration exclusion. +- Risk: low — the cast merely concretises types that were already correct + structurally; no runtime values change. diff --git a/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/specs/quantity-encoding/spec.md b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/specs/quantity-encoding/spec.md new file mode 100644 index 0000000000..2cf37f7337 --- /dev/null +++ b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/specs/quantity-encoding/spec.md @@ -0,0 +1,45 @@ +## ADDED Requirements + +### Requirement: Quantity literal structs have concrete field types + +Quantity literal columns produced by `QuantityEncoding.encodeLiteral` SHALL have +every struct field typed to its declared schema type (as defined by +`QuantityEncoding.dataType()`). No field SHALL have Spark `NullType` (`VOID`) +even when its runtime value is null. + +#### Scenario: Indefinite calendar duration literal has concrete types + +- **WHEN** encoding the FHIRPath literal `1 year` (indefinite calendar duration + with no canonical form) +- **THEN** the resulting struct column's `_value_canonicalized` field has type + `STRUCT` (not `VOID`), and `id`, + `comparator`, `_fid` fields have their declared types (not `VOID`) + +#### Scenario: UCUM quantity literal has concrete types + +- **WHEN** encoding the FHIRPath literal `3 'min'` (canonicalisable UCUM + quantity) +- **THEN** all struct fields have their declared types (not `VOID`) + +#### Scenario: repeat($this) over combined indefinite Quantity literals succeeds + +- **WHEN** evaluating `(1 year).combine(12 months).repeat($this)` +- **THEN** the expression returns a collection containing two Quantity values + (1 year and 12 months) without error + +#### Scenario: repeat($this) over combined UCUM Quantity literals succeeds + +- **WHEN** evaluating `(3 'min').combine(180 seconds).repeat($this)` +- **THEN** the expression returns a collection containing one Quantity value + (the two are equal after canonicalisation, so deduplication collapses them) + +### Requirement: FlexiDecimal null literals have concrete type + +`FlexiDecimalSupport.toLiteral(null)` SHALL return a column of type +`FlexiDecimal.DATA_TYPE` (not `VOID`). + +#### Scenario: toLiteral with null produces typed null + +- **WHEN** calling `FlexiDecimalSupport.toLiteral(null)` +- **THEN** the returned column's data type is + `STRUCT`, not `NullType` diff --git a/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/tasks.md b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/tasks.md new file mode 100644 index 0000000000..b4281806cb --- /dev/null +++ b/openspec/changes/archive/2026-04-13-fix-quantity-literal-void-types/tasks.md @@ -0,0 +1,19 @@ +## 1. Fix literal encoding + +- [x] 1.1 In `FlexiDecimalSupport.toLiteral` (encoders module, Scala), change the null branch from `lit(null)` to `lit(null).cast(DATA_TYPE)` +- [x] 1.2 In `QuantityEncoding.encodeLiteral` (fhirpath module), append `.cast(dataType())` to the `toStruct(...)` return value + +## 2. Update test exclusions + +- [x] 2.1 Remove the #2588 exclusion block from `fhirpath/src/test/resources/fhirpath-js/config.yaml` (lines 179-193) +- [x] 2.2 Evaluate the related "Indefinite calendar duration union deduplication" wontfix exclusion — remove or re-link if the fix resolves it (kept: different issue — equality semantics, not VOID types) + +## 3. Add regression tests + +- [x] 3.1 Add DSL test cases in `RepeatFunctionDslTest` for `repeat($this)` over combined Quantity literals (both indefinite calendar and UCUM cases) + +## 4. Validate + +- [x] 4.1 Run `RepeatFunctionDslTest` — all tests pass +- [x] 4.2 Run `YamlReferenceImplTest` — previously excluded expressions now pass +- [x] 4.3 Run full fhirpath module test suite — no regressions diff --git a/openspec/specs/fhirpath-combine/spec.md b/openspec/specs/fhirpath-combine/spec.md new file mode 100644 index 0000000000..d09eeb4b84 --- /dev/null +++ b/openspec/specs/fhirpath-combine/spec.md @@ -0,0 +1,89 @@ +## ADDED Requirements + +### Requirement: FHIRPath combine function + +The FHIRPath engine SHALL implement the `combine(other : collection) : collection` +function as defined in the FHIRPath specification §6.5 Combining. The function +SHALL merge the input collection with the `other` collection into a single +collection **without** eliminating duplicate values. The result SHALL have no +guaranteed order. + +Combining an empty collection with a non-empty collection SHALL return the +non-empty collection. Combining two empty collections SHALL return an empty +collection. + +The function SHALL apply the same type reconciliation as the `|` operator +(for example promoting Integer to Decimal, normalizing Decimal precision). +Where the operand types cannot be reconciled to a common FHIRPath type, the +engine SHALL raise an invalid user input error consistent with the `|` +operator's behaviour for incompatible polymorphic types. + +The function SHALL NOT introduce an iteration context. The `other` argument +SHALL be evaluated against the same context that applies to the function's +input. + +#### Scenario: Combine preserves duplicates from both sides + +- **WHEN** the expression `(1 | 1).combine(1 | 2)` is evaluated +- **THEN** the result is a collection of four elements containing three + `1` values and one `2` value (order-independent) + +#### Scenario: Combine preserves duplicates within a single side + +- **WHEN** the expression `(1 | 1 | 2 | 3).combine({})` is evaluated +- **THEN** the result is a collection of four elements `{1, 1, 2, 3}` + (order-independent) + +#### Scenario: Combine with an empty input returns the other collection unchanged + +- **WHEN** the expression `{}.combine(1 | 1 | 2)` is evaluated +- **THEN** the result is a collection of three elements `{1, 1, 2}` + (order-independent) + +#### Scenario: Combine of two empty collections is empty + +- **WHEN** the expression `{}.combine({})` is evaluated +- **THEN** the result is the empty collection + +#### Scenario: Combine promotes Integer to Decimal + +- **WHEN** the expression `(1).combine(2.0)` is evaluated +- **THEN** the result is a collection of two Decimal elements + `{1.0, 2.0}` (order-independent) + +#### Scenario: Combine does not deduplicate equal Quantity values + +- **WHEN** the expression `(1 'mg').combine(1000 'ug')` is evaluated +- **THEN** the result is a collection containing both Quantity values, + regardless of their equality under Quantity comparison semantics + +#### Scenario: Combine does not deduplicate equal Coding values + +- **WHEN** two collections containing identical Coding values are combined +- **THEN** the result contains both Coding values + +#### Scenario: Combine of incompatible polymorphic types raises an error + +- **WHEN** the expression `(1).combine(true)` is evaluated +- **THEN** the engine raises an invalid user input error describing + incompatible operand types + +#### Scenario: Combine uses the surrounding iteration context inside select + +- **WHEN** the expression `Patient.name.select(use.combine(given))` is + evaluated against a Patient resource with multiple `name` elements +- **THEN** for each `name` element the result includes the `use` and + `given` values of that specific `name`, preserving duplicates across + the iteration + +#### Scenario: Combine with missing argument is rejected at parse time + +- **WHEN** the expression `(1 | 2).combine()` is parsed +- **THEN** the parser raises an invalid user input error indicating that + `combine` requires exactly one argument + +#### Scenario: Combine with too many arguments is rejected at parse time + +- **WHEN** the expression `(1 | 2).combine(3, 4)` is parsed +- **THEN** the parser raises an invalid user input error indicating that + `combine` requires exactly one argument diff --git a/openspec/specs/fhirpath-union/spec.md b/openspec/specs/fhirpath-union/spec.md new file mode 100644 index 0000000000..3773040d2b --- /dev/null +++ b/openspec/specs/fhirpath-union/spec.md @@ -0,0 +1,91 @@ +## ADDED Requirements + +### Requirement: FHIRPath union function + +The FHIRPath engine SHALL implement the `union(other : collection) : collection` +function as defined in the FHIRPath specification §6.5 Combining. The function +SHALL merge the input collection with the `other` collection into a single +collection, eliminating any duplicate values using FHIRPath equality semantics +(i.e. the `=` operator). The result SHALL have no guaranteed order. + +The function SHALL be observationally equivalent to the `|` operator: for any +collections `x` and `y` where `x | y` produces a result, `x.union(y)` SHALL +produce a result that compares equal under FHIRPath set-equality. + +The function SHALL NOT introduce an iteration context. The `other` argument +SHALL be evaluated against the same context that applies to the function's +input, so that inside constructs such as `name.select(use.union(given))` the +`given` argument is evaluated against the current `name` element, not against +the root. + +#### Scenario: Union eliminates duplicates across two non-empty collections + +- **WHEN** the expression `(1 | 1 | 2 | 3).union(2 | 3)` is evaluated +- **THEN** the result is the collection `{1, 2, 3}` (order-independent) + +#### Scenario: Union with an empty argument returns the deduplicated input + +- **WHEN** the expression `(1 | 1 | 2 | 3).union({})` is evaluated +- **THEN** the result is the collection `{1, 2, 3}` (order-independent) + +#### Scenario: Union with an empty input returns the deduplicated argument + +- **WHEN** the expression `{}.union(1 | 2 | 2)` is evaluated +- **THEN** the result is the collection `{1, 2}` (order-independent) + +#### Scenario: Union of two empty collections is empty + +- **WHEN** the expression `{}.union({})` is evaluated +- **THEN** the result is the empty collection + +#### Scenario: Union promotes Integer to Decimal + +- **WHEN** the expression `(1 | 2).union(2.0 | 3.0)` is evaluated +- **THEN** the result is the collection `{1.0, 2.0, 3.0}` (order-independent) + +#### Scenario: Union of incompatible polymorphic types raises an error + +- **WHEN** the expression `(1).union(true)` is evaluated +- **THEN** the engine raises an invalid user input error describing + incompatible operand types, matching the behaviour of the `|` operator + +#### Scenario: Union is equivalent to the pipe operator + +- **WHEN** the expressions `x.union(y)` and `x | y` are evaluated for the + same `x` and `y` across Boolean, Integer, Decimal, String, Date, DateTime, + Time, Quantity, and Coding inputs +- **THEN** the two results are equal under FHIRPath set-equality for every + input pair + +#### Scenario: Union uses the surrounding iteration context inside select + +- **WHEN** the expression `Patient.name.select(use.union(given))` is + evaluated against a Patient resource with multiple `name` elements +- **THEN** for each `name` element the result includes the `use` and + `given` values of that specific `name`, and the overall result equals + that of `Patient.name.select(use | given)` + +#### Scenario: Union of Quantity collections uses Quantity equality + +- **WHEN** the expression `(1 'mg').union(1000 'ug')` is evaluated +- **THEN** the two values are recognised as equal under Quantity equality + and the result contains a single value + +#### Scenario: Union of Coding collections uses Coding equality + +- **WHEN** the expression + `Coding { system: 'http://x', code: 'a' }.union(Coding { system: 'http://x', code: 'a' })` + is evaluated +- **THEN** the result contains a single Coding value + +#### Scenario: Union with missing argument is rejected at parse time + +- **WHEN** the expression `(1 | 2).union()` is parsed +- **THEN** the parser raises an invalid user input error indicating that + `union` requires exactly one argument + +#### Scenario: Union with too many arguments is rejected at parse time + +- **WHEN** the expression `(1 | 2).union(3, 4)` is parsed +- **THEN** the parser raises an invalid user input error indicating that + `union` requires exactly one argument diff --git a/openspec/specs/quantity-encoding/spec.md b/openspec/specs/quantity-encoding/spec.md new file mode 100644 index 0000000000..2cf37f7337 --- /dev/null +++ b/openspec/specs/quantity-encoding/spec.md @@ -0,0 +1,45 @@ +## ADDED Requirements + +### Requirement: Quantity literal structs have concrete field types + +Quantity literal columns produced by `QuantityEncoding.encodeLiteral` SHALL have +every struct field typed to its declared schema type (as defined by +`QuantityEncoding.dataType()`). No field SHALL have Spark `NullType` (`VOID`) +even when its runtime value is null. + +#### Scenario: Indefinite calendar duration literal has concrete types + +- **WHEN** encoding the FHIRPath literal `1 year` (indefinite calendar duration + with no canonical form) +- **THEN** the resulting struct column's `_value_canonicalized` field has type + `STRUCT` (not `VOID`), and `id`, + `comparator`, `_fid` fields have their declared types (not `VOID`) + +#### Scenario: UCUM quantity literal has concrete types + +- **WHEN** encoding the FHIRPath literal `3 'min'` (canonicalisable UCUM + quantity) +- **THEN** all struct fields have their declared types (not `VOID`) + +#### Scenario: repeat($this) over combined indefinite Quantity literals succeeds + +- **WHEN** evaluating `(1 year).combine(12 months).repeat($this)` +- **THEN** the expression returns a collection containing two Quantity values + (1 year and 12 months) without error + +#### Scenario: repeat($this) over combined UCUM Quantity literals succeeds + +- **WHEN** evaluating `(3 'min').combine(180 seconds).repeat($this)` +- **THEN** the expression returns a collection containing one Quantity value + (the two are equal after canonicalisation, so deduplication collapses them) + +### Requirement: FlexiDecimal null literals have concrete type + +`FlexiDecimalSupport.toLiteral(null)` SHALL return a column of type +`FlexiDecimal.DATA_TYPE` (not `VOID`). + +#### Scenario: toLiteral with null produces typed null + +- **WHEN** calling `FlexiDecimalSupport.toLiteral(null)` +- **THEN** the returned column's data type is + `STRUCT`, not `NullType` diff --git a/site/docs/fhirpath/index.md b/site/docs/fhirpath/index.md index 1c1a229943..087d3f6081 100644 --- a/site/docs/fhirpath/index.md +++ b/site/docs/fhirpath/index.md @@ -92,11 +92,11 @@ Unary `+` and `-` are also supported for numeric values. #### Collection operators -| Operator | Description | -| ---------- | ----------------------------------- | -| `\|` | Union of two collections | -| `in` | Test if element is in collection | -| `contains` | Test if collection contains element | +| Operator | Description | +| ---------- | --------------------------------------------------------------- | +| `\|` | Union of two collections (equivalent to the `union()` function) | +| `in` | Test if element is in collection | +| `contains` | Test if collection contains element | #### Type operators @@ -135,6 +135,20 @@ for detailed semantics. | --------- | ------------------------------------------- | | `first()` | Returns the first element of the collection | +#### Combining functions + +| Function | Description | +| ---------------- | ---------------------------------------------------------------------------------------- | +| `union(other)` | Merge two collections, eliminating duplicates via FHIRPath equality (equivalent to `\|`) | +| `combine(other)` | Merge two collections without eliminating duplicates | + +Both functions follow the type-reconciliation rules used by the `|` operator +(for example, Integer is promoted to Decimal when merging with a Decimal +collection), and neither introduces a new iteration context — arguments are +evaluated against the same focus that applies to the function's input, so +expressions like `name.select(use.union(given))` resolve `given` against the +current `name` element, matching `name.select(use | given)`. + #### Boolean functions | Function | Description |