diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctions.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctions.java index caaad76b058..dbeb3315661 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctions.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctions.java @@ -1649,38 +1649,53 @@ public static ValueTuple getHitTerm(Object valueTuple) { } /** - * Returns a string that is a substring of the given string. The substring starts at the index of the first '.', and extends to the index of the Nth - * occurrence of the character '.' from the left, where N is specified by {@code pos}. - * - *
-     * Given the string "FIRST.SECOND.THIRD.FOURTH"
-     * - A value of 0 for pos will result in the substring 'SECOND.THIRD'
-     * - A value of 1 for pos will result in the substring 'SECOND'
-     * - A value of 2 for pos will result in null being returned
-     * 
+ * Extracts a range of dot-delimited segments, starting after the first period. + *

+ * This method skips the initial prefix (everything before the first '.') and returns a substring containing all segments from index 0 up to + * {@code lastDotSegmentIndex}. + *

+ * Example: {@code "FIELD.FIRST.SECOND.THIRD.FOURTH"} + *

* * @param input - * the input string - * @param pos - * the Nth position of '.' to end the substring at - * @return the substring + * the dot-delimited string to process + * @param lastDotSegmentIndex + * the zero-based index of the last segment to include (relative to the first period). A value of -1 selects all segments + * @return the joined segments from index 0 to {@code lastDotSegmentIndex}, or {@code null} if the index is less than -1 or beyond the last segment. NOTE(review): an input with no periods is returned unchanged for an index of 0 or -1 rather than {@code null}; confirm this is intended. */ - public static String getMatchToLeftOfPeriod(String input, int pos) { + public static String extractDotSegmentRangeFromLeft(String input, int lastDotSegmentIndex) { + if (lastDotSegmentIndex < -1) { + return null; + } + // Always peel off the fieldName before the first '.' input = input.substring(input.indexOf('.') + 1); int[] indices = getIndicesOfPeriods(input); - if (indices.length < pos + 1) { + if (lastDotSegmentIndex == -1) { + return input; + } + if (lastDotSegmentIndex < indices.length) { + return input.substring(0, indices[lastDotSegmentIndex]); + } else if (lastDotSegmentIndex == indices.length) { + return input; + } else { if (log.isTraceEnabled()) { - log.trace("Not enough grouping info to extract group " + pos + " from the left for input " + input); + log.trace("Not enough grouping info to extract group " + lastDotSegmentIndex + " from the left for input " + input); } return null; } - return input.substring(0, indices[indices.length - pos - 1]); } /** * Returns a string that is a substring of the given string. The substring starts at the index of the Nth occurrence of the character '.' from the left, - * where N is specified by {@code pos} and extends to the end of the string. + * where N is specified by {@code pos} and extends to the end of the string. A pos of '-1' will return the maximum substring possible. * *
      * Given the string "FIRST.SECOND.THIRD.FOURTH"
@@ -1688,6 +1703,7 @@ public static String getMatchToLeftOfPeriod(String input, int pos) {
      * - A value of 1 for pos will result in the substring 'THIRD.FOURTH'
      * - A value of 2 for pos will result in the substring 'SECOND.THIRD.FOURTH'
      * - A value of 3 for pos will result in null being returned
+     * - A value of -1 for pos will result in the substring 'SECOND.THIRD.FOURTH'
      * 
* * @param input @@ -1698,6 +1714,15 @@ public static String getMatchToLeftOfPeriod(String input, int pos) { */ public static String getMatchToRightOfPeriod(String input, int pos) { int[] indices = getIndicesOfPeriods(input); + + if (indices.length == 0) { + return null; + } + + if (pos == -1) { + return input.substring(indices[0] + 1); + } + if (indices.length < pos + 1) { if (log.isTraceEnabled()) { log.trace("Not enough grouping info to extract group " + pos + " from the right for input " + input); diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctions.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctions.java index a0fe85c1c9d..662590d4720 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctions.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctions.java @@ -280,7 +280,7 @@ public static Collection matchesInGroupLeft(Object... args) { firstMatches.forEach(currentMatch -> { String matchFieldName = ValueTuple.getFieldName(currentMatch); // my firstMatches will be a collection that looks like [NAME.grandparent_0.parent_0.child_0:SANTINO] - String theFirstMatch = EvaluationPhaseFilterFunctions.getMatchToLeftOfPeriod(matchFieldName, positionFromLeft); + String theFirstMatch = EvaluationPhaseFilterFunctions.extractDotSegmentRangeFromLeft(matchFieldName, positionFromLeft); for (int i = 2; i < args.length; i += 2) { @@ -300,7 +300,7 @@ public static Collection matchesInGroupLeft(Object... 
args) { for (Object fieldValue : (Iterable) args[i]) { String fieldName = ValueTuple.getFieldName(fieldValue); String nextRegex = args[i + 1].toString(); - String matchToLeftOfPeriod = EvaluationPhaseFilterFunctions.getMatchToLeftOfPeriod(fieldName, positionFromLeft); + String matchToLeftOfPeriod = EvaluationPhaseFilterFunctions.extractDotSegmentRangeFromLeft(fieldName, positionFromLeft); // @formatter:off manageMatchesInGroupLeftRemainingArgs(fieldValue, nextRegex, // regex @@ -324,7 +324,7 @@ public static Collection matchesInGroupLeft(Object... args) { manageMatchesInGroupLeftRemainingArgs(fieldValue, args[i + 1].toString(), // regex allMatches, theFirstMatch, - EvaluationPhaseFilterFunctions.getMatchToLeftOfPeriod(fieldName, positionFromLeft), // the next match + EvaluationPhaseFilterFunctions.extractDotSegmentRangeFromLeft(fieldName, positionFromLeft), // the next match currentMatch); // @formatter:on } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MatchesInGroupLeft.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MatchesInGroupLeft.java index 269349ef485..9c273d7f1a6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MatchesInGroupLeft.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MatchesInGroupLeft.java @@ -13,24 +13,25 @@ * Function to test whether key/value pairs match within the part of a tree (left side) formed by the field name structure that is * dot-delimited: NAME.FOO.BAR.BAZ * - * position args are as follows: + * position args are the index, counted from the left, of the last grouping segment to include when determining a group * for this field name: NAME.grandparent_0.parent_0.child_0 * - * '0' means take everything to the left of the last '.' (in other words 'NAME.grandparent_0.parent_0') - * '1' means take everything to the left of the next-to-last '.' (i.e.
'NAME.grandparent_0' + * '0' means take the leftmost group after the first '.' (in other words 'NAME.grandparent_0') + * '1' means take everything up through the second group (i.e. 'NAME.grandparent_0.parent_0') * * If there is no position arg supplied, '0' is assumed. * * "NAME.grandparent_0.parent_0.child_1,FREDO,fredo" == "fredo", - * "NAME.grandparent_0.parent_0.child_0,SANTINO,santino" == "santino"); + * "NAME.grandparent_0.parent_1.child_0,SANTINO,santino" == "santino"); * (implied 0 for the position arg) means that fredo and santino have the same - * field name left-side: 'NAME.grandparent_0.parent_0' (they have the same parents so they are siblings) + * field name left-side: 'NAME.grandparent_0' (they have the same grandparents so they are related) * * "NAME.grandparent_0.parent_0.child_1,FREDO,fredo" == "fredo", * "NAME.grandparent_0.parent_1.child_0,SANTINO,santino" == "santino", 1); - * with '1' for the position ard, function is true fredo and santino have the same - * field name left-side: 'NAME.grandparent_0' (they have the same grandparents so they are 1st cousins + * with '1' for the position arg, the function is false: fredo and santino do not have the same + * field name left-side: 'NAME.grandparent_0.parent_X' (they have the same grandparents but different parents, so they are 1st cousins) * + * Supplying -1 as the position means that all grouping fields must match, regardless of how many there are * */ public class MatchesInGroupLeft extends JexlQueryFunction { diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctionsTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctionsTest.java index c5f1790a697..d1fd545104b 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctionsTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctionsTest.java @@ -1037,7 +1037,7 @@ private
FunctionalSet result() { } /** - * Tests for {@link EvaluationPhaseFilterFunctions#getMatchToLeftOfPeriod(String, int)}. + * Tests for {@link EvaluationPhaseFilterFunctions#extractDotSegmentRangeFromLeft(String, int)}. */ public static class GetMatchToLeftOfPeriodTests { @@ -1049,17 +1049,25 @@ public static class GetMatchToLeftOfPeriodTests { @Test public void testValidPositions() { givenPosition(0); - assertResult("second.third"); + assertResult("second"); givenPosition(1); - assertResult("second"); + assertResult("second.third"); + + givenPosition(2); + assertResult("second.third.fourth"); } // Verify that null is returned for an invalid position. @Test public void testInvalidPosition() { - givenPosition(2); + givenPosition(3); + assertResult(null); + + givenPosition(4); + assertResult(null); + givenPosition(-2); assertResult(null); } @@ -1068,7 +1076,7 @@ private void givenPosition(int position) { } private void assertResult(String expected) { - assertThat(EvaluationPhaseFilterFunctions.getMatchToLeftOfPeriod(input, position)).isEqualTo(expected); + assertThat(EvaluationPhaseFilterFunctions.extractDotSegmentRangeFromLeft(input, position)).isEqualTo(expected); } } diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctionsIT.java b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctionsIT.java index 5fd5e573992..c0decc5e704 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctionsIT.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctionsIT.java @@ -93,6 +93,15 @@ public void withData(String field, String value, String uid) { // 10 - if there aren't enough indexes in the group it can't be true even if otherwise is a match "grouping:matchesInGroup(FIELD, 'a', FIELD_A, 'b', 10); FIELD.1.2.3=a,FIELD_A.1.2.3=b; false", + // matchesInGroup supports -1 for position to mean full 
group + "grouping:matchesInGroup(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A.1.2.3=b; true", + "grouping:matchesInGroup(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A.1.2.4=b; false", + "grouping:matchesInGroup(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A.1.2=b; false", + "grouping:matchesInGroup(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2=a,FIELD_A.1.2.3=b; false", + "grouping:matchesInGroup(FIELD, 'a', FIELD_A, 'b', -1); FIELD=a,FIELD_A.1.2.3=b; false", + "grouping:matchesInGroup(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A=b; false", + "grouping:matchesInGroup(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A.1.2.3.0=b; false", + // supports regexes on either argument "grouping:matchesInGroup(FIELD, 'a*', FIELD_A, 'b*'); FIELD.1.2.3=aaaaaaaa,FIELD_A.1.2.3=bbbbbbbbbb; true", // regex can be full wildcards @@ -102,16 +111,15 @@ public void withData(String field, String value, String uid) { // can be complex patterns with lookahead: one digit, one upper case, 8+ characters "grouping:matchesInGroup(FIELD, '^(?=.*\\\\d)(?=.*[A-Z]).{8,}$', FIELD_A, 'b*'); FIELD.1.2.3=bb7dfZuq,FIELD_A.1.2.3=bbbbbbbbbb; true", - // matchesInGroupLeft should work the same as matchesInGroup but with an index offset from the left but - // currently does not. The current behavior is show below in tests. When unexpected, the correct response - // is indicated. 
Problem appears to be in EvaluationPhaseFilterFunctions.getMatchToLeftOfPeriod() + // matchesInGroupLeft should work the same as matchesInGroup but with an index offset from the left + // default 0 - .1 matches .1 "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b'); FIELD.1.2.3=a,FIELD_A.1.2.3=b; true", - // CURRENT BEHAVIOR: 0-index is acting like 1-index, this should still be true - "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b'); FIELD.1.0.3=a,FIELD_A.1.9.3=b; false", - // CURRENT BEHAVIOR: 1-index is acting like 0-index, this should still be false - "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', 1); FIELD.000.999.0=a,FIELD_A.000.888.9=b; true", - // CURRENT BEHAVIOR: 2-index is acting like 3-index, this should be true - "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', 2); FIELD.000.999.1=a,FIELD_A.000.999.1=b; false", + // default 0 - .1 matches .1 + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b'); FIELD.1.0.3=a,FIELD_A.1.9.3=b; true", + // 1 - 000.999 does not match 000.888 + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', 1); FIELD.000.999.0=a,FIELD_A.000.888.9=b; false", + // 2 - 000.999.1 matches 000.999.1 + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', 2); FIELD.000.999.1=a,FIELD_A.000.999.1=b; true", // 0 - 1 does not match 0 "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b'); FIELD.1.2.3=a,FIELD_A.0.2.3=b; false", @@ -135,7 +143,16 @@ public void withData(String field, String value, String uid) { // matchesInGroupLeft also supports regex "grouping:matchesInGroupLeft(FIELD, 'a*', FIELD_A, 'b*'); FIELD.1.2.3=aaaaaaaa,FIELD_A.1.2.3=bbbbbbbbbb; true", "grouping:matchesInGroupLeft(FIELD, 'a{8}', FIELD_A, 'b*'); FIELD.1.2.3=aaaaaaaa,FIELD_A.1.2.3=bbbbbbbbbb; true", - "grouping:matchesInGroupLeft(FIELD, '^(?=.*\\\\d)(?=.*[A-Z]).{8,}$', FIELD_A, 'b*'); FIELD.1.2.3=bb7dfZuq,FIELD_A.1.2.3=bbbbbbbbbb; true",}) + "grouping:matchesInGroupLeft(FIELD, '^(?=.*\\\\d)(?=.*[A-Z]).{8,}$', FIELD_A, 'b*'); 
FIELD.1.2.3=bb7dfZuq,FIELD_A.1.2.3=bbbbbbbbbb; true", + + // matchesInGroupLeft supports -1 for position to mean full group + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A.1.2.3=b; true", + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A.1.2.4=b; false", + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A.1.2=b; false", + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2=a,FIELD_A.1.2.3=b; false", + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', -1); FIELD=a,FIELD_A.1.2.3=b; false", + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A=b; false", + "grouping:matchesInGroupLeft(FIELD, 'a', FIELD_A, 'b', -1); FIELD.1.2.3=a,FIELD_A.1.2.3.0=b; false",}) @ParameterizedTest(name = "{0} against {1} should be {2}") public void functionalTests(String query, String data, boolean result) { withQuery(query); diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctionsTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctionsTest.java index f3536b63453..c33a8c2f423 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctionsTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/GroupingRequiredFilterFunctionsTest.java @@ -408,26 +408,37 @@ public void tearDown() throws Exception { public void testDefaultIndex() { givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_1", "FREDO")); givenArg(normalizeLcNoDiacriticsFilter("fredo")); - givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_0", "SANTINO")); + givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); givenArg(normalizeLcNoDiacriticsFilter("santino")); - // Grouping context should have matched against parent_0. 
+ // Grouping context should have matched against grandparent_0 expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_1", "FREDO")); - expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_0", "SANTINO")); + expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); assertResult(); } @Test - public void testIndexOfOne() { + public void testIndexOfOneMiss() { givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_1", "FREDO")); givenArg(normalizeLcNoDiacriticsFilter("fredo")); givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); givenArg(normalizeLcNoDiacriticsFilter("santino")); givenArg(1); - // Grouping context should have matched against grandparent_0. - expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_1", "FREDO")); + assertResult(); + } + + @Test + public void testIndexOfOneHit() { + givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_1", "FREDO")); + givenArg(normalizeLcNoDiacriticsFilter("fredo")); + givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); + givenArg(normalizeLcNoDiacriticsFilter("santino")); + givenArg(1); + + // Grouping context should have matched against grandparent_0.parent_1 + expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_1", "FREDO")); expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); assertResult(); @@ -437,16 +448,16 @@ public void testIndexOfOne() { public void testPartialMatch() { // @formatter:off givenArg(Lists.newArrayList( - lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_1","FREDO"), + lcNoDiacriticsTuple("NAME.grandparent_1.parent_0.child_1","FREDO"), lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_1","FREDO"))); givenArg(normalizeLcNoDiacriticsFilter("fredo")); - givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0","SANTINO")); + givenArg(lcNoDiacriticsTuple("NAME.grandparent_1.parent_1.child_0","SANTINO")); 
givenArg(normalizeLcNoDiacriticsFilter("SANTINO")); // @formatter:on - // Grouping context should have matched against parent_x. Only parent_1 found in commonality. - expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_1", "FREDO")); - expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); + // Grouping context should have matched against grandparent_x. Only grandparent_1 found in commonality. + expect(lcNoDiacriticsTuple("NAME.grandparent_1.parent_0.child_1", "FREDO")); + expect(lcNoDiacriticsTuple("NAME.grandparent_1.parent_1.child_0", "SANTINO")); assertResult(); } @@ -455,17 +466,32 @@ public void testPartialMatch() { public void testPartialMatchWithIndexOfOne() { // @formatter:off givenArg(Lists.newArrayList( - lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_1","FREDO"), - lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_1","FREDO"))); + lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_1","FREDO"), + lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_2","FREDO"))); givenArg(normalizeLcNoDiacriticsFilter("fredo")); givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0","SANTINO")); givenArg(normalizeLcNoDiacriticsFilter("SANTINO")); givenArg(1); // @formatter:on - // Grouping context should have matched against grandparent_0. 
- expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_0.child_1", "FREDO")); + // Grouping context should have matched against grandparent_0.parent_1 expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_1", "FREDO")); + expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_2", "FREDO")); + expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); + + assertResult(); + } + + @Test + public void testIndexOfTwoHit() { + givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "FREDO")); + givenArg(normalizeLcNoDiacriticsFilter("fredo")); + givenArg(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); + givenArg(normalizeLcNoDiacriticsFilter("santino")); + givenArg(2); + + // Grouping context should have matched against grandparent_0.parent_1.child_0 + expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "FREDO")); expect(lcNoDiacriticsTuple("NAME.grandparent_0.parent_1.child_0", "SANTINO")); assertResult();