From 8111c5193ac58113c6c676638b8a33cccf6baed9 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Thu, 30 Apr 2026 17:10:41 +0000 Subject: [PATCH 01/16] Added values only to discovery count. --- ...ThingValuesOnlyConditionalTransformer.java | 27 +++++++ .../query/discovery/DiscoveryIterator.java | 75 +++++++++++++++++-- .../query/discovery/DiscoveryLogic.java | 17 +++++ .../DiscoveryQueryConfiguration.java | 11 ++- .../query/discovery/DiscoveredThingTest.java | 44 +++++++++++ .../query/discovery/DiscoveryLogicTest.java | 25 ++++++- 6 files changed, 191 insertions(+), 8 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveredThingTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java new file mode 100644 index 00000000000..d092080ee0c --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java @@ -0,0 +1,27 @@ +package datawave.query.discovery; + +import java.util.function.UnaryOperator; + +import org.apache.hadoop.io.MapWritable; + +public class DiscoveredThingValuesOnlyConditionalTransformer implements UnaryOperator { + + boolean valuesOnly = false; + + DiscoveredThingValuesOnlyConditionalTransformer(boolean valuesOnly) { + this.valuesOnly = valuesOnly; + } + + public DiscoveredThing apply(DiscoveredThing dt) { + // @formatter:off + return (valuesOnly) ? new DiscoveredThing(dt.getTerm(), + "", + "", + "", + dt.getColumnVisibility(), + 0L, + new MapWritable()) + : dt; + // @formatter:on + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index 404d9c29dda..080bbb151ef 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -19,6 +19,8 @@ import org.apache.accumulo.core.iterators.IteratorEnvironment; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.MapWritable; @@ -27,6 +29,7 @@ import org.apache.log4j.Logger; import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Multimap; import com.google.protobuf.InvalidProtocolBufferException; @@ -46,6 +49,7 @@ public class DiscoveryIterator implements SortedKeyValueIterator { private boolean showReferenceCount = false; private boolean reverseIndex = false; private boolean sumCounts = false; + private boolean valuesOnly = false; @Override public DiscoveryIterator deepCopy(IteratorEnvironment env) { @@ -61,7 +65,7 @@ public void next() throws IOException { while (iterator.hasTop() && key == null) { // Get the entries to aggregate. - Multimap terms = getTermsByDatatype(); + Multimap terms = this.valuesOnly ? getTermsOnly() : getTermsByDatatype(); if (terms.isEmpty()) { log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available."); } else { @@ -84,8 +88,8 @@ private Multimap getTermsByDatatype() throws IOException { Multimap terms = ArrayListMultimap.create(); Key start = new Key(iterator.getTopKey()); Key key; - // If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. Otherwise, we only want - // to collect the term entries for the current field, term, and date of start. + // If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. + // Otherwise, we only want to collect the term entries for the current field, term, and date of start. BiFunction dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch; // Find all matching entries and parse term entries from them. while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) { @@ -102,6 +106,46 @@ private Multimap getTermsByDatatype() throws IOException { return terms; } + private Multimap getTermsOnly() throws IOException { + LinkedHashMultimap terms = LinkedHashMultimap.create(); + Key start = new Key(iterator.getTopKey()); + Key key; + // If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. + // Otherwise, we only want to collect the term entries for the current field, term, and date of start. + BiFunction dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch; + // Find all matching entries and parse term entries from them. + + while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) { + // TermEntry termEntry = new TermEntry(key, iterator.getTopValue()); + TermEntry termEntry = new TermEntry(key, iterator.getTopValue()) { + // Only use term and visibility for equality. + @Override + public boolean equals(Object o) { + if (o instanceof TermEntry) { + TermEntry other = (TermEntry) o; + return new EqualsBuilder().append(getTerm(), other.getTerm()).append(getVisibility(), other.getVisibility()).isEquals(); + } + return false; + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(getTerm()).append(getVisibility()).toHashCode(); + } + }; + + if (termEntry.isValid()) + terms.put(termEntry.getDatatype(), termEntry); + else { + if (log.isTraceEnabled()) { + log.trace("Received invalid term entry from key: " + key); + } + } + iterator.next(); + } + return terms; + } + /** * Return true if the dates for the two keys match, or false otherwise. */ @@ -180,8 +224,8 @@ private DiscoveredThing aggregate(Collection termEntries) { * Set the top {@link Key} and {@link Value} of this iterator, created from the given list of {@link DiscoveredThing} instances. */ private void setTop(List things) { - // We want the key to be the last possible key for this date. Return the key as it is in the index (reversed if necessary) to ensure the keys are - // consistent with the initial seek range. + // We want the key to be the last possible key for this date. Return the key as it is in the index (reversed if + // necessary) to ensure the keys are consistent with the initial seek range. DiscoveredThing thing = things.get(0); String row = (this.reverseIndex ? new StringBuilder().append(thing.getTerm()).reverse().toString() : thing.getTerm()); Key newKey = new Key(row, thing.getField(), thing.getDate() + "\uffff"); @@ -210,6 +254,7 @@ public void init(SortedKeyValueIterator source, Map op this.showReferenceCount = Boolean.parseBoolean(options.get(DiscoveryLogic.SHOW_REFERENCE_COUNT)); this.reverseIndex = Boolean.parseBoolean(options.get(DiscoveryLogic.REVERSE_INDEX)); this.sumCounts = Boolean.parseBoolean(options.get(DiscoveryLogic.SUM_COUNTS)); + this.valuesOnly = Boolean.parseBoolean(options.get(DiscoveryLogic.VALUES_ONLY)); if (log.isTraceEnabled()) { log.trace("Source: " + source.getClass().getName()); @@ -217,6 +262,7 @@ public void init(SortedKeyValueIterator source, Map op log.trace("Show reference counts only: " + this.showReferenceCount); log.trace("Reverse index: " + this.reverseIndex); log.trace("Sum counts: " + this.sumCounts); + log.trace("Values only: " + this.valuesOnly); } } @@ -241,7 +287,7 @@ public Value getTopValue() { private static class TermEntry { private final String term; - private final String field; + private String field; private String date; private String datatype; private ColumnVisibility visibility; @@ -325,5 +371,22 @@ public long getUidListSize() { public boolean isValid() { return valid; } + + @Override + public boolean equals(Object o) { + if (o instanceof TermEntry) { + TermEntry other = (TermEntry) o; + return new EqualsBuilder().append(getTerm(), other.getTerm()).append(getField(), other.getField()) + .append(getVisibility(), other.getVisibility()).append(getDate(), other.getDate()).append(getDatatype(), other.getDatatype()) + .append(getUidCount(), other.getUidCount()).append(getUidListSize(), other.getUidListSize()).isEquals(); + } + return false; + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(getTerm()).append(getField()).append(getVisibility()).append(getDate()).append(getDatatype()) + .append(getUidCount()).append(getUidListSize()).toHashCode(); + } } } diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java index ed676bef61e..dcdcba932b7 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java @@ -88,6 +88,11 @@ public class DiscoveryLogic extends ShardIndexQueryTable { */ public static final String SUM_COUNTS = "sum.counts"; + /** + * Used to specify a unique list of values not associated with a field. + */ + public static final String VALUES_ONLY = "values.only"; + /** * Used to specify whether to search against the reversed index. */ @@ -151,6 +156,9 @@ public GenericQueryConfiguration initialize(AccumuloClient client, Query setting // Check if counts should be summed. setSumCounts(getOrDefaultBoolean(settings, SUM_COUNTS, getSumCounts())); + // Specify values only. Treat associated field, data type, and the like as "don't care." + setValuesOnly(getOrDefaultBoolean(settings, VALUES_ONLY, false)); + // Check if any datatype filters were specified. getConfig().setDatatypeFilter(getOrDefaultSet(settings, QueryParameters.DATATYPE_FILTER_SET, getConfig().getDatatypeFilter())); @@ -580,6 +588,7 @@ private IteratorSetting configureDiscoveryIterator(DiscoveryQueryConfiguration c setting.addOption(SEPARATE_COUNTS_BY_COLVIS, Boolean.toString(config.getSeparateCountsByColVis())); setting.addOption(SHOW_REFERENCE_COUNT, Boolean.toString(config.getShowReferenceCount())); setting.addOption(SUM_COUNTS, Boolean.toString(config.getSumCounts())); + setting.addOption(VALUES_ONLY, Boolean.toString(config.getValuesOnly())); return setting; } @@ -689,4 +698,12 @@ public boolean getSumCounts() { public void setSumCounts(boolean sumCounts) { getConfig().setSumCounts(sumCounts); } + + public void setValuesOnly(boolean valuesOnly) { + getConfig().setValuesOnly(valuesOnly); + } + + public boolean getValuesOnly() { + return getConfig().getValuesOnly(); + } } diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryQueryConfiguration.java index 248ae450eb5..49f50b3d22f 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryQueryConfiguration.java @@ -22,6 +22,7 @@ public class DiscoveryQueryConfiguration extends ShardIndexQueryConfiguration im private boolean separateCountsByColVis = false; private boolean showReferenceCount = false; private boolean sumCounts = false; + private boolean valuesOnly = false; public DiscoveryQueryConfiguration() {} @@ -131,6 +132,10 @@ public boolean getSumCounts() { return sumCounts; } + public boolean getValuesOnly() { + return valuesOnly; + } + public void setSeparateCountsByColVis(boolean separateCountsByColVis) { this.separateCountsByColVis = separateCountsByColVis; } @@ -144,6 +149,10 @@ public void setSumCounts(boolean sumCounts) { this.sumCounts = sumCounts; } + public void setValuesOnly(boolean valuesOnly) { + this.valuesOnly = valuesOnly; + } + @Override public DiscoveryQueryConfiguration checkpoint() { // Create a new config that only contains what is needed to execute the specified ranges @@ -172,6 +181,6 @@ public int hashCode() { public String toString() { return new StringJoiner(", ", DiscoveryQueryConfiguration.class.getSimpleName() + "[", "]").add("literals=" + literals).add("patterns=" + patterns) .add("ranges=" + ranges).add("separateCountsByColVis=" + separateCountsByColVis).add("showReferenceCount=" + showReferenceCount) - .add("sumCounts=" + sumCounts).toString(); + .add("sumCounts=" + sumCounts).add("valuesOnly=" + valuesOnly).toString(); } } diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveredThingTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveredThingTest.java new file mode 100644 index 00000000000..38a3cd4c445 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveredThingTest.java @@ -0,0 +1,44 @@ +package datawave.query.discovery; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +import org.apache.hadoop.io.MapWritable; +import org.junit.jupiter.api.Test; + +public class DiscoveredThingTest { + @Test + public void testDiscoveredThingSimpleEqualityTest() { + DiscoveredThing thing1 = new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()); + DiscoveredThing thing2 = new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()); + assertEquals(thing1, thing2); + + DiscoveredThing thing3 = new DiscoveredThing("", "", "", "", "", 0L, new MapWritable()); + DiscoveredThing thing4 = new DiscoveredThing("", "", "", "", "", 0L, new MapWritable()); + assertEquals(thing3, thing4); + + DiscoveredThing thing5 = new DiscoveredThing("", "", "", "", "", 0L, null); + DiscoveredThing thing6 = new DiscoveredThing("", "", "", "", "", 0L, null); + assertEquals(thing5, thing6); + + } + + @Test + public void testDiscoveredThingSimpleInequalityTest() { + DiscoveredThing thing1 = new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()); + DiscoveredThing thing2 = new DiscoveredThing("bbc", "NETWORK", "csv", "20130102", "FOO", 240L, new MapWritable()); + assertNotEquals(thing1, thing2); + + DiscoveredThing thing3 = new DiscoveredThing("", "wanda", "", "", "", 0L, new MapWritable()); + DiscoveredThing thing4 = new DiscoveredThing("", "panda", "", "", "", 0L, new MapWritable()); + assertNotEquals(thing3, thing4); + + DiscoveredThing thing5 = new DiscoveredThing("", "wands", "", "", "", 0L, null); + DiscoveredThing thing6 = new DiscoveredThing("", "wands", "", "", "", 0L, new MapWritable()); + assertNotEquals(thing5, thing6); + + DiscoveredThing thing7 = new DiscoveredThing("", "wanda", "", "", "", 0L, null); + DiscoveredThing thing8 = new DiscoveredThing("", "panda", "", "", "", 0L, null); + assertNotEquals(thing7, thing8); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index f0512fc2ac6..4bff5cf4a24 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -241,8 +241,10 @@ private void assertQueryResults() throws Exception { logic.setupQuery(config); Iterator iterator = logic.iterator(); List actual = new ArrayList<>(); + DiscoveredThingValuesOnlyConditionalTransformer dtvoct = new DiscoveredThingValuesOnlyConditionalTransformer(logic.getValuesOnly()); while (iterator.hasNext()) { - actual.add(iterator.next()); + actual.add(dtvoct.apply(iterator.next())); + // actual.add(iterator.next()); } Assertions.assertThat(actual).hasSize(expected.size()); @@ -520,4 +522,25 @@ public void testSumCountsForReverse() throws Exception { assertQueryResults(); } + + @Test + public void testValuesOnlyForLiterals() throws Exception { + givenQuery("bbc OR onyx"); + givenStartDate("20130101"); + givenEndDate("20130102"); + givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); + givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); + + // expect(new DiscoveredThing("bbc", "NETWORK", "csv", "", "FOO", 480L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "POKEMON", "csv", "", "FOO", 110L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "ROCK", "csv", "", "FOO", 4L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "ROOSTER", "csv", "", "BAR", 240L, new MapWritable())); + + expect(new DiscoveredThing("bbc", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "BAR", 0L, new MapWritable())); + + assertQueryResults(); + } } From 076cbf53347a8b6112d4ee283a965a1312b3e23c Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Fri, 1 May 2026 13:12:19 +0000 Subject: [PATCH 02/16] Removed some commented-out code. --- .../java/datawave/query/discovery/DiscoveryIterator.java | 1 - .../java/datawave/query/discovery/DiscoveryLogicTest.java | 6 ------ 2 files changed, 7 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index 080bbb151ef..fa5bd535187 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -116,7 +116,6 @@ private Multimap getTermsOnly() throws IOException { // Find all matching entries and parse term entries from them. while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) { - // TermEntry termEntry = new TermEntry(key, iterator.getTopValue()); TermEntry termEntry = new TermEntry(key, iterator.getTopValue()) { // Only use term and visibility for equality. @Override diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 4bff5cf4a24..1a859e32cd5 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -244,7 +244,6 @@ private void assertQueryResults() throws Exception { DiscoveredThingValuesOnlyConditionalTransformer dtvoct = new DiscoveredThingValuesOnlyConditionalTransformer(logic.getValuesOnly()); while (iterator.hasNext()) { actual.add(dtvoct.apply(iterator.next())); - // actual.add(iterator.next()); } Assertions.assertThat(actual).hasSize(expected.size()); @@ -531,11 +530,6 @@ public void testValuesOnlyForLiterals() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - // expect(new DiscoveredThing("bbc", "NETWORK", "csv", "", "FOO", 480L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "POKEMON", "csv", "", "FOO", 110L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "ROCK", "csv", "", "FOO", 4L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "ROOSTER", "csv", "", "BAR", 240L, new MapWritable())); - expect(new DiscoveredThing("bbc", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); From 634dfa47a9599d564ce1dea94025e5b2802390ed Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Fri, 1 May 2026 19:58:53 +0000 Subject: [PATCH 03/16] Make the Discover Transformer FIELD to be optional. --- .../datawave/query/discovery/DiscoveryTransformer.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java index bbabc7c9537..59e43bab3f6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java @@ -4,6 +4,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.hadoop.io.Writable; @@ -58,9 +59,12 @@ public EventBase transform(DiscoveredThing thing) { fields.add(this.makeField("VALUE", markings, "", 0L, thing.getTerm())); /** - * Added query model to alias FIELD + * Added query model to alias FIELD, if DiscoveredThing::field both not NULL and not empty. */ - fields.add(this.makeField("FIELD", markings, "", 0L, myQueryModel.aliasFieldNameReverseModel(thing.getField()))); + Optional fieldOFThing = Optional.ofNullable(thing.getField()); + fieldOFThing.filter(i -> !i.isEmpty()) + .ifPresent(i -> fields.add(this.makeField("FIELD", markings, "", 0L, myQueryModel.aliasFieldNameReverseModel(i)))); + fields.add(this.makeField("DATE", markings, "", 0L, thing.getDate())); fields.add(this.makeField("DATA TYPE", markings, "", 0L, thing.getType())); From 9d7ffd273072c6a377d13cc2a67b8202001fc331 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Mon, 4 May 2026 14:11:20 +0000 Subject: [PATCH 04/16] DiscoveryTransformer: use blank rather then empty. --- .../java/datawave/query/discovery/DiscoveryTransformer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java index 59e43bab3f6..0f6f5be02b0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java @@ -62,7 +62,7 @@ public EventBase transform(DiscoveredThing thing) { * Added query model to alias FIELD, if DiscoveredThing::field both not NULL and not empty. */ Optional fieldOFThing = Optional.ofNullable(thing.getField()); - fieldOFThing.filter(i -> !i.isEmpty()) + fieldOFThing.filter(i -> !i.isBlank()) .ifPresent(i -> fields.add(this.makeField("FIELD", markings, "", 0L, myQueryModel.aliasFieldNameReverseModel(i)))); fields.add(this.makeField("DATE", markings, "", 0L, thing.getDate())); From 90e9eca5e8987d44cc219dc609ba9bad08598881 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Wed, 6 May 2026 21:54:21 +0000 Subject: [PATCH 05/16] Refactored DiscoveryIterator for terms only. --- .../query/discovery/DiscoveryIterator.java | 67 ++++++++++++++----- .../query/discovery/DiscoveryLogicTest.java | 31 +++++++-- 2 files changed, 76 insertions(+), 22 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index fa5bd535187..b4693f313d6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -7,6 +7,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.function.BiFunction; import java.util.stream.Collectors; @@ -29,7 +30,6 @@ import org.apache.log4j.Logger; import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Multimap; import com.google.protobuf.InvalidProtocolBufferException; @@ -58,14 +58,24 @@ public DiscoveryIterator deepCopy(IteratorEnvironment env) { return copy; } - @Override - public void next() throws IOException { - this.key = null; - this.value = null; + private void termsOnlyOperation() throws IOException { + while (Optional.ofNullable(iterator).isPresent() && iterator.hasTop()) { + // Get the entries to aggregate. + Set terms = getTermsOnly(); + if (terms.isEmpty()) { + log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available."); + } else { + List things = List.of(aggregate(terms)); + setTop(things); + return; + } + } + } - while (iterator.hasTop() && key == null) { + private void standardOperation() throws IOException { + while (Optional.ofNullable(iterator).isPresent() && iterator.hasTop() && key == null) { // Get the entries to aggregate. - Multimap terms = this.valuesOnly ? getTermsOnly() : getTermsByDatatype(); + Multimap terms = getTermsByDatatype(); if (terms.isEmpty()) { log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available."); } else { @@ -78,6 +88,20 @@ public void next() throws IOException { } } } + } + + @Override + public void next() throws IOException { + this.key = null; + this.value = null; + + // Underlying code is tentacled. Keep this "strategy" non-parameterized for now. + if (this.valuesOnly) { + termsOnlyOperation(); + } else { + standardOperation(); + } + log.trace("No data found."); } @@ -91,8 +115,13 @@ private Multimap getTermsByDatatype() throws IOException { // If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. // Otherwise, we only want to collect the term entries for the current field, term, and date of start. BiFunction dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch; + // Find all matching entries and parse term entries from them. - while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) { + //@formatter:off + while (Optional.ofNullable(iterator).isPresent() && + iterator.hasTop() && + start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && + dateMatchingFunction.apply(start, key)) { TermEntry termEntry = new TermEntry(key, iterator.getTopValue()); if (termEntry.isValid()) terms.put(termEntry.getDatatype(), termEntry); @@ -103,19 +132,21 @@ private Multimap getTermsByDatatype() throws IOException { } iterator.next(); } + //@formatter:on return terms; } - private Multimap getTermsOnly() throws IOException { - LinkedHashMultimap terms = LinkedHashMultimap.create(); + private Set getTermsOnly() throws IOException { + Set terms = new HashSet<>(); Key start = new Key(iterator.getTopKey()); Key key; - // If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. - // Otherwise, we only want to collect the term entries for the current field, term, and date of start. - BiFunction dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch; + // Find all matching entries and parse term entries from them. + //@formatter:off + while (Optional.ofNullable(iterator).isPresent() && + iterator.hasTop() && + start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM)) { - while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) { TermEntry termEntry = new TermEntry(key, iterator.getTopValue()) { // Only use term and visibility for equality. @Override @@ -134,7 +165,7 @@ public int hashCode() { }; if (termEntry.isValid()) - terms.put(termEntry.getDatatype(), termEntry); + terms.add(termEntry); else { if (log.isTraceEnabled()) { log.trace("Received invalid term entry from key: " + key); @@ -142,6 +173,7 @@ public int hashCode() { } iterator.next(); } + //@formatter:on return terms; } @@ -169,6 +201,9 @@ private DiscoveredThing aggregate(Collection termEntries) { TermEntry first = termEntries.iterator().next(); String term = reverseIndex ? new StringBuilder(first.getTerm()).reverse().toString() : first.getTerm(); String date = sumCounts ? "" : first.date; + if (valuesOnly) { + date = ""; + } Set visibilities = new HashSet<>(); Map visibilityToCounts = new HashMap<>(); @@ -241,7 +276,7 @@ private void setTop(List things) { public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { this.iterator.seek(range, columnFamilies, inclusive); if (log.isTraceEnabled()) { - log.trace("My source " + (this.iterator.hasTop() ? "does" : "does not") + " have a top."); + log.trace("My source " + ((Optional.ofNullable(iterator).isPresent() && this.iterator.hasTop()) ? "does" : "does not") + " have a top."); } next(); } diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 1a859e32cd5..03e4204c57c 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -94,7 +94,7 @@ private void writeData() throws Throwable { writeEntries("ROOSTER", "onyx", "csv", "BAR", "20130102", 5, 24, 2); writeEntries("ROOSTER", "onyx", "csv", "BAR", "20130103", 5, 24, 20); writeEntries("NETWORK", "bbc", "csv", "FOO", "20130101", 10, 24, 20); - writeEntries("NETWORK", "bbc", "csv", "FOO", "20130102", 10, 24, 20); + writeEntries("NETWORK", "bbc", "csv", "BAR", "20130102", 10, 24, 20); // formerly FOO writeEntries("NETWORK", "bbc", "csv", "FOO", "20130103", 10, 24, 20); writeEntries("OCCUPATION", "skydiver", "text", "FOO", "20130101", 10, 10, 5); writeEntries("OCCUPATION", "skydiver", "text", "FOO", "20130102", 10, 10, 5); @@ -241,11 +241,16 @@ private void assertQueryResults() throws Exception { logic.setupQuery(config); Iterator iterator = logic.iterator(); List actual = new ArrayList<>(); + DiscoveredThingValuesOnlyConditionalTransformer dtvoct = new DiscoveredThingValuesOnlyConditionalTransformer(logic.getValuesOnly()); while (iterator.hasNext()) { actual.add(dtvoct.apply(iterator.next())); } + /* + * while (iterator.hasNext()) { actual.add(iterator.next()); } + */ + Assertions.assertThat(actual).hasSize(expected.size()); for (int i = 0; i < expected.size(); i++) { DiscoveredThing actualThing = actual.get(i); @@ -282,7 +287,7 @@ public void testLiterals() throws Exception { givenEndDate("20130102"); expect(new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable())); - expect(new DiscoveredThing("bbc", "NETWORK", "csv", "20130102", "FOO", 240L, new MapWritable())); + expect(new DiscoveredThing("bbc", "NETWORK", "csv", "20130102", "BAR", 240L, new MapWritable())); expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130101", "FOO", 100L, new MapWritable())); expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130102", "FOO", 10L, new MapWritable())); expect(new DiscoveredThing("onyx", "ROCK", "csv", "20130101", "FOO", 1L, new MapWritable())); @@ -300,7 +305,7 @@ public void testPatterns() throws Exception { givenEndDate("20130102"); expect(new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable())); - expect(new DiscoveredThing("bbc", "NETWORK", "csv", "20130102", "FOO", 240L, new MapWritable())); + expect(new DiscoveredThing("bbc", "NETWORK", "csv", "20130102", "BAR", 240L, new MapWritable())); expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130101", "FOO", 100L, new MapWritable())); expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130102", "FOO", 10L, new MapWritable())); expect(new DiscoveredThing("onyx", "ROCK", "csv", "20130101", "FOO", 1L, new MapWritable())); @@ -428,7 +433,7 @@ public void testSumCountsForLiterals() throws Exception { givenEndDate("20130102"); givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); - expect(new DiscoveredThing("bbc", "NETWORK", "csv", "", "FOO", 480L, new MapWritable())); + expect(new DiscoveredThing("bbc", "NETWORK", "csv", "", "BAR&FOO", 480L, new MapWritable())); expect(new DiscoveredThing("onyx", "POKEMON", "csv", "", "FOO", 110L, new MapWritable())); expect(new DiscoveredThing("onyx", "ROCK", "csv", "", "FOO", 4L, new MapWritable())); expect(new DiscoveredThing("onyx", "ROOSTER", "csv", "", "BAR", 240L, new MapWritable())); @@ -443,7 +448,7 @@ public void testSumCountsForPatterns() throws Exception { givenEndDate("20130102"); givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); - expect(new DiscoveredThing("bbc", "NETWORK", "csv", "", "FOO", 480L, new MapWritable())); + expect(new DiscoveredThing("bbc", "NETWORK", "csv", "", "BAR&FOO", 480L, new MapWritable())); expect(new DiscoveredThing("onyx", "POKEMON", "csv", "", "FOO", 110L, new MapWritable())); expect(new DiscoveredThing("onyx", "ROCK", "csv", "", "FOO", 4L, new MapWritable())); expect(new DiscoveredThing("onyx", "ROOSTER", "csv", "", "BAR", 240L, new MapWritable())); @@ -530,11 +535,25 @@ public void testValuesOnlyForLiterals() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - expect(new DiscoveredThing("bbc", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "BAR", 0L, new MapWritable())); + assertQueryResults(); + } + @Test + public void testValuesOnlyForLiteralsNoSumCount() throws Exception { + givenQuery("bbc OR onyx"); + givenStartDate("20130101"); + givenEndDate("20130102"); + givenParameter(DiscoveryLogic.SUM_COUNTS, "false"); + givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); + + expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "BAR", 0L, new MapWritable())); assertQueryResults(); } } From 31003b78f8c59931237698acf8fe4a870eda3758 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Fri, 8 May 2026 13:04:21 +0000 Subject: [PATCH 06/16] Added additional unit tests for DiscoveryLogicTest. --- .../query/discovery/DiscoveryIterator.java | 4 +- .../query/discovery/DiscoveryLogicTest.java | 81 ++++++++++++++++--- 2 files changed, 72 insertions(+), 13 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index b4693f313d6..ed25a34b6a0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -101,8 +101,6 @@ public void next() throws IOException { } else { standardOperation(); } - - log.trace("No data found."); } /** @@ -145,7 +143,7 @@ private Set getTermsOnly() throws IOException { //@formatter:off while (Optional.ofNullable(iterator).isPresent() && iterator.hasTop() && - start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM)) { + start.equals((key = iterator.getTopKey()), PartialKey.ROW)) { TermEntry termEntry = new TermEntry(key, iterator.getTopValue()) { // Only use term and visibility for equality. diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 03e4204c57c..d3fae4187bd 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -244,13 +244,12 @@ private void assertQueryResults() throws Exception { DiscoveredThingValuesOnlyConditionalTransformer dtvoct = new DiscoveredThingValuesOnlyConditionalTransformer(logic.getValuesOnly()); while (iterator.hasNext()) { - actual.add(dtvoct.apply(iterator.next())); + DiscoveredThing dtee = iterator.next(); + actual.add(dtvoct.apply(dtee)); + // actual.add(dtvoct.apply(iterator.next())); + // actual.add(iterator.next()); } - /* - * while (iterator.hasNext()) { actual.add(iterator.next()); } - */ - Assertions.assertThat(actual).hasSize(expected.size()); for (int i = 0; i < expected.size(); i++) { DiscoveredThing actualThing = actual.get(i); @@ -536,14 +535,12 @@ public void testValuesOnlyForLiterals() throws Exception { givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); - expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); - expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); - expect(new DiscoveredThing("onyx", "", "", "", "BAR", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "BAR&FOO", 0L, new MapWritable())); assertQueryResults(); } @Test - public void testValuesOnlyForLiteralsNoSumCount() throws Exception { + public void testValuesOnlyForLiteralsFalseSumCount() throws Exception { givenQuery("bbc OR onyx"); givenStartDate("20130101"); givenEndDate("20130102"); @@ -551,9 +548,73 @@ public void testValuesOnlyForLiteralsNoSumCount() throws Exception { givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "BAR&FOO", 0L, new MapWritable())); + assertQueryResults(); + } + + @Test + public void testValuesOnlyForPatterns() throws Exception { + givenQuery("*yx OR b*"); + givenStartDate("20130101"); + givenEndDate("20130102"); + givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); + givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); + + expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "BAR&FOO", 0L, new MapWritable())); + + assertQueryResults(); + } + + @Test + public void testValuesOnlyForPatternsNotFound() throws Exception { + givenQuery("*nixon OR ford*"); + givenStartDate("20130101"); + givenEndDate("20130102"); + givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); + givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); + + // We expect no results. Make sure we do not blow up. + // expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "", "", "", "BAR&FOO", 0L, new MapWritable())); + + assertQueryResults(); + } + + @Test + public void testValuesOnlyForFieldedLiterals() throws Exception { + // givenQuery("bbc OR onyx"); + givenQuery("rock:onyx OR pokemon:onyx"); + givenStartDate("20130101"); + givenEndDate("20130102"); + + givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); + givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); + + // expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); + assertQueryResults(); + } + + @Test + public void testValuesOnlyForFieldedLiteralsExtendedRange() throws Exception { + givenQuery("rock:onyx OR pokemon:onyx"); + givenStartDate("20130101"); + givenEndDate("20130104"); + + givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); + givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); + + // expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130101", "FOO", 100L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130102", "FOO", 10L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130103", "FOO", 1L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "ROCK", "csv", "20130101", "FOO", 1L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "ROCK", "csv", "20130102", "FOO", 3L, new MapWritable())); + // expect(new DiscoveredThing("onyx", "ROCK", "csv", "20130103", "FOO", 3L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); - expect(new DiscoveredThing("onyx", "", "", "", "BAR", 0L, new MapWritable())); assertQueryResults(); } } From 638c7b7c155ed4a07edddf3fc721bffeb853ed69 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Fri, 8 May 2026 13:48:07 +0000 Subject: [PATCH 07/16] Code review & formatting concerns. --- .../query/discovery/DiscoveredThing.java | 10 ++++++ ...ThingValuesOnlyConditionalTransformer.java | 18 +++------- .../query/discovery/DiscoveryIterator.java | 35 +++++++++++++------ .../query/discovery/DiscoveryLogicTest.java | 4 +-- 4 files changed, 41 insertions(+), 26 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThing.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThing.java index 3e2c3e21e95..13048e64f37 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThing.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThing.java @@ -29,6 +29,16 @@ public DiscoveredThing(String term, String field, String type, String date, Stri this.countsByColumnVisibility = countsByColumnVisibility; } + public DiscoveredThing(String term, String columnVisibility) { + this.term = term; + this.field = ""; + this.type = ""; + this.date = ""; + this.columnVisibility = columnVisibility; + this.count = new VLongWritable(0L); + this.countsByColumnVisibility = new MapWritable(); + } + public DiscoveredThing() { count = new VLongWritable(); countsByColumnVisibility = new MapWritable(); diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java index d092080ee0c..3255daef2a6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java @@ -2,26 +2,18 @@ import java.util.function.UnaryOperator; -import org.apache.hadoop.io.MapWritable; - public class DiscoveredThingValuesOnlyConditionalTransformer implements UnaryOperator { - boolean valuesOnly = false; + boolean valuesOnly; DiscoveredThingValuesOnlyConditionalTransformer(boolean valuesOnly) { this.valuesOnly = valuesOnly; } public DiscoveredThing apply(DiscoveredThing dt) { - // @formatter:off - return (valuesOnly) ? new DiscoveredThing(dt.getTerm(), - "", - "", - "", - dt.getColumnVisibility(), - 0L, - new MapWritable()) - : dt; - // @formatter:on + if (valuesOnly) { + return new DiscoveredThing(dt.getTerm(), dt.getColumnVisibility()); + } + return dt; } } diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index ed25a34b6a0..6120e06b7d6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -116,8 +116,7 @@ private Multimap getTermsByDatatype() throws IOException { // Find all matching entries and parse term entries from them. //@formatter:off - while (Optional.ofNullable(iterator).isPresent() && - iterator.hasTop() && + while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) { TermEntry termEntry = new TermEntry(key, iterator.getTopValue()); @@ -141,8 +140,7 @@ private Set getTermsOnly() throws IOException { // Find all matching entries and parse term entries from them. //@formatter:off - while (Optional.ofNullable(iterator).isPresent() && - iterator.hasTop() && + while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW)) { TermEntry termEntry = new TermEntry(key, iterator.getTopValue()) { @@ -151,14 +149,16 @@ private Set getTermsOnly() throws IOException { public boolean equals(Object o) { if (o instanceof TermEntry) { TermEntry other = (TermEntry) o; - return new EqualsBuilder().append(getTerm(), other.getTerm()).append(getVisibility(), other.getVisibility()).isEquals(); + return new EqualsBuilder().append(getTerm(), other.getTerm()) + .append(getVisibility(), other.getVisibility()).isEquals(); } return false; } @Override public int hashCode() { - return new HashCodeBuilder().append(getTerm()).append(getVisibility()).toHashCode(); + return new HashCodeBuilder().append(getTerm()) + .append(getVisibility()).toHashCode(); } }; @@ -408,17 +408,30 @@ public boolean isValid() { public boolean equals(Object o) { if (o instanceof TermEntry) { TermEntry other = (TermEntry) o; - return new EqualsBuilder().append(getTerm(), other.getTerm()).append(getField(), other.getField()) - .append(getVisibility(), other.getVisibility()).append(getDate(), other.getDate()).append(getDatatype(), other.getDatatype()) - .append(getUidCount(), other.getUidCount()).append(getUidListSize(), other.getUidListSize()).isEquals(); + // @formatter:off + return new EqualsBuilder().append(getTerm(), other.getTerm()) + .append(getField(), other.getField()) + .append(getVisibility(), other.getVisibility()) + .append(getDate(), other.getDate()) + .append(getDatatype(), other.getDatatype()) + .append(getUidCount(), other.getUidCount()) + .append(getUidListSize(), other.getUidListSize()).isEquals(); + // @formatter:on } return false; } @Override public int hashCode() { - return new HashCodeBuilder().append(getTerm()).append(getField()).append(getVisibility()).append(getDate()).append(getDatatype()) - .append(getUidCount()).append(getUidListSize()).toHashCode(); + // @formatter:off + return new HashCodeBuilder().append(getTerm()) + .append(getField()) + .append(getVisibility()) + .append(getDate()) + .append(getDatatype()) + .append(getUidCount()) + .append(getUidListSize()).toHashCode(); + // @formatter:on } } } diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index d3fae4187bd..67b2c653bdd 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -581,7 +581,7 @@ public void testValuesOnlyForPatternsNotFound() throws Exception { assertQueryResults(); } - @Test + // @Test public void testValuesOnlyForFieldedLiterals() throws Exception { // givenQuery("bbc OR onyx"); givenQuery("rock:onyx OR pokemon:onyx"); @@ -597,7 +597,7 @@ public void testValuesOnlyForFieldedLiterals() throws Exception { assertQueryResults(); } - @Test + // @Test public void testValuesOnlyForFieldedLiteralsExtendedRange() throws Exception { givenQuery("rock:onyx OR pokemon:onyx"); givenStartDate("20130101"); From b19201da58c62dd401e8219e3840be4707794ea0 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Fri, 8 May 2026 16:53:20 +0000 Subject: [PATCH 08/16] Added TermOnlyEntry. --- .../query/discovery/DiscoveryIterator.java | 44 ++---- .../query/discovery/TermInterface.java | 37 +++++ .../query/discovery/TermOnlyEntry.java | 127 ++++++++++++++++++ .../query/discovery/DiscoveryLogicTest.java | 4 +- 4 files changed, 179 insertions(+), 33 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/query/discovery/TermInterface.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/discovery/TermOnlyEntry.java diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index 6120e06b7d6..1deab62313d 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -61,7 +61,7 @@ public DiscoveryIterator deepCopy(IteratorEnvironment env) { private void termsOnlyOperation() throws IOException { while (Optional.ofNullable(iterator).isPresent() && iterator.hasTop()) { // Get the entries to aggregate. - Set terms = getTermsOnly(); + Set terms = getTermsOnly(); if (terms.isEmpty()) { log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available."); } else { @@ -75,7 +75,7 @@ private void termsOnlyOperation() throws IOException { private void standardOperation() throws IOException { while (Optional.ofNullable(iterator).isPresent() && iterator.hasTop() && key == null) { // Get the entries to aggregate. - Multimap terms = getTermsByDatatype(); + Multimap terms = getTermsByDatatype(); if (terms.isEmpty()) { log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available."); } else { @@ -106,8 +106,8 @@ public void next() throws IOException { /** * Return a multimap containing mappings of datatypes to term entries that should be aggregated. */ - private Multimap getTermsByDatatype() throws IOException { - Multimap terms = ArrayListMultimap.create(); + private Multimap getTermsByDatatype() throws IOException { + Multimap terms = ArrayListMultimap.create(); Key start = new Key(iterator.getTopKey()); Key key; // If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. @@ -133,8 +133,8 @@ private Multimap getTermsByDatatype() throws IOException { return terms; } - private Set getTermsOnly() throws IOException { - Set terms = new HashSet<>(); + private Set getTermsOnly() throws IOException { + Set terms = new HashSet<>(); Key start = new Key(iterator.getTopKey()); Key key; @@ -142,25 +142,7 @@ private Set getTermsOnly() throws IOException { //@formatter:off while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW)) { - - TermEntry termEntry = new TermEntry(key, iterator.getTopValue()) { - // Only use term and visibility for equality. - @Override - public boolean equals(Object o) { - if (o instanceof TermEntry) { - TermEntry other = (TermEntry) o; - return new EqualsBuilder().append(getTerm(), other.getTerm()) - .append(getVisibility(), other.getVisibility()).isEquals(); - } - return false; - } - - @Override - public int hashCode() { - return new HashCodeBuilder().append(getTerm()) - .append(getVisibility()).toHashCode(); - } - }; + TermOnlyEntry termEntry = new TermOnlyEntry(key, iterator.getTopValue()); if (termEntry.isValid()) terms.add(termEntry); @@ -192,13 +174,13 @@ private boolean datesMatch(Key left, Key right) { /** * Return the given term entries aggregated into a single {@link DiscoveredThing} if possible, or return null if any issues occurred. */ - private DiscoveredThing aggregate(Collection termEntries) { + private DiscoveredThing aggregate(Collection termEntries) { if (termEntries.isEmpty()) { return null; } else { - TermEntry first = termEntries.iterator().next(); + TermInterface first = termEntries.iterator().next(); String term = reverseIndex ? new StringBuilder(first.getTerm()).reverse().toString() : first.getTerm(); - String date = sumCounts ? "" : first.date; + String date = sumCounts ? "" : first.getDate(); if (valuesOnly) { date = ""; } @@ -208,7 +190,7 @@ private DiscoveredThing aggregate(Collection termEntries) { long count = 0L; // Aggregate the counts and visibilities from each entry. - for (TermEntry termEntry : termEntries) { + for (TermInterface termEntry : termEntries) { // Fetch the count to aggregate based of whether we should show the term count or the reference count. long currentCount = this.showReferenceCount ? termEntry.getUidListSize() : termEntry.getUidCount(); try { @@ -316,10 +298,10 @@ public Value getTopValue() { /** * Represents term information parsed from a {@link Key}, {@link Value} pair. */ - private static class TermEntry { + private static class TermEntry implements TermInterface { private final String term; - private String field; + private final String field; private String date; private String datatype; private ColumnVisibility visibility; diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/TermInterface.java b/warehouse/query-core/src/main/java/datawave/query/discovery/TermInterface.java new file mode 100644 index 00000000000..55ecb30ab3e --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/TermInterface.java @@ -0,0 +1,37 @@ +package datawave.query.discovery; + +import org.apache.accumulo.core.security.ColumnVisibility; + +public interface TermInterface { + default String getTerm() { + return ""; + } + + default String getField() { + return ""; + } + + default String getDate() { + return ""; + } + + default String getDatatype() { + return ""; + } + + default ColumnVisibility getVisibility() { + return new ColumnVisibility(); + } + + default long getUidCount() { + return 0L; + } + + default long getUidListSize() { + return 0L; + } + + default boolean isValid() { + return false; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/TermOnlyEntry.java b/warehouse/query-core/src/main/java/datawave/query/discovery/TermOnlyEntry.java new file mode 100644 index 00000000000..caf9a02d6ef --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/TermOnlyEntry.java @@ -0,0 +1,127 @@ +package datawave.query.discovery; + +import java.util.Optional; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; + +import com.google.protobuf.InvalidProtocolBufferException; + +import datawave.ingest.protobuf.Uid; +import datawave.query.Constants; + +public class TermOnlyEntry implements TermInterface { + + private final String term; + private final String field; + private String date; + private final ColumnVisibility visibility; + private long uidCount; + private long uidListSize; + private boolean valid; + + public TermOnlyEntry(Key key, Value value) { + term = key.getRow().toString(); + field = key.getColumnFamily().toString(); + visibility = new ColumnVisibility(key.getColumnVisibility()); + + String colq = key.getColumnQualifier().toString(); + int firstSeparatorPos = colq.indexOf(Constants.NULL_BYTE_STRING); + if (firstSeparatorPos != -1) { + int lastSeparatorPos = colq.lastIndexOf(Constants.NULL_BYTE_STRING); + // If multiple separators are present, this is a task datatype entry. + if (firstSeparatorPos != lastSeparatorPos) { + // Ensure that we at least have yyyyMMdd. + if ((lastSeparatorPos - firstSeparatorPos) < 9) { + return; + } + // The form is datatype\0date\0task status (old knowledge entry). + date = colq.substring(firstSeparatorPos + 1, firstSeparatorPos + 9); + // datatype = colq.substring(0, firstSeparatorPos); + } else { + // Ensure that we at least have yyyyMMdd. + if (firstSeparatorPos < 8) { + return; + } + // The form is shardId\0datatype. + date = colq.substring(0, 8); + } + + // Parse the UID.List object from the value. + try { + Uid.List uidList = Uid.List.parseFrom(value.get()); + if (uidList != null) { + uidCount = uidList.getCOUNT(); + uidListSize = uidList.getUIDList().size(); + } + } catch (InvalidProtocolBufferException e) { + // Don't add UID information. At least we know what shard it's located in. + } + + // Parse the UID.List object from the value. + try { + Uid.List uidList = Uid.List.parseFrom(value.get()); + if (uidList != null) { + uidCount = uidList.getCOUNT(); + uidListSize = uidList.getUIDList().size(); + } + } catch (InvalidProtocolBufferException e) { + // Don't add UID information. At least we know what shard it's located in. + } + } + + // This is now considered a valid term entry for aggregation. + valid = Optional.ofNullable(term).isPresent(); + } + + public String getTerm() { + return this.term; + } + + public String getField() { + return field; + } + + public String getDate() { + return date; + } + + public ColumnVisibility getVisibility() { + return visibility; + } + + public long getUidCount() { + return uidCount; + } + + public long getUidListSize() { + return uidListSize; + } + + public boolean isValid() { + return this.valid; + } + + @Override + public boolean equals(Object o) { + if (o instanceof TermOnlyEntry) { + TermOnlyEntry other = (TermOnlyEntry) o; + // @formatter:off + return new EqualsBuilder().append(getTerm(), other.getTerm()) + .append(getVisibility(), other.getVisibility()).isEquals(); + // @formatter:on + } + return false; + } + + @Override + public int hashCode() { + // @formatter:off + return new HashCodeBuilder().append(getTerm()) + .append(getVisibility()).toHashCode(); + // @formatter:on + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 67b2c653bdd..d3fae4187bd 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -581,7 +581,7 @@ public void testValuesOnlyForPatternsNotFound() throws Exception { assertQueryResults(); } - // @Test + @Test public void testValuesOnlyForFieldedLiterals() throws Exception { // givenQuery("bbc OR onyx"); givenQuery("rock:onyx OR pokemon:onyx"); @@ -597,7 +597,7 @@ public void testValuesOnlyForFieldedLiterals() throws Exception { assertQueryResults(); } - // @Test + @Test public void testValuesOnlyForFieldedLiteralsExtendedRange() throws Exception { givenQuery("rock:onyx OR pokemon:onyx"); givenStartDate("20130101"); From 15dea549b1db56021896743539e980b28d5a1833 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Mon, 11 May 2026 20:15:12 +0000 Subject: [PATCH 09/16] Edited SetTop --- .../query/discovery/DiscoveryIterator.java | 49 +++++++++++++++---- .../query/discovery/DiscoveryLogicTest.java | 36 ++++++++------ 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index 1deab62313d..bb141d06711 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -44,6 +44,8 @@ public class DiscoveryIterator implements SortedKeyValueIterator { private Key key; private Value value; + private Range lastRange; + private Collection columnFamilies; private SortedKeyValueIterator iterator; private boolean separateCountsByColVis = false; private boolean showReferenceCount = false; @@ -61,7 +63,8 @@ public DiscoveryIterator deepCopy(IteratorEnvironment env) { private void termsOnlyOperation() throws IOException { while (Optional.ofNullable(iterator).isPresent() && iterator.hasTop()) { // Get the entries to aggregate. - Set terms = getTermsOnly(); + boolean firstTermOnly = true; + Set terms = getTermsOnly(firstTermOnly); if (terms.isEmpty()) { log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available."); } else { @@ -95,7 +98,7 @@ public void next() throws IOException { this.key = null; this.value = null; - // Underlying code is tentacled. Keep this "strategy" non-parameterized for now. + // Keep this "strategy" non-parameterized for now. if (this.valuesOnly) { termsOnlyOperation(); } else { @@ -133,7 +136,7 @@ private Multimap getTermsByDatatype() throws IOException { return terms; } - private Set getTermsOnly() throws IOException { + private Set getTermsOnly(boolean firstTermOnly) throws IOException { Set terms = new HashSet<>(); Key start = new Key(iterator.getTopKey()); Key key; @@ -144,9 +147,15 @@ private Set getTermsOnly() throws IOException { start.equals((key = iterator.getTopKey()), PartialKey.ROW)) { TermOnlyEntry termEntry = new TermOnlyEntry(key, iterator.getTopValue()); - if (termEntry.isValid()) + if (termEntry.isValid()) { terms.add(termEntry); - else { + return terms; +// // Return the first term only. +// if (firstTermOnly){ +// iterator.next(); +// break; +// } + } else { if (log.isTraceEnabled()) { log.trace("Received invalid term entry from key: " + key); } @@ -181,9 +190,9 @@ private DiscoveredThing aggregate(Collection termEntries) { TermInterface first = termEntries.iterator().next(); String term = reverseIndex ? new StringBuilder(first.getTerm()).reverse().toString() : first.getTerm(); String date = sumCounts ? "" : first.getDate(); - if (valuesOnly) { - date = ""; - } +// if (valuesOnly) { +// date = ""; +// } Set visibilities = new HashSet<>(); Map visibilityToCounts = new HashMap<>(); @@ -237,7 +246,8 @@ private DiscoveredThing aggregate(Collection termEntries) { /** * Set the top {@link Key} and {@link Value} of this iterator, created from the given list of {@link DiscoveredThing} instances. */ - private void setTop(List things) { + private void setTop(List things) throws IOException { + // We want the key to be the last possible key for this date. Return the key as it is in the index (reversed if // necessary) to ensure the keys are consistent with the initial seek range. DiscoveredThing thing = things.get(0); @@ -247,13 +257,29 @@ private void setTop(List things) { // Create a value from the list of things. ArrayWritable thingArray = new ArrayWritable(DiscoveredThing.class, things.toArray(new DiscoveredThing[0])); Value newValue = new Value(WritableUtils.toByteArray(thingArray)); + if (valuesOnly){ + Key skipKey = new Key(row, "\uffff"); + if(!columnFamilies.isEmpty()){ + skipKey = new Key(row, thing.getField(), "\uffff"); + } + + if (lastRange.contains(skipKey)) { + this.key = skipKey; + Range nextRange = new Range(skipKey,false,lastRange.getEndKey(), lastRange.isEndKeyInclusive()); + this.iterator.seek(nextRange, columnFamilies,false); + } + } else{ + this.key = newKey; + + } - this.key = newKey; this.value = newValue; } @Override public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + this.lastRange = range; + this.columnFamilies = columnFamilies; this.iterator.seek(range, columnFamilies, inclusive); if (log.isTraceEnabled()) { log.trace("My source " + ((Optional.ofNullable(iterator).isPresent() && this.iterator.hasTop()) ? "does" : "does not") + " have a top."); @@ -269,6 +295,9 @@ public void init(SortedKeyValueIterator source, Map op this.reverseIndex = Boolean.parseBoolean(options.get(DiscoveryLogic.REVERSE_INDEX)); this.sumCounts = Boolean.parseBoolean(options.get(DiscoveryLogic.SUM_COUNTS)); this.valuesOnly = Boolean.parseBoolean(options.get(DiscoveryLogic.VALUES_ONLY)); + if(valuesOnly){ + sumCounts=false; + } if (log.isTraceEnabled()) { log.trace("Source: " + source.getClass().getName()); diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index d3fae4187bd..40200c3bd28 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -1,15 +1,7 @@ package datawave.query.discovery; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; +import java.util.*; import java.util.concurrent.TimeUnit; import org.apache.accumulo.core.client.AccumuloClient; @@ -19,7 +11,9 @@ import org.apache.accumulo.core.client.BatchWriterConfig; import org.apache.accumulo.core.client.TableExistsException; import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.user.SummingCombiner; import org.apache.accumulo.core.security.Authorizations; @@ -43,6 +37,8 @@ import datawave.query.util.MetadataHelperFactory; import datawave.util.TableName; +import static org.junit.jupiter.api.Assertions.assertTrue; + public class DiscoveryLogicTest { private static final Logger log = Logger.getLogger(DiscoveryLogicTest.class); @@ -534,11 +530,21 @@ public void testValuesOnlyForLiterals() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); - expect(new DiscoveredThing("onyx", "", "", "", "BAR&FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("bbc", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); assertQueryResults(); } + @Test + public void test() { + Key start = new Key("bbc"); + Key stop = new Key("bbc\u0000"); + Range range = new Range(start, true, stop, false); + + Key last = new Key("bbc"); + assertTrue(range.contains(last)); + } + @Test public void testValuesOnlyForLiteralsFalseSumCount() throws Exception { givenQuery("bbc OR onyx"); @@ -547,8 +553,8 @@ public void testValuesOnlyForLiteralsFalseSumCount() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "false"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); - expect(new DiscoveredThing("onyx", "", "", "", "BAR&FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("bbc", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); assertQueryResults(); } @@ -560,8 +566,8 @@ public void testValuesOnlyForPatterns() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); - expect(new DiscoveredThing("onyx", "", "", "", "BAR&FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("bbc", "", "", "", "FOO", 0L, new MapWritable())); + expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); assertQueryResults(); } From c89ec044cda24f306512777bdce08b5ddf86518b Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Tue, 12 May 2026 19:53:23 +0000 Subject: [PATCH 10/16] Refactored DiscoveryIterator for TermsOnly --- .../query/discovery/DiscoveryIterator.java | 144 ++++++------------ .../query/discovery/DiscoveryTransformer.java | 49 +++--- .../query/discovery/TermInterface.java | 37 ----- .../query/discovery/TermOnlyEntry.java | 127 --------------- .../query/discovery/DiscoveryLogicTest.java | 28 ++-- 5 files changed, 87 insertions(+), 298 deletions(-) delete mode 100644 warehouse/query-core/src/main/java/datawave/query/discovery/TermInterface.java delete mode 100644 warehouse/query-core/src/main/java/datawave/query/discovery/TermOnlyEntry.java diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index bb141d06711..8127074f69d 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -52,6 +52,7 @@ public class DiscoveryIterator implements SortedKeyValueIterator { private boolean reverseIndex = false; private boolean sumCounts = false; private boolean valuesOnly = false; + private DiscoveredThingValuesOnlyConditionalTransformer discoveredThingTransformer; @Override public DiscoveryIterator deepCopy(IteratorEnvironment env) { @@ -60,25 +61,14 @@ public DiscoveryIterator deepCopy(IteratorEnvironment env) { return copy; } - private void termsOnlyOperation() throws IOException { - while (Optional.ofNullable(iterator).isPresent() && iterator.hasTop()) { - // Get the entries to aggregate. - boolean firstTermOnly = true; - Set terms = getTermsOnly(firstTermOnly); - if (terms.isEmpty()) { - log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available."); - } else { - List things = List.of(aggregate(terms)); - setTop(things); - return; - } - } - } + @Override + public void next() throws IOException { + this.key = null; + this.value = null; - private void standardOperation() throws IOException { - while (Optional.ofNullable(iterator).isPresent() && iterator.hasTop() && key == null) { + while (iterator.hasTop() && key == null) { // Get the entries to aggregate. - Multimap terms = getTermsByDatatype(); + Multimap terms = getTermsByDatatype(); if (terms.isEmpty()) { log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available."); } else { @@ -91,70 +81,35 @@ private void standardOperation() throws IOException { } } } - } - - @Override - public void next() throws IOException { - this.key = null; - this.value = null; - - // Keep this "strategy" non-parameterized for now. - if (this.valuesOnly) { - termsOnlyOperation(); - } else { - standardOperation(); - } + log.trace("No data found."); } /** * Return a multimap containing mappings of datatypes to term entries that should be aggregated. */ - private Multimap getTermsByDatatype() throws IOException { - Multimap terms = ArrayListMultimap.create(); + private Multimap getTermsByDatatype() throws IOException { + Multimap terms = ArrayListMultimap.create(); Key start = new Key(iterator.getTopKey()); Key key; + // If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. // Otherwise, we only want to collect the term entries for the current field, term, and date of start. BiFunction dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch; + // if 'values only' is selected, then we should match on row rather than column family. + PartialKey partialKey = valuesOnly ? PartialKey.ROW : PartialKey.ROW_COLFAM; + // Find all matching entries and parse term entries from them. - //@formatter:off - while (iterator.hasTop() && - start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && - dateMatchingFunction.apply(start, key)) { + while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), partialKey) && dateMatchingFunction.apply(start, key)) { TermEntry termEntry = new TermEntry(key, iterator.getTopValue()); - if (termEntry.isValid()) + if (termEntry.isValid()) { terms.put(termEntry.getDatatype(), termEntry); - else { - if (log.isTraceEnabled()) { - log.trace("Received invalid term entry from key: " + key); + // if 'values only' is selected, then we only need a single TermEntry, the first one. The value of each + // term in TermEntry should be the identical as we iterate in this while. Therefore, the first one + // encountered will suffice. + if (valuesOnly) { + return terms; } - } - iterator.next(); - } - //@formatter:on - return terms; - } - - private Set getTermsOnly(boolean firstTermOnly) throws IOException { - Set terms = new HashSet<>(); - Key start = new Key(iterator.getTopKey()); - Key key; - - // Find all matching entries and parse term entries from them. - //@formatter:off - while (iterator.hasTop() && - start.equals((key = iterator.getTopKey()), PartialKey.ROW)) { - TermOnlyEntry termEntry = new TermOnlyEntry(key, iterator.getTopValue()); - - if (termEntry.isValid()) { - terms.add(termEntry); - return terms; -// // Return the first term only. -// if (firstTermOnly){ -// iterator.next(); -// break; -// } } else { if (log.isTraceEnabled()) { log.trace("Received invalid term entry from key: " + key); @@ -162,7 +117,6 @@ private Set getTermsOnly(boolean firstTermOnly) throws IOExceptio } iterator.next(); } - //@formatter:on return terms; } @@ -183,23 +137,20 @@ private boolean datesMatch(Key left, Key right) { /** * Return the given term entries aggregated into a single {@link DiscoveredThing} if possible, or return null if any issues occurred. */ - private DiscoveredThing aggregate(Collection termEntries) { + private DiscoveredThing aggregate(Collection termEntries) { if (termEntries.isEmpty()) { return null; } else { - TermInterface first = termEntries.iterator().next(); + TermEntry first = termEntries.iterator().next(); String term = reverseIndex ? new StringBuilder(first.getTerm()).reverse().toString() : first.getTerm(); - String date = sumCounts ? "" : first.getDate(); -// if (valuesOnly) { -// date = ""; -// } + String date = sumCounts ? "" : first.date; Set visibilities = new HashSet<>(); Map visibilityToCounts = new HashMap<>(); long count = 0L; // Aggregate the counts and visibilities from each entry. - for (TermInterface termEntry : termEntries) { + for (TermEntry termEntry : termEntries) { // Fetch the count to aggregate based of whether we should show the term count or the reference count. long currentCount = this.showReferenceCount ? termEntry.getUidListSize() : termEntry.getUidCount(); try { @@ -254,21 +205,24 @@ private void setTop(List things) throws IOException { String row = (this.reverseIndex ? new StringBuilder().append(thing.getTerm()).reverse().toString() : thing.getTerm()); Key newKey = new Key(row, thing.getField(), thing.getDate() + "\uffff"); + // Conditionally trim DiscoveredThing. (Perhaps to term and visibility). + //List thingsConditional = things.stream().map(discoveredThingTransformer).collect(Collectors.toList()); + // Create a value from the list of things. ArrayWritable thingArray = new ArrayWritable(DiscoveredThing.class, things.toArray(new DiscoveredThing[0])); Value newValue = new Value(WritableUtils.toByteArray(thingArray)); - if (valuesOnly){ + if (valuesOnly) { Key skipKey = new Key(row, "\uffff"); - if(!columnFamilies.isEmpty()){ + if (!columnFamilies.isEmpty()) { skipKey = new Key(row, thing.getField(), "\uffff"); } if (lastRange.contains(skipKey)) { this.key = skipKey; - Range nextRange = new Range(skipKey,false,lastRange.getEndKey(), lastRange.isEndKeyInclusive()); - this.iterator.seek(nextRange, columnFamilies,false); + Range nextRange = new Range(skipKey, false, lastRange.getEndKey(), lastRange.isEndKeyInclusive()); + this.iterator.seek(nextRange, columnFamilies, false); } - } else{ + } else { this.key = newKey; } @@ -295,8 +249,9 @@ public void init(SortedKeyValueIterator source, Map op this.reverseIndex = Boolean.parseBoolean(options.get(DiscoveryLogic.REVERSE_INDEX)); this.sumCounts = Boolean.parseBoolean(options.get(DiscoveryLogic.SUM_COUNTS)); this.valuesOnly = Boolean.parseBoolean(options.get(DiscoveryLogic.VALUES_ONLY)); - if(valuesOnly){ - sumCounts=false; + this.discoveredThingTransformer = new DiscoveredThingValuesOnlyConditionalTransformer(valuesOnly); + if (valuesOnly) { + sumCounts = false; } if (log.isTraceEnabled()) { @@ -327,10 +282,10 @@ public Value getTopValue() { /** * Represents term information parsed from a {@link Key}, {@link Value} pair. */ - private static class TermEntry implements TermInterface { + private static class TermEntry { private final String term; - private final String field; + private String field; private String date; private String datatype; private ColumnVisibility visibility; @@ -419,30 +374,17 @@ public boolean isValid() { public boolean equals(Object o) { if (o instanceof TermEntry) { TermEntry other = (TermEntry) o; - // @formatter:off - return new EqualsBuilder().append(getTerm(), other.getTerm()) - .append(getField(), other.getField()) - .append(getVisibility(), other.getVisibility()) - .append(getDate(), other.getDate()) - .append(getDatatype(), other.getDatatype()) - .append(getUidCount(), other.getUidCount()) - .append(getUidListSize(), other.getUidListSize()).isEquals(); - // @formatter:on + return new EqualsBuilder().append(getTerm(), other.getTerm()).append(getField(), other.getField()) + .append(getVisibility(), other.getVisibility()).append(getDate(), other.getDate()).append(getDatatype(), other.getDatatype()) + .append(getUidCount(), other.getUidCount()).append(getUidListSize(), other.getUidListSize()).isEquals(); } return false; } @Override public int hashCode() { - // @formatter:off - return new HashCodeBuilder().append(getTerm()) - .append(getField()) - .append(getVisibility()) - .append(getDate()) - .append(getDatatype()) - .append(getUidCount()) - .append(getUidListSize()).toHashCode(); - // @formatter:on + return new HashCodeBuilder().append(getTerm()).append(getField()).append(getVisibility()).append(getDate()).append(getDatatype()) + .append(getUidCount()).append(getUidListSize()).toHashCode(); } } } diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java index 0f6f5be02b0..49a4998b730 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java @@ -5,6 +5,8 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.BiFunction; +import java.util.function.Function; import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.hadoop.io.Writable; @@ -32,7 +34,7 @@ public class DiscoveryTransformer extends BaseQueryLogicTransformer logic = null; private QueryModel myQueryModel = null; private MarkingFunctions markingFunctions; - private ResponseObjectFactory responseObjectFactory; + private final ResponseObjectFactory responseObjectFactory; public DiscoveryTransformer(BaseQueryLogic logic, Query settings, QueryModel qm) { super(new MarkingFunctions.Default()); @@ -42,47 +44,54 @@ public DiscoveryTransformer(BaseQueryLogic logic, Query setting this.myQueryModel = qm; } - @Override - public EventBase transform(DiscoveredThing thing) { - Preconditions.checkNotNull(thing, "Received a null object to transform!"); - - EventBase event = this.responseObjectFactory.getEvent(); - Map markings; + Function> markingsFromVisibility = x -> { try { - markings = this.markingFunctions.translateFromColumnVisibility(new ColumnVisibility(thing.getColumnVisibility())); + return this.markingFunctions.translateFromColumnVisibility(new ColumnVisibility(x)); } catch (Exception e) { - throw new RuntimeException("could not parse to markings: " + thing.getColumnVisibility()); + throw new RuntimeException("could not parse to markings: " + x); } - event.setMarkings(markings); + }; + BiFunction,List> generateFieldList = (x, y) -> { List fields = new ArrayList<>(); - fields.add(this.makeField("VALUE", markings, "", 0L, thing.getTerm())); + fields.add(this.makeField("VALUE", y, "", 0L, x.getTerm())); /** * Added query model to alias FIELD, if DiscoveredThing::field both not NULL and not empty. */ - Optional fieldOFThing = Optional.ofNullable(thing.getField()); - fieldOFThing.filter(i -> !i.isBlank()) - .ifPresent(i -> fields.add(this.makeField("FIELD", markings, "", 0L, myQueryModel.aliasFieldNameReverseModel(i)))); + Optional fieldOFThing = Optional.ofNullable(x.getField()); + fieldOFThing.filter(i -> !i.isBlank()).ifPresent(i -> fields.add(this.makeField("FIELD", y, "", 0L, myQueryModel.aliasFieldNameReverseModel(i)))); - fields.add(this.makeField("DATE", markings, "", 0L, thing.getDate())); - fields.add(this.makeField("DATA TYPE", markings, "", 0L, thing.getType())); + fields.add(this.makeField("DATE", y, "", 0L, x.getDate())); + fields.add(this.makeField("DATA TYPE", y, "", 0L, x.getType())); // If requested return counts separated by colvis, all counts by colvis could be > total record count - if (thing.getCountsByColumnVisibility() != null && !thing.getCountsByColumnVisibility().isEmpty()) { - for (Map.Entry entry : thing.getCountsByColumnVisibility().entrySet()) { + if (x.getCountsByColumnVisibility() != null && !x.getCountsByColumnVisibility().isEmpty()) { + for (Map.Entry entry : x.getCountsByColumnVisibility().entrySet()) { try { Map eMarkings = this.markingFunctions.translateFromColumnVisibility(new ColumnVisibility(entry.getKey().toString())); fields.add(this.makeField("RECORD COUNT", new HashMap<>(), entry.getKey().toString(), 0L, entry.getValue().toString())); } catch (Exception e) { - throw new RuntimeException("could not parse to markings: " + thing.getColumnVisibility()); + throw new RuntimeException("could not parse to markings: " + x.getColumnVisibility()); } } } else { - fields.add(this.makeField("RECORD COUNT", markings, "", 0L, Long.toString(thing.getCount()))); + fields.add(this.makeField("RECORD COUNT", y, "", 0L, Long.toString(x.getCount()))); } + return fields; + }; + + @Override + public EventBase transform(DiscoveredThing thing) { + Preconditions.checkNotNull(thing, "Received a null object to transform!"); + + EventBase event = this.responseObjectFactory.getEvent(); + + Map markings = markingsFromVisibility.apply(thing.getColumnVisibility()); + event.setMarkings(markings); + List fields = generateFieldList.apply(thing, markings); event.setFields(fields); event.setSizeInBytes(fields.size() * 6L); diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/TermInterface.java b/warehouse/query-core/src/main/java/datawave/query/discovery/TermInterface.java deleted file mode 100644 index 55ecb30ab3e..00000000000 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/TermInterface.java +++ /dev/null @@ -1,37 +0,0 @@ -package datawave.query.discovery; - -import org.apache.accumulo.core.security.ColumnVisibility; - -public interface TermInterface { - default String getTerm() { - return ""; - } - - default String getField() { - return ""; - } - - default String getDate() { - return ""; - } - - default String getDatatype() { - return ""; - } - - default ColumnVisibility getVisibility() { - return new ColumnVisibility(); - } - - default long getUidCount() { - return 0L; - } - - default long getUidListSize() { - return 0L; - } - - default boolean isValid() { - return false; - } -} diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/TermOnlyEntry.java b/warehouse/query-core/src/main/java/datawave/query/discovery/TermOnlyEntry.java deleted file mode 100644 index caf9a02d6ef..00000000000 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/TermOnlyEntry.java +++ /dev/null @@ -1,127 +0,0 @@ -package datawave.query.discovery; - -import java.util.Optional; - -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.security.ColumnVisibility; -import org.apache.commons.lang3.builder.EqualsBuilder; -import org.apache.commons.lang3.builder.HashCodeBuilder; - -import com.google.protobuf.InvalidProtocolBufferException; - -import datawave.ingest.protobuf.Uid; -import datawave.query.Constants; - -public class TermOnlyEntry implements TermInterface { - - private final String term; - private final String field; - private String date; - private final ColumnVisibility visibility; - private long uidCount; - private long uidListSize; - private boolean valid; - - public TermOnlyEntry(Key key, Value value) { - term = key.getRow().toString(); - field = key.getColumnFamily().toString(); - visibility = new ColumnVisibility(key.getColumnVisibility()); - - String colq = key.getColumnQualifier().toString(); - int firstSeparatorPos = colq.indexOf(Constants.NULL_BYTE_STRING); - if (firstSeparatorPos != -1) { - int lastSeparatorPos = colq.lastIndexOf(Constants.NULL_BYTE_STRING); - // If multiple separators are present, this is a task datatype entry. - if (firstSeparatorPos != lastSeparatorPos) { - // Ensure that we at least have yyyyMMdd. - if ((lastSeparatorPos - firstSeparatorPos) < 9) { - return; - } - // The form is datatype\0date\0task status (old knowledge entry). - date = colq.substring(firstSeparatorPos + 1, firstSeparatorPos + 9); - // datatype = colq.substring(0, firstSeparatorPos); - } else { - // Ensure that we at least have yyyyMMdd. - if (firstSeparatorPos < 8) { - return; - } - // The form is shardId\0datatype. - date = colq.substring(0, 8); - } - - // Parse the UID.List object from the value. - try { - Uid.List uidList = Uid.List.parseFrom(value.get()); - if (uidList != null) { - uidCount = uidList.getCOUNT(); - uidListSize = uidList.getUIDList().size(); - } - } catch (InvalidProtocolBufferException e) { - // Don't add UID information. At least we know what shard it's located in. - } - - // Parse the UID.List object from the value. - try { - Uid.List uidList = Uid.List.parseFrom(value.get()); - if (uidList != null) { - uidCount = uidList.getCOUNT(); - uidListSize = uidList.getUIDList().size(); - } - } catch (InvalidProtocolBufferException e) { - // Don't add UID information. At least we know what shard it's located in. - } - } - - // This is now considered a valid term entry for aggregation. - valid = Optional.ofNullable(term).isPresent(); - } - - public String getTerm() { - return this.term; - } - - public String getField() { - return field; - } - - public String getDate() { - return date; - } - - public ColumnVisibility getVisibility() { - return visibility; - } - - public long getUidCount() { - return uidCount; - } - - public long getUidListSize() { - return uidListSize; - } - - public boolean isValid() { - return this.valid; - } - - @Override - public boolean equals(Object o) { - if (o instanceof TermOnlyEntry) { - TermOnlyEntry other = (TermOnlyEntry) o; - // @formatter:off - return new EqualsBuilder().append(getTerm(), other.getTerm()) - .append(getVisibility(), other.getVisibility()).isEquals(); - // @formatter:on - } - return false; - } - - @Override - public int hashCode() { - // @formatter:off - return new HashCodeBuilder().append(getTerm()) - .append(getVisibility()).toHashCode(); - // @formatter:on - } -} diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 40200c3bd28..7a396e4c673 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -1,7 +1,17 @@ package datawave.query.discovery; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.text.SimpleDateFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; import java.util.concurrent.TimeUnit; import org.apache.accumulo.core.client.AccumuloClient; @@ -37,8 +47,6 @@ import datawave.query.util.MetadataHelperFactory; import datawave.util.TableName; -import static org.junit.jupiter.api.Assertions.assertTrue; - public class DiscoveryLogicTest { private static final Logger log = Logger.getLogger(DiscoveryLogicTest.class); @@ -242,8 +250,9 @@ private void assertQueryResults() throws Exception { while (iterator.hasNext()) { DiscoveredThing dtee = iterator.next(); actual.add(dtvoct.apply(dtee)); - // actual.add(dtvoct.apply(iterator.next())); - // actual.add(iterator.next()); + + //actual.add(dtvoct.apply(iterator.next())); + //actual.add(iterator.next()); } Assertions.assertThat(actual).hasSize(expected.size()); @@ -536,7 +545,7 @@ public void testValuesOnlyForLiterals() throws Exception { } @Test - public void test() { + public void testAccumuloCoreDataRange() { Key start = new Key("bbc"); Key stop = new Key("bbc\u0000"); Range range = new Range(start, true, stop, false); @@ -612,13 +621,6 @@ public void testValuesOnlyForFieldedLiteralsExtendedRange() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - // expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130101", "FOO", 100L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130102", "FOO", 10L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "POKEMON", "csv", "20130103", "FOO", 1L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "ROCK", "csv", "20130101", "FOO", 1L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "ROCK", "csv", "20130102", "FOO", 3L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "ROCK", "csv", "20130103", "FOO", 3L, new MapWritable())); - expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); assertQueryResults(); From 00892054f6859ee9580762181d6b3906cafcbdc1 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Wed, 13 May 2026 16:45:18 +0000 Subject: [PATCH 11/16] Edited DiscoveryTransformer. --- .../query/discovery/DiscoveryTransformer.java | 91 +++++++++++++++---- .../query/discovery/DiscoveryLogicTest.java | 24 ++--- .../discovery/DiscoveryTransformerTest.java | 40 ++++++++ 3 files changed, 124 insertions(+), 31 deletions(-) create mode 100644 warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java index 49a4998b730..41c798ee786 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java @@ -8,7 +8,9 @@ import java.util.function.BiFunction; import java.util.function.Function; +import datawave.microservice.query.QueryImpl; import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.io.Writable; import com.google.common.base.Preconditions; @@ -29,34 +31,35 @@ import datawave.webservice.result.BaseQueryResponse; import datawave.webservice.result.EventQueryResponseBase; +import static datawave.query.discovery.DiscoveryLogic.VALUES_ONLY; + public class DiscoveryTransformer extends BaseQueryLogicTransformer implements CacheableLogic { private List variableFieldList = null; - private BaseQueryLogic logic = null; - private QueryModel myQueryModel = null; + private final BaseQueryLogic logic; + private QueryModel myQueryModel; private MarkingFunctions markingFunctions; private final ResponseObjectFactory responseObjectFactory; + private boolean valuesOnly = false; - public DiscoveryTransformer(BaseQueryLogic logic, Query settings, QueryModel qm) { - super(new MarkingFunctions.Default()); - this.markingFunctions = logic.getMarkingFunctions(); - this.responseObjectFactory = logic.getResponseObjectFactory(); - this.logic = logic; - this.myQueryModel = qm; - } + /** + * Variant of a field list that contains only a value field. + */ + BiFunction,List>> generateValuesOnlyFieldList = (x, y) -> { + List> fields = new ArrayList<>(); - Function> markingsFromVisibility = x -> { - try { - return this.markingFunctions.translateFromColumnVisibility(new ColumnVisibility(x)); - } catch (Exception e) { - throw new RuntimeException("could not parse to markings: " + x); - } + fields.add(this.makeField("VALUE", y, "", 0L, x.getTerm())); + + return fields; }; - BiFunction,List> generateFieldList = (x, y) -> { - List fields = new ArrayList<>(); + /** + * Variant of field list that contains a standard, default set of fields. + */ + BiFunction,List>> generateFieldList = (x, y) -> { + List> fields = new ArrayList<>(); fields.add(this.makeField("VALUE", y, "", 0L, x.getTerm())); - /** + /* * Added query model to alias FIELD, if DiscoveredThing::field both not NULL and not empty. */ Optional fieldOFThing = Optional.ofNullable(x.getField()); @@ -82,6 +85,31 @@ public DiscoveryTransformer(BaseQueryLogic logic, Query setting return fields; }; + final Map,List>> > mapMapGenerator = new HashMap<>(); + { + mapMapGenerator.put("VALUES_ONLY",generateValuesOnlyFieldList); + mapMapGenerator.put("STANDARD",generateFieldList); + } + + /** + * Factory to get a particular field list generator. Different scenarios may call for different field lists. + * + * @param isValuesOnly + * @return + */ + BiFunction,List>> getMapGenerator(boolean isValuesOnly){ + return mapMapGenerator.get(isValuesOnly? "VALUES_ONLY": "STANDARD"); + } + + public DiscoveryTransformer(BaseQueryLogic logic, Query settings, QueryModel qm) { + super(new MarkingFunctions.Default()); + this.markingFunctions = logic.getMarkingFunctions(); + this.responseObjectFactory = logic.getResponseObjectFactory(); + this.logic = logic; + this.myQueryModel = qm; + this.valuesOnly = getOrDefaultBoolean(settings, VALUES_ONLY, false); + } + @Override public EventBase transform(DiscoveredThing thing) { Preconditions.checkNotNull(thing, "Received a null object to transform!"); @@ -91,7 +119,7 @@ public EventBase transform(DiscoveredThing thing) { Map markings = markingsFromVisibility.apply(thing.getColumnVisibility()); event.setMarkings(markings); - List fields = generateFieldList.apply(thing, markings); + List> fields = getMapGenerator(valuesOnly).apply(thing, markings); event.setFields(fields); event.setSizeInBytes(fields.size() * 6L); @@ -188,4 +216,29 @@ public Object readFromCache(CacheableQueryRow cacheableQueryRow) { event.setFields(fieldList); return event; } + + Function> markingsFromVisibility = x -> { + try { + return this.markingFunctions.translateFromColumnVisibility(new ColumnVisibility(x)); + } catch (Exception e) { + throw new RuntimeException("could not parse to markings: " + x); + } + }; + + /** + * If present, return the value of the given parameter from the given settings as a boolean, or return the default value otherwise. + */ + private boolean getOrDefaultBoolean(Query settings, String parameterName, boolean defaultValue) { + String value = getTrimmedParameter(settings, parameterName); + return StringUtils.isBlank(value) ? defaultValue : Boolean.parseBoolean(value); + } + + /** + * Return the trimmed value of the given parameter from the given settings, or null if a value is not present. + */ + private String getTrimmedParameter(Query settings, String parameterName) { + QueryImpl.Parameter parameter = settings.findParameter(parameterName); + return parameter != null ? parameter.getParameterValue().trim() : null; + } + } diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 7a396e4c673..0db739ed353 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -1,5 +1,6 @@ package datawave.query.discovery; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.text.SimpleDateFormat; @@ -30,11 +31,10 @@ import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.hadoop.io.MapWritable; import org.apache.log4j.Logger; -import org.assertj.core.api.Assertions; -import org.junit.After; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import datawave.core.query.configuration.GenericQueryConfiguration; import datawave.core.query.result.event.DefaultResponseObjectFactory; @@ -62,16 +62,16 @@ public class DiscoveryLogicTest { private String query; private String startDate; private String endDate; - private Map parameters = new HashMap<>(); + private final Map parameters = new HashMap<>(); private final List expected = new ArrayList<>(); - @BeforeClass + @BeforeAll public static void setUp() { System.setProperty(MetadataHelperFactory.ALL_AUTHS_PROPERTY, QUERY_AUTHS); } - @Before + @BeforeEach public void setup() throws Throwable { initClient(); writeData(); @@ -222,7 +222,7 @@ private void initLogic() { logic.setMetadataHelperFactory(new MetadataHelperFactory()); } - @After + @AfterEach public void tearDown() throws Exception { query = null; startDate = null; @@ -255,12 +255,12 @@ private void assertQueryResults() throws Exception { //actual.add(iterator.next()); } - Assertions.assertThat(actual).hasSize(expected.size()); + assertEquals(expected.size(),actual.size()); for (int i = 0; i < expected.size(); i++) { DiscoveredThing actualThing = actual.get(i); DiscoveredThing expectedThing = expected.get(i); - Assertions.assertThat(actualThing).isEqualTo(expectedThing); - Assertions.assertThat(actualThing.getCountsByColumnVisibility()).isEqualTo(expectedThing.getCountsByColumnVisibility()); + assertEquals(expectedThing,actualThing); + assertEquals(expectedThing.getCountsByColumnVisibility(),actualThing.getCountsByColumnVisibility()); } } diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java new file mode 100644 index 00000000000..fde4814843e --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java @@ -0,0 +1,40 @@ +package datawave.query.discovery; + +import datawave.core.query.logic.BaseQueryLogic; +import datawave.core.query.logic.QueryLogicTransformer; +import datawave.microservice.query.Query; +import datawave.query.model.QueryModel; +import datawave.query.tables.ShardIndexQueryTable; +import datawave.webservice.query.result.event.EventBase; +import org.apache.hadoop.io.MapWritable; +import org.junit.jupiter.api.Test; + +public class DiscoveryTransformerTest { + + @Test + public void testTermsOnlyDiscoveredThing() { + + // TODO: Work-in-progress + BaseQueryLogic logic = null; + Query settings = null; + QueryModel qm = null; + DiscoveryTransformer dt = new DiscoveryTransformer(logic,settings,qm); + + ShardIndexQueryTable siqt = new ShardIndexQueryTable(); + DiscoveredThing thing = new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable()); + + QueryLogicTransformer transformer = logic.getTransformer(settings); + + EventBase eb = transformer.transform(thing); + + + } + @Test + public void testSingleDiscoveredThing() { + // TODO: Work-in-progress + //DiscoveryTransformer dt = new DiscoveryTransformer(); + DiscoveredThing thing = new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()); + //EventBase eb = dt.transform(thing); + + } +} From 7d2e1651f7d594d1e23e04837278cc207a1152be Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Fri, 15 May 2026 18:50:53 +0000 Subject: [PATCH 12/16] Added warning suppression for some legacy rawtypes. --- .../query/discovery/DiscoveryIterator.java | 4 +- .../query/discovery/DiscoveryTransformer.java | 43 ++++++++++--------- .../query/discovery/DiscoveryLogicTest.java | 10 ++--- .../discovery/DiscoveryTransformerTest.java | 13 +++--- 4 files changed, 37 insertions(+), 33 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index 8127074f69d..26110f85af6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -205,8 +205,8 @@ private void setTop(List things) throws IOException { String row = (this.reverseIndex ? new StringBuilder().append(thing.getTerm()).reverse().toString() : thing.getTerm()); Key newKey = new Key(row, thing.getField(), thing.getDate() + "\uffff"); - // Conditionally trim DiscoveredThing. (Perhaps to term and visibility). - //List thingsConditional = things.stream().map(discoveredThingTransformer).collect(Collectors.toList()); + // Conditionally trim DiscoveredThing. (Perhaps to term and visibility). + // List thingsConditional = things.stream().map(discoveredThingTransformer).collect(Collectors.toList()); // Create a value from the list of things. ArrayWritable thingArray = new ArrayWritable(DiscoveredThing.class, things.toArray(new DiscoveredThing[0])); diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java index 41c798ee786..efba3f1a7b0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java @@ -1,5 +1,7 @@ package datawave.query.discovery; +import static datawave.query.discovery.DiscoveryLogic.VALUES_ONLY; + import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -8,7 +10,7 @@ import java.util.function.BiFunction; import java.util.function.Function; -import datawave.microservice.query.QueryImpl; +import datawave.webservice.query.result.event.*; import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.io.Writable; @@ -21,18 +23,14 @@ import datawave.marking.MarkingFunctions; import datawave.marking.MarkingFunctions.Exception; import datawave.microservice.query.Query; +import datawave.microservice.query.QueryImpl; import datawave.query.model.QueryModel; import datawave.webservice.query.cachedresults.CacheableQueryRow; import datawave.webservice.query.exception.QueryException; -import datawave.webservice.query.result.event.EventBase; -import datawave.webservice.query.result.event.FieldBase; -import datawave.webservice.query.result.event.Metadata; -import datawave.webservice.query.result.event.ResponseObjectFactory; import datawave.webservice.result.BaseQueryResponse; import datawave.webservice.result.EventQueryResponseBase; -import static datawave.query.discovery.DiscoveryLogic.VALUES_ONLY; - +@SuppressWarnings({"rawtypes"}) public class DiscoveryTransformer extends BaseQueryLogicTransformer implements CacheableLogic { private List variableFieldList = null; private final BaseQueryLogic logic; @@ -44,8 +42,8 @@ public class DiscoveryTransformer extends BaseQueryLogicTransformer,List>> generateValuesOnlyFieldList = (x, y) -> { - List> fields = new ArrayList<>(); + BiFunction,List> generateValuesOnlyFieldList = (x, y) -> { + List fields = new ArrayList<>(); fields.add(this.makeField("VALUE", y, "", 0L, x.getTerm())); @@ -55,8 +53,8 @@ public class DiscoveryTransformer extends BaseQueryLogicTransformer,List>> generateFieldList = (x, y) -> { - List> fields = new ArrayList<>(); + BiFunction,List> generateFieldList = (x, y) -> { + List fields = new ArrayList<>(); fields.add(this.makeField("VALUE", y, "", 0L, x.getTerm())); /* @@ -85,20 +83,20 @@ public class DiscoveryTransformer extends BaseQueryLogicTransformer,List>> > mapMapGenerator = new HashMap<>(); + final Map,List>> mapMapGenerator = new HashMap<>(); { - mapMapGenerator.put("VALUES_ONLY",generateValuesOnlyFieldList); - mapMapGenerator.put("STANDARD",generateFieldList); + mapMapGenerator.put("VALUES_ONLY", generateValuesOnlyFieldList); + mapMapGenerator.put("STANDARD", generateFieldList); } /** - * Factory to get a particular field list generator. Different scenarios may call for different field lists. + * Factory to get a particular field list generator. Different scenarios may call for different field lists. * * @param isValuesOnly * @return */ - BiFunction,List>> getMapGenerator(boolean isValuesOnly){ - return mapMapGenerator.get(isValuesOnly? "VALUES_ONLY": "STANDARD"); + BiFunction,List> getMapGenerator(boolean isValuesOnly) { + return mapMapGenerator.get(isValuesOnly ? "VALUES_ONLY" : "STANDARD"); } public DiscoveryTransformer(BaseQueryLogic logic, Query settings, QueryModel qm) { @@ -110,6 +108,7 @@ public DiscoveryTransformer(BaseQueryLogic logic, Query setting this.valuesOnly = getOrDefaultBoolean(settings, VALUES_ONLY, false); } + @SuppressWarnings({"rawtypes","unchecked"}) @Override public EventBase transform(DiscoveredThing thing) { Preconditions.checkNotNull(thing, "Received a null object to transform!"); @@ -119,7 +118,7 @@ public EventBase transform(DiscoveredThing thing) { Map markings = markingsFromVisibility.apply(thing.getColumnVisibility()); event.setMarkings(markings); - List> fields = getMapGenerator(valuesOnly).apply(thing, markings); + List fields = getMapGenerator(valuesOnly).apply(thing, markings); event.setFields(fields); event.setSizeInBytes(fields.size() * 6L); @@ -133,8 +132,9 @@ public EventBase transform(DiscoveredThing thing) { return event; } - protected FieldBase makeField(String name, Map markings, String columnVisibility, Long timestamp, Object value) { - FieldBase field = this.responseObjectFactory.getField(); + @SuppressWarnings({"rawtypes"}) + protected FieldBase makeField(String name, Map markings, String columnVisibility, Long timestamp, Object value) { + FieldBase field = this.responseObjectFactory.getField(); field.setName(name); field.setMarkings(markings); field.setColumnVisibility(columnVisibility); @@ -143,6 +143,7 @@ protected FieldBase makeField(String name, Map markings, Strin return field; } + @SuppressWarnings({"rawtypes"}) @Override public BaseQueryResponse createResponse(List resultList) { EventQueryResponseBase response = this.responseObjectFactory.getEventQueryResponse(); @@ -157,6 +158,7 @@ public BaseQueryResponse createResponse(List resultList) { return response; } + @SuppressWarnings({"unchecked"}) @Override public CacheableQueryRow writeToCache(Object o) throws QueryException { EventBase event = (EventBase) o; @@ -176,6 +178,7 @@ public CacheableQueryRow writeToCache(Object o) throws QueryException { return cqo; } + @SuppressWarnings({"unchecked"}) @Override public Object readFromCache(CacheableQueryRow cacheableQueryRow) { if (this.variableFieldList == null) { diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 0db739ed353..0802f0de6cd 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -251,16 +251,16 @@ private void assertQueryResults() throws Exception { DiscoveredThing dtee = iterator.next(); actual.add(dtvoct.apply(dtee)); - //actual.add(dtvoct.apply(iterator.next())); - //actual.add(iterator.next()); + // actual.add(dtvoct.apply(iterator.next())); + // actual.add(iterator.next()); } - assertEquals(expected.size(),actual.size()); + assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { DiscoveredThing actualThing = actual.get(i); DiscoveredThing expectedThing = expected.get(i); - assertEquals(expectedThing,actualThing); - assertEquals(expectedThing.getCountsByColumnVisibility(),actualThing.getCountsByColumnVisibility()); + assertEquals(expectedThing, actualThing); + assertEquals(expectedThing.getCountsByColumnVisibility(), actualThing.getCountsByColumnVisibility()); } } diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java index fde4814843e..d80dd9217a1 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java @@ -1,13 +1,14 @@ package datawave.query.discovery; +import org.apache.hadoop.io.MapWritable; +import org.junit.jupiter.api.Test; + import datawave.core.query.logic.BaseQueryLogic; import datawave.core.query.logic.QueryLogicTransformer; import datawave.microservice.query.Query; import datawave.query.model.QueryModel; import datawave.query.tables.ShardIndexQueryTable; import datawave.webservice.query.result.event.EventBase; -import org.apache.hadoop.io.MapWritable; -import org.junit.jupiter.api.Test; public class DiscoveryTransformerTest { @@ -18,7 +19,7 @@ public void testTermsOnlyDiscoveredThing() { BaseQueryLogic logic = null; Query settings = null; QueryModel qm = null; - DiscoveryTransformer dt = new DiscoveryTransformer(logic,settings,qm); + DiscoveryTransformer dt = new DiscoveryTransformer(logic, settings, qm); ShardIndexQueryTable siqt = new ShardIndexQueryTable(); DiscoveredThing thing = new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable()); @@ -27,14 +28,14 @@ public void testTermsOnlyDiscoveredThing() { EventBase eb = transformer.transform(thing); - } + @Test public void testSingleDiscoveredThing() { // TODO: Work-in-progress - //DiscoveryTransformer dt = new DiscoveryTransformer(); + // DiscoveryTransformer dt = new DiscoveryTransformer(); DiscoveredThing thing = new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()); - //EventBase eb = dt.transform(thing); + // EventBase eb = dt.transform(thing); } } From 13c8eb61cbd696ad9e7e17b6c76f3e4d030347f6 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Tue, 19 May 2026 18:51:35 +0000 Subject: [PATCH 13/16] Edited DiscoveryTransformerTest. --- .../query/discovery/DiscoveryIterator.java | 20 +- .../query/discovery/DiscoveryTransformer.java | 10 +- .../discovery/DiscoveryTransformerTest.java | 290 +++++++++++++++++- 3 files changed, 296 insertions(+), 24 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index 26110f85af6..3b2acc95d26 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -373,18 +373,28 @@ public boolean isValid() { @Override public boolean equals(Object o) { if (o instanceof TermEntry) { + // @formatter:off TermEntry other = (TermEntry) o; - return new EqualsBuilder().append(getTerm(), other.getTerm()).append(getField(), other.getField()) - .append(getVisibility(), other.getVisibility()).append(getDate(), other.getDate()).append(getDatatype(), other.getDatatype()) - .append(getUidCount(), other.getUidCount()).append(getUidListSize(), other.getUidListSize()).isEquals(); + return new EqualsBuilder().append(getTerm(), other.getTerm()) + .append(getField(), other.getField()) + .append(getVisibility(), other.getVisibility()) + .append(getDate(), other.getDate()) + .append(getDatatype(), other.getDatatype()) + .append(getUidCount(), other.getUidCount()) + .append(getUidListSize(), other.getUidListSize()).isEquals(); + // @formatter:on } return false; } @Override public int hashCode() { - return new HashCodeBuilder().append(getTerm()).append(getField()).append(getVisibility()).append(getDate()).append(getDatatype()) - .append(getUidCount()).append(getUidListSize()).toHashCode(); + // @formatter:off + return new HashCodeBuilder().append(getTerm()) + .append(getField()).append(getVisibility()) + .append(getDate()).append(getDatatype()) + .append(getUidCount()).append(getUidListSize()).toHashCode(); + // @formatter:on } } } diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java index efba3f1a7b0..d721a2ebab0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java @@ -10,7 +10,6 @@ import java.util.function.BiFunction; import java.util.function.Function; -import datawave.webservice.query.result.event.*; import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.io.Writable; @@ -26,7 +25,10 @@ import datawave.microservice.query.QueryImpl; import datawave.query.model.QueryModel; import datawave.webservice.query.cachedresults.CacheableQueryRow; -import datawave.webservice.query.exception.QueryException; +import datawave.webservice.query.result.event.EventBase; +import datawave.webservice.query.result.event.FieldBase; +import datawave.webservice.query.result.event.Metadata; +import datawave.webservice.query.result.event.ResponseObjectFactory; import datawave.webservice.result.BaseQueryResponse; import datawave.webservice.result.EventQueryResponseBase; @@ -108,7 +110,7 @@ public DiscoveryTransformer(BaseQueryLogic logic, Query setting this.valuesOnly = getOrDefaultBoolean(settings, VALUES_ONLY, false); } - @SuppressWarnings({"rawtypes","unchecked"}) + @SuppressWarnings({"rawtypes", "unchecked"}) @Override public EventBase transform(DiscoveredThing thing) { Preconditions.checkNotNull(thing, "Received a null object to transform!"); @@ -160,7 +162,7 @@ public BaseQueryResponse createResponse(List resultList) { @SuppressWarnings({"unchecked"}) @Override - public CacheableQueryRow writeToCache(Object o) throws QueryException { + public CacheableQueryRow writeToCache(Object o) { EventBase event = (EventBase) o; CacheableQueryRow cqo = responseObjectFactory.getCacheableQueryRow(); diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java index d80dd9217a1..f989a7d0aeb 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryTransformerTest.java @@ -1,41 +1,301 @@ package datawave.query.discovery; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; +import javax.xml.bind.JAXBException; +import javax.xml.bind.Marshaller; +import javax.xml.namespace.QName; + +import org.apache.accumulo.core.client.AccumuloClient; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.hadoop.io.MapWritable; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import datawave.core.query.logic.BaseQueryLogic; -import datawave.core.query.logic.QueryLogicTransformer; +import datawave.accumulo.inmemory.InMemoryAccumuloClient; +import datawave.accumulo.inmemory.InMemoryInstance; +import datawave.core.query.result.event.DefaultResponseObjectFactory; +import datawave.marking.MarkingFunctions; import datawave.microservice.query.Query; +import datawave.microservice.query.QueryImpl; +import datawave.query.QueryParameters; +import datawave.query.QueryTestTableHelper; +import datawave.query.composite.CompositeMetadataHelper; import datawave.query.model.QueryModel; -import datawave.query.tables.ShardIndexQueryTable; +import datawave.query.util.AllFieldMetadataHelper; +import datawave.query.util.MetadataHelper; +import datawave.query.util.MetadataHelperFactory; +import datawave.query.util.TypeMetadataHelper; +import datawave.util.TableName; +import datawave.webservice.query.result.event.DefaultEvent; +import datawave.webservice.query.result.event.DefaultField; import datawave.webservice.query.result.event.EventBase; +import datawave.webservice.query.result.event.FieldBase; +import datawave.webservice.query.result.event.Metadata; public class DiscoveryTransformerTest { + private static final String METADATA_TABLE_NAME = "DatawaveMetadata"; + private MetadataHelper helper; + private AllFieldMetadataHelper allFieldHelper; + private final String[] authorizations = {"FOO", "BAR"}; + private static AccumuloClient client; + private DiscoveryLogic logic; + + @BeforeAll + public static void beforeAll() throws Exception { + InMemoryInstance instance = new InMemoryInstance(DiscoveryTransformerTest.class.getName()); + client = new InMemoryAccumuloClient("", instance); + client.tableOperations().create(METADATA_TABLE_NAME); + } + + @BeforeEach + public void setup() throws Throwable { + initLogic(); + } + private void initLogic() { + logic = new DiscoveryLogic(); + logic.setIndexTableName(TableName.SHARD_INDEX); + logic.setReverseIndexTableName(TableName.SHARD_RINDEX); + logic.setModelTableName(QueryTestTableHelper.METADATA_TABLE_NAME); + logic.setMetadataTableName(QueryTestTableHelper.METADATA_TABLE_NAME); + logic.setModelName("DATAWAVE"); + logic.setFullTableScanEnabled(false); + logic.setMaxResults(-1); + logic.setMaxWork(-1); + logic.setAllowLeadingWildcard(true); + logic.setResponseObjectFactory(new DefaultResponseObjectFactory()); + logic.setMarkingFunctions(new MarkingFunctions.Default()); + logic.setMetadataHelperFactory(new MetadataHelperFactory()); + } + + @BeforeEach + public void beforeEach() { + allFieldHelper = createAllFieldMetadataHelper(); + helper = createMetadataHelper(allFieldHelper); + } + + @SuppressWarnings({"rawtypes", "unchecked"}) @Test - public void testTermsOnlyDiscoveredThing() { + public void testTermsOnlyDiscoveredThing() throws TableNotFoundException { + + // Create settings. + Query settings = new QueryImpl(); + settings.setQuery("event:20241218_0/samplecsv/1.2.3"); + settings.addParameter(QueryParameters.DECODE_VIEW, "true"); + settings.setQueryAuthorizations("A"); + Map parameters = new HashMap<>(); + parameters.put(DiscoveryLogic.VALUES_ONLY, "true"); + settings.addParameters(parameters); - // TODO: Work-in-progress - BaseQueryLogic logic = null; - Query settings = null; - QueryModel qm = null; - DiscoveryTransformer dt = new DiscoveryTransformer(logic, settings, qm); + // Create query model. + QueryModel qm = helper.getQueryModel(METADATA_TABLE_NAME, "TEST_MODEL"); + + // Create transformer. + DiscoveryTransformer transformer = new DiscoveryTransformer(logic, settings, qm); - ShardIndexQueryTable siqt = new ShardIndexQueryTable(); DiscoveredThing thing = new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable()); - QueryLogicTransformer transformer = logic.getTransformer(settings); + EventBase expectedEventBase = new DefaultEvent(); + expectedEventBase.setMetadata(discoveredThingToMetadata(thing)); + Map markings = markingsFromVisibility.apply(thing.getColumnVisibility()); + + expectedEventBase.setMarkings(markings); + expectedEventBase.setFields(discoveredThingToFieldValuesOnly(thing, markings)); + expectedEventBase.setSizeInBytes(6L); + // This is the heart of the test. EventBase eb = transformer.transform(thing); + String serializeExpectedEvent = serializeEvent.apply(expectedEventBase); + String serializedEvent = serializeEvent.apply(eb); + assertEquals(serializeExpectedEvent, serializedEvent); } + @SuppressWarnings({"rawtypes", "unchecked"}) @Test - public void testSingleDiscoveredThing() { - // TODO: Work-in-progress - // DiscoveryTransformer dt = new DiscoveryTransformer(); + public void testTermsOnlySingleDiscoveredThingLoaded() throws TableNotFoundException { + // Create settings. + Query settings = new QueryImpl(); + settings.setQuery("event:20241218_0/samplecsv/1.2.3"); + settings.addParameter(QueryParameters.DECODE_VIEW, "true"); + settings.setQueryAuthorizations("A"); + Map parameters = new HashMap<>(); + parameters.put(DiscoveryLogic.VALUES_ONLY, "true"); + settings.addParameters(parameters); + + // Create query model. + QueryModel qm = helper.getQueryModel(METADATA_TABLE_NAME, "TEST_MODEL"); + + // Create transformer. + DiscoveryTransformer transformer = new DiscoveryTransformer(logic, settings, qm); + + DiscoveredThing thing = new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()); + + EventBase expectedEventBase = new DefaultEvent(); + expectedEventBase.setMetadata(discoveredThingToMetadata(thing)); + Map markings = markingsFromVisibility.apply(thing.getColumnVisibility()); + + expectedEventBase.setMarkings(markings); + expectedEventBase.setFields(discoveredThingToFieldValuesOnly(thing, markings)); + expectedEventBase.setSizeInBytes(6L); + + // This is the heart of the test. + EventBase eb = transformer.transform(thing); + + String serializeExpectedEvent = serializeEvent.apply(expectedEventBase); + String serializedEvent = serializeEvent.apply(eb); + assertEquals(serializeExpectedEvent, serializedEvent); + } + + @SuppressWarnings({"rawtypes", "unchecked"}) + @Test + public void testSingleDiscoveredThing() throws TableNotFoundException { + // Create settings. + Query settings = new QueryImpl(); + settings.setQuery("event:20241218_0/samplecsv/1.2.3"); + settings.addParameter(QueryParameters.DECODE_VIEW, "true"); + settings.setQueryAuthorizations("A"); + Map parameters = new HashMap<>(); + parameters.put(DiscoveryLogic.VALUES_ONLY, "false"); + settings.addParameters(parameters); + + // Create query model. + QueryModel qm = helper.getQueryModel(METADATA_TABLE_NAME, "TEST_MODEL"); + + // Create transformer. + DiscoveryTransformer transformer = new DiscoveryTransformer(logic, settings, qm); + DiscoveredThing thing = new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()); - // EventBase eb = dt.transform(thing); + EventBase expectedEventBase = new DefaultEvent(); + expectedEventBase.setMetadata(discoveredThingToMetadata(thing)); + Map markings = markingsFromVisibility.apply(thing.getColumnVisibility()); + + expectedEventBase.setMarkings(markings); + expectedEventBase.setFields(discoveredThingToField(thing, markings)); + expectedEventBase.setSizeInBytes(6L); + + // This is the heart of the test. + EventBase eb = transformer.transform(thing); + + String serializeExpectedEvent = serializeEvent.apply(expectedEventBase); + String serializedEvent = serializeEvent.apply(eb); + assertEquals(serializeExpectedEvent, serializedEvent); + } + + @SuppressWarnings("rawtypes") + private final Function serializeEvent = (x) -> { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + try { + JAXBContext jaxbContext = JAXBContext.newInstance(EventBase.class); + Marshaller marshaller = jaxbContext.createMarshaller(); + + // Construct a root element in order to unmarshall. + JAXBElement root = new JAXBElement<>(new QName("", "event"), EventBase.class, x); + marshaller.marshal(root, bos); + } catch (JAXBException e) { + throw new RuntimeException(e); + } + + return bos.toString(); + }; + + private final Function> markingsFromVisibility = x -> { + try { + return this.logic.getMarkingFunctions().translateFromColumnVisibility(new ColumnVisibility(x)); + } catch (MarkingFunctions.Exception e) { + throw new RuntimeException("could not parse to markings: " + x); + } + }; + + @SuppressWarnings("rawtypes") + private List discoveredThingToFieldValuesOnly(DiscoveredThing thing, Map markings) { + List fields = new ArrayList<>(); + + if (Optional.ofNullable(thing.getTerm()).isPresent()) { + fields.add(makeField("VALUE", markings, "", 0L, thing.getTerm())); + } + return fields; + } + + @SuppressWarnings("rawtypes") + private List discoveredThingToField(DiscoveredThing thing, Map markings) { + List fields = new ArrayList<>(); + + if (Optional.ofNullable(thing.getTerm()).isPresent()) { + fields.add(makeField("VALUE", markings, "", 0L, thing.getTerm())); + } + + if (Optional.ofNullable(thing.getField()).isPresent()) { + fields.add(makeField("FIELD", markings, "", 0L, thing.getField())); + } + + if (Optional.ofNullable(thing.getDate()).isPresent()) { + fields.add(makeField("DATE", markings, "", 0L, thing.getDate())); + } + + if (Optional.ofNullable(thing.getType()).isPresent()) { + fields.add(makeField("DATA TYPE", markings, "", 0L, thing.getType())); + } + + if (thing.getCount() > 0L) { + fields.add(makeField("RECORD COUNT", markings, "", 0L, String.valueOf(thing.getCount()))); + } + + return fields; + } + + private AllFieldMetadataHelper createAllFieldMetadataHelper() { + final Set allAuths = Collections.singleton(new Authorizations(authorizations)); + final Set auths = Collections.singleton(new Authorizations(authorizations)); + TypeMetadataHelper typeMetadataHelper = new TypeMetadataHelper(new HashMap<>(), auths, client, METADATA_TABLE_NAME, auths, false); + CompositeMetadataHelper compositeMetadataHelper = new CompositeMetadataHelper(client, METADATA_TABLE_NAME, auths); + return new AllFieldMetadataHelper(typeMetadataHelper, compositeMetadataHelper, client, METADATA_TABLE_NAME, auths, allAuths); + } + + @SuppressWarnings("rawtypes") + private FieldBase makeField(String name, Map markings, String columnVisibility, Long timestamp, Object value) { + FieldBase field = new DefaultField(); + field.setName(name); + field.setMarkings(markings); + field.setColumnVisibility(columnVisibility); + field.setTimestamp(timestamp); + field.setValue(value); + return field; + } + + private Metadata discoveredThingToMetadata(DiscoveredThing thing) { + Metadata metadata = new Metadata(); + metadata.setInternalId(""); + metadata.setDataType(thing.getType()); + metadata.setRow(thing.getTerm()); + metadata.setTable(logic.getTableName()); + return metadata; + } + + private MetadataHelper createMetadataHelper(AllFieldMetadataHelper allFieldHelper) { + if (allFieldHelper == null) { + allFieldHelper = createAllFieldMetadataHelper(); + } + + Set userAuths = Collections.singleton(new Authorizations(authorizations)); + Set metadataAuths = Collections.singleton(new Authorizations(authorizations)); + return new MetadataHelper(allFieldHelper, metadataAuths, client, METADATA_TABLE_NAME, userAuths, metadataAuths); } } From 610c355f46befebb3b02c2dd7a73d8ef50050bfb Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Wed, 20 May 2026 16:41:39 +0000 Subject: [PATCH 14/16] Addressed code review comments. --- ...dThingValuesOnlyConditionalTransformer.java | 8 +++++++- .../query/discovery/DiscoveryIterator.java | 6 +----- .../query/discovery/DiscoveryTransformer.java | 18 +++++++++--------- .../query/discovery/DiscoveryLogicTest.java | 16 ---------------- 4 files changed, 17 insertions(+), 31 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java index 3255daef2a6..3c429eab5dc 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingValuesOnlyConditionalTransformer.java @@ -2,9 +2,15 @@ import java.util.function.UnaryOperator; +/** + * Functional + * + * Converts DiscoveredThing to one containing only the original "Term" and "Column Visibility." Other fields are defined the DiscoveredThing constructor for + * this scenario. The scenario here is a DiscoveredThing is created containing only "Term" and "Column Visibility." + */ public class DiscoveredThingValuesOnlyConditionalTransformer implements UnaryOperator { - boolean valuesOnly; + private final boolean valuesOnly; DiscoveredThingValuesOnlyConditionalTransformer(boolean valuesOnly) { this.valuesOnly = valuesOnly; diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index 3b2acc95d26..42cd5868ff4 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -7,7 +7,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.function.BiFunction; import java.util.stream.Collectors; @@ -205,9 +204,6 @@ private void setTop(List things) throws IOException { String row = (this.reverseIndex ? new StringBuilder().append(thing.getTerm()).reverse().toString() : thing.getTerm()); Key newKey = new Key(row, thing.getField(), thing.getDate() + "\uffff"); - // Conditionally trim DiscoveredThing. (Perhaps to term and visibility). - // List thingsConditional = things.stream().map(discoveredThingTransformer).collect(Collectors.toList()); - // Create a value from the list of things. ArrayWritable thingArray = new ArrayWritable(DiscoveredThing.class, things.toArray(new DiscoveredThing[0])); Value newValue = new Value(WritableUtils.toByteArray(thingArray)); @@ -236,7 +232,7 @@ public void seek(Range range, Collection columnFamilies, boolean i this.columnFamilies = columnFamilies; this.iterator.seek(range, columnFamilies, inclusive); if (log.isTraceEnabled()) { - log.trace("My source " + ((Optional.ofNullable(iterator).isPresent() && this.iterator.hasTop()) ? "does" : "does not") + " have a top."); + log.trace("My source " + (this.iterator.hasTop() ? "does" : "does not") + " have a top."); } next(); } diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java index d721a2ebab0..7d8a171b608 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java @@ -41,6 +41,15 @@ public class DiscoveryTransformer extends BaseQueryLogicTransformer logic, Query settings, QueryModel qm) { + super(new MarkingFunctions.Default()); + this.markingFunctions = logic.getMarkingFunctions(); + this.responseObjectFactory = logic.getResponseObjectFactory(); + this.logic = logic; + this.myQueryModel = qm; + this.valuesOnly = getOrDefaultBoolean(settings, VALUES_ONLY, false); + } + /** * Variant of a field list that contains only a value field. */ @@ -101,15 +110,6 @@ BiFunction,List> getMapGenerator(b return mapMapGenerator.get(isValuesOnly ? "VALUES_ONLY" : "STANDARD"); } - public DiscoveryTransformer(BaseQueryLogic logic, Query settings, QueryModel qm) { - super(new MarkingFunctions.Default()); - this.markingFunctions = logic.getMarkingFunctions(); - this.responseObjectFactory = logic.getResponseObjectFactory(); - this.logic = logic; - this.myQueryModel = qm; - this.valuesOnly = getOrDefaultBoolean(settings, VALUES_ONLY, false); - } - @SuppressWarnings({"rawtypes", "unchecked"}) @Override public EventBase transform(DiscoveredThing thing) { diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 0802f0de6cd..b7f3353128a 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -1,7 +1,6 @@ package datawave.query.discovery; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -22,9 +21,7 @@ import org.apache.accumulo.core.client.BatchWriterConfig; import org.apache.accumulo.core.client.TableExistsException; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; -import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.user.SummingCombiner; import org.apache.accumulo.core.security.Authorizations; @@ -250,9 +247,6 @@ private void assertQueryResults() throws Exception { while (iterator.hasNext()) { DiscoveredThing dtee = iterator.next(); actual.add(dtvoct.apply(dtee)); - - // actual.add(dtvoct.apply(iterator.next())); - // actual.add(iterator.next()); } assertEquals(expected.size(), actual.size()); @@ -544,16 +538,6 @@ public void testValuesOnlyForLiterals() throws Exception { assertQueryResults(); } - @Test - public void testAccumuloCoreDataRange() { - Key start = new Key("bbc"); - Key stop = new Key("bbc\u0000"); - Range range = new Range(start, true, stop, false); - - Key last = new Key("bbc"); - assertTrue(range.contains(last)); - } - @Test public void testValuesOnlyForLiteralsFalseSumCount() throws Exception { givenQuery("bbc OR onyx"); From 5c236a32722e36743b2f56bee4cdd19749109bd2 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Thu, 21 May 2026 17:45:40 +0000 Subject: [PATCH 15/16] Addressed code review comments (2). --- .../datawave/query/discovery/DiscoveryIterator.java | 13 ++++--------- .../datawave/query/discovery/DiscoveryLogic.java | 2 +- .../query/discovery/DiscoveryTransformer.java | 2 +- .../query/discovery/DiscoveryLogicTest.java | 6 ------ 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java index 42cd5868ff4..5bf5a922259 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryIterator.java @@ -51,7 +51,6 @@ public class DiscoveryIterator implements SortedKeyValueIterator { private boolean reverseIndex = false; private boolean sumCounts = false; private boolean valuesOnly = false; - private DiscoveredThingValuesOnlyConditionalTransformer discoveredThingTransformer; @Override public DiscoveryIterator deepCopy(IteratorEnvironment env) { @@ -95,17 +94,14 @@ private Multimap getTermsByDatatype() throws IOException { // Otherwise, we only want to collect the term entries for the current field, term, and date of start. BiFunction dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch; - // if 'values only' is selected, then we should match on row rather than column family. - PartialKey partialKey = valuesOnly ? PartialKey.ROW : PartialKey.ROW_COLFAM; - // Find all matching entries and parse term entries from them. - while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), partialKey) && dateMatchingFunction.apply(start, key)) { + while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) { TermEntry termEntry = new TermEntry(key, iterator.getTopValue()); if (termEntry.isValid()) { terms.put(termEntry.getDatatype(), termEntry); // if 'values only' is selected, then we only need a single TermEntry, the first one. The value of each - // term in TermEntry should be the identical as we iterate in this while. Therefore, the first one - // encountered will suffice. + // term in TermEntry should be the identical as we iterate in this 'while' loop. Therefore, the first + // one encountered will suffice. if (valuesOnly) { return terms; } @@ -245,7 +241,6 @@ public void init(SortedKeyValueIterator source, Map op this.reverseIndex = Boolean.parseBoolean(options.get(DiscoveryLogic.REVERSE_INDEX)); this.sumCounts = Boolean.parseBoolean(options.get(DiscoveryLogic.SUM_COUNTS)); this.valuesOnly = Boolean.parseBoolean(options.get(DiscoveryLogic.VALUES_ONLY)); - this.discoveredThingTransformer = new DiscoveredThingValuesOnlyConditionalTransformer(valuesOnly); if (valuesOnly) { sumCounts = false; } @@ -281,7 +276,7 @@ public Value getTopValue() { private static class TermEntry { private final String term; - private String field; + private final String field; private String date; private String datatype; private ColumnVisibility visibility; diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java index dcdcba932b7..118f14f45fe 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java @@ -639,7 +639,7 @@ public ShardIndexQueryTable clone() { */ private Iterator transformScanner(final BatchScanner scanner, final QueryData queryData, Set indexedFields) { return concat(transform(scanner.iterator(), new Function,Iterator>() { - DataInputBuffer in = new DataInputBuffer(); + final DataInputBuffer in = new DataInputBuffer(); @Override public Iterator apply(Entry from) { diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java index 7d8a171b608..dc30c85e8ee 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryTransformer.java @@ -81,7 +81,7 @@ public DiscoveryTransformer(BaseQueryLogic logic, Query setting if (x.getCountsByColumnVisibility() != null && !x.getCountsByColumnVisibility().isEmpty()) { for (Map.Entry entry : x.getCountsByColumnVisibility().entrySet()) { try { - Map eMarkings = this.markingFunctions.translateFromColumnVisibility(new ColumnVisibility(entry.getKey().toString())); + this.markingFunctions.translateFromColumnVisibility(new ColumnVisibility(entry.getKey().toString())); fields.add(this.makeField("RECORD COUNT", new HashMap<>(), entry.getKey().toString(), 0L, entry.getValue().toString())); } catch (Exception e) { throw new RuntimeException("could not parse to markings: " + x.getColumnVisibility()); diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index b7f3353128a..6bbd32c680d 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -573,16 +573,11 @@ public void testValuesOnlyForPatternsNotFound() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - // We expect no results. Make sure we do not blow up. - // expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); - // expect(new DiscoveredThing("onyx", "", "", "", "BAR&FOO", 0L, new MapWritable())); - assertQueryResults(); } @Test public void testValuesOnlyForFieldedLiterals() throws Exception { - // givenQuery("bbc OR onyx"); givenQuery("rock:onyx OR pokemon:onyx"); givenStartDate("20130101"); givenEndDate("20130102"); @@ -590,7 +585,6 @@ public void testValuesOnlyForFieldedLiterals() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - // expect(new DiscoveredThing("bbc", "", "", "", "BAR&FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); assertQueryResults(); From 53de981fe4bc833cc720431d1973c915681b1303 Mon Sep 17 00:00:00 2001 From: hoper-38709 <257851960+hoper-38709@users.noreply.github.com> Date: Wed, 27 May 2026 18:00:02 +0000 Subject: [PATCH 16/16] Updated DiscoveryLogic adding UniqueFilter. --- .../discovery/DiscoveredThingTermIsotope.java | 40 +++++++++++++++++++ .../query/discovery/DiscoveryLogic.java | 8 +++- .../query/discovery/DiscoveryLogicTest.java | 17 ++++++-- 3 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingTermIsotope.java diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingTermIsotope.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingTermIsotope.java new file mode 100644 index 00000000000..0239f0e1702 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveredThingTermIsotope.java @@ -0,0 +1,40 @@ +package datawave.query.discovery; + +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.hadoop.io.MapWritable; + +/** + * A variant of DiscoveredThing in which equality if defined as comparing getTerm() only. + */ +public class DiscoveredThingTermIsotope extends DiscoveredThing { + public DiscoveredThingTermIsotope(String term, String field, String type, String date, String columnVisibility, long count, + MapWritable countsByColumnVisibility) { + super(term, field, type, date, columnVisibility, count, countsByColumnVisibility); + + } + + public DiscoveredThingTermIsotope(String term, String columnVisibility) { + super(term, columnVisibility); + } + + public DiscoveredThingTermIsotope() { + super(); + } + + @Override + public boolean equals(Object o) { + if (o instanceof DiscoveredThingTermIsotope) { + DiscoveredThingTermIsotope other = (DiscoveredThingTermIsotope) o; + return new EqualsBuilder().append(getTerm(), other.getTerm()).isEquals(); + } + return false; + } + + @Override + public int hashCode() { + // Ignore super results. + // super.hashCode(); + return new HashCodeBuilder().append(getTerm()).toHashCode(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java index 118f14f45fe..a0eb4316bf3 100644 --- a/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/discovery/DiscoveryLogic.java @@ -27,6 +27,7 @@ import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.security.Authorizations; +import org.apache.commons.collections4.iterators.UniqueFilterIterator; import org.apache.commons.jexl3.parser.ASTJexlScript; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.math.LongRange; @@ -120,6 +121,11 @@ public DiscoveryLogic(DiscoveryLogic other) { this.metadataHelper = other.metadataHelper; } + @Override + public Iterator iterator() { + return getValuesOnly() ? new UniqueFilterIterator<>(this.iterator) : this.iterator; + } + @Override public DiscoveryQueryConfiguration getConfig() { if (this.config == null) { @@ -646,7 +652,7 @@ public Iterator apply(Entry from) { queryData.setLastResult(from.getKey()); Value value = from.getValue(); in.reset(value.get(), value.getSize()); - ArrayWritable aw = new ArrayWritable(DiscoveredThing.class); + ArrayWritable aw = new ArrayWritable(DiscoveredThingTermIsotope.class); try { aw.readFields(in); } catch (IOException e) { diff --git a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java index 6bbd32c680d..81ccd7957e9 100644 --- a/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/discovery/DiscoveryLogicTest.java @@ -1,6 +1,7 @@ package datawave.query.discovery; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -253,7 +254,19 @@ private void assertQueryResults() throws Exception { for (int i = 0; i < expected.size(); i++) { DiscoveredThing actualThing = actual.get(i); DiscoveredThing expectedThing = expected.get(i); - assertEquals(expectedThing, actualThing); + assertInstanceOf(DiscoveredThing.class, actualThing); + // N.B.: DiscoveredThingTermIsotope extends DiscoveredThing. Convert to common format for comparison. + //@formatter:off + DiscoveredThing actualThingg = new DiscoveredThing(actualThing.getTerm(), + actualThing.getField(), + actualThing.getType(), + actualThing.getDate(), + actualThing.getColumnVisibility(), + actualThing.getCount(), + actualThing.getCountsByColumnVisibility()); + //@formatter:on + assertEquals(expectedThing, actualThingg); + assertEquals(expectedThing.getCountsByColumnVisibility(), actualThing.getCountsByColumnVisibility()); } } @@ -585,7 +598,6 @@ public void testValuesOnlyForFieldedLiterals() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); assertQueryResults(); } @@ -599,7 +611,6 @@ public void testValuesOnlyForFieldedLiteralsExtendedRange() throws Exception { givenParameter(DiscoveryLogic.SUM_COUNTS, "true"); givenParameter(DiscoveryLogic.VALUES_ONLY, "true"); - expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable())); assertQueryResults(); }