Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package datawave.query.discovery;

import java.util.function.UnaryOperator;

import org.apache.hadoop.io.MapWritable;

public class DiscoveredThingValuesOnlyConditionalTransformer implements UnaryOperator<DiscoveredThing> {
Comment thread
hoper-38709 marked this conversation as resolved.

boolean valuesOnly = false;

DiscoveredThingValuesOnlyConditionalTransformer(boolean valuesOnly) {
this.valuesOnly = valuesOnly;
}

public DiscoveredThing apply(DiscoveredThing dt) {
// @formatter:off
return (valuesOnly) ? new DiscoveredThing(dt.getTerm(),
"",
"",
"",
dt.getColumnVisibility(),
0L,
new MapWritable())
: dt;
// @formatter:on
Comment thread
hoper-38709 marked this conversation as resolved.
Outdated
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
Expand All @@ -27,6 +29,7 @@
import org.apache.log4j.Logger;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.google.protobuf.InvalidProtocolBufferException;

Expand All @@ -46,6 +49,7 @@ public class DiscoveryIterator implements SortedKeyValueIterator<Key,Value> {
private boolean showReferenceCount = false;
private boolean reverseIndex = false;
private boolean sumCounts = false;
private boolean valuesOnly = false;

@Override
public DiscoveryIterator deepCopy(IteratorEnvironment env) {
Expand All @@ -61,7 +65,7 @@ public void next() throws IOException {

while (iterator.hasTop() && key == null) {
// Get the entries to aggregate.
Multimap<String,TermEntry> terms = getTermsByDatatype();
Multimap<String,TermEntry> terms = this.valuesOnly ? getTermsOnly() : getTermsByDatatype();
if (terms.isEmpty()) {
log.trace("Couldn't aggregate index info; moving onto next date/field/term if data is available.");
} else {
Expand All @@ -84,8 +88,8 @@ private Multimap<String,TermEntry> getTermsByDatatype() throws IOException {
Multimap<String,TermEntry> terms = ArrayListMultimap.create();
Key start = new Key(iterator.getTopKey());
Key key;
// If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. Otherwise, we only want
// to collect the term entries for the current field, term, and date of start.
// If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start.
// Otherwise, we only want to collect the term entries for the current field, term, and date of start.
BiFunction<Key,Key,Boolean> dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch;
// Find all matching entries and parse term entries from them.
while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) {
Expand All @@ -102,6 +106,45 @@ private Multimap<String,TermEntry> getTermsByDatatype() throws IOException {
return terms;
}

private Multimap<String,TermEntry> getTermsOnly() throws IOException {
LinkedHashMultimap<String,TermEntry> terms = LinkedHashMultimap.create();
Key start = new Key(iterator.getTopKey());
Key key;
// If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start.
// Otherwise, we only want to collect the term entries for the current field, term, and date of start.
BiFunction<Key,Key,Boolean> dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch;
// Find all matching entries and parse term entries from them.

while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) {
Comment thread
hoper-38709 marked this conversation as resolved.
Outdated
TermEntry termEntry = new TermEntry(key, iterator.getTopValue()) {
// Only use term and visibility for equality.
@Override
public boolean equals(Object o) {
Comment thread
hoper-38709 marked this conversation as resolved.
Outdated
if (o instanceof TermEntry) {
TermEntry other = (TermEntry) o;
return new EqualsBuilder().append(getTerm(), other.getTerm()).append(getVisibility(), other.getVisibility()).isEquals();
}
return false;
}

@Override
public int hashCode() {
return new HashCodeBuilder().append(getTerm()).append(getVisibility()).toHashCode();
}
};

if (termEntry.isValid())
terms.put(termEntry.getDatatype(), termEntry);
else {
if (log.isTraceEnabled()) {
log.trace("Received invalid term entry from key: " + key);
}
}
iterator.next();
}
return terms;
}

/**
* Return true if the dates for the two keys match, or false otherwise.
*/
Expand Down Expand Up @@ -180,8 +223,8 @@ private DiscoveredThing aggregate(Collection<TermEntry> termEntries) {
* Set the top {@link Key} and {@link Value} of this iterator, created from the given list of {@link DiscoveredThing} instances.
*/
private void setTop(List<DiscoveredThing> things) {
// We want the key to be the last possible key for this date. Return the key as it is in the index (reversed if necessary) to ensure the keys are
// consistent with the initial seek range.
// We want the key to be the last possible key for this date. Return the key as it is in the index (reversed if
// necessary) to ensure the keys are consistent with the initial seek range.
DiscoveredThing thing = things.get(0);
String row = (this.reverseIndex ? new StringBuilder().append(thing.getTerm()).reverse().toString() : thing.getTerm());
Key newKey = new Key(row, thing.getField(), thing.getDate() + "\uffff");
Expand Down Expand Up @@ -210,13 +253,15 @@ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> op
this.showReferenceCount = Boolean.parseBoolean(options.get(DiscoveryLogic.SHOW_REFERENCE_COUNT));
this.reverseIndex = Boolean.parseBoolean(options.get(DiscoveryLogic.REVERSE_INDEX));
this.sumCounts = Boolean.parseBoolean(options.get(DiscoveryLogic.SUM_COUNTS));
this.valuesOnly = Boolean.parseBoolean(options.get(DiscoveryLogic.VALUES_ONLY));
Comment thread
hoper-38709 marked this conversation as resolved.

if (log.isTraceEnabled()) {
log.trace("Source: " + source.getClass().getName());
log.trace("Separate counts by column visibility: " + this.separateCountsByColVis);
log.trace("Show reference counts only: " + this.showReferenceCount);
log.trace("Reverse index: " + this.reverseIndex);
log.trace("Sum counts: " + this.sumCounts);
log.trace("Values only: " + this.valuesOnly);
}
}

Expand All @@ -241,7 +286,7 @@ public Value getTopValue() {
private static class TermEntry {

private final String term;
private final String field;
private String field;
private String date;
private String datatype;
private ColumnVisibility visibility;
Expand Down Expand Up @@ -325,5 +370,22 @@ public long getUidListSize() {
/**
 * Return the validity flag held by this term entry.
 *
 * @return the value of the {@code valid} field
 */
public boolean isValid() {
return valid;
}

@Override
public boolean equals(Object o) {
    // Anything that is not a TermEntry (including null) is never equal.
    if (!(o instanceof TermEntry)) {
        return false;
    }
    TermEntry that = (TermEntry) o;
    EqualsBuilder comparison = new EqualsBuilder();
    comparison.append(getTerm(), that.getTerm());
    comparison.append(getField(), that.getField());
    comparison.append(getVisibility(), that.getVisibility());
    comparison.append(getDate(), that.getDate());
    comparison.append(getDatatype(), that.getDatatype());
    comparison.append(getUidCount(), that.getUidCount());
    comparison.append(getUidListSize(), that.getUidListSize());
    return comparison.isEquals();
}

@Override
public int hashCode() {
    // Hash the same attributes, in the same order, that equals() compares.
    HashCodeBuilder hash = new HashCodeBuilder();
    hash.append(getTerm());
    hash.append(getField());
    hash.append(getVisibility());
    hash.append(getDate());
    hash.append(getDatatype());
    hash.append(getUidCount());
    hash.append(getUidListSize());
    return hash.toHashCode();
}
Comment thread
hoper-38709 marked this conversation as resolved.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ public class DiscoveryLogic extends ShardIndexQueryTable {
*/
public static final String SUM_COUNTS = "sum.counts";

/**
 * Used to specify whether to return only the unique values found, without their associated field.
 */
public static final String VALUES_ONLY = "values.only";

/**
* Used to specify whether to search against the reversed index.
*/
Expand Down Expand Up @@ -151,6 +156,9 @@ public GenericQueryConfiguration initialize(AccumuloClient client, Query setting
// Check if counts should be summed.
setSumCounts(getOrDefaultBoolean(settings, SUM_COUNTS, getSumCounts()));

// Check if only values should be returned, treating the associated field, data type, and the like as "don't care."
setValuesOnly(getOrDefaultBoolean(settings, VALUES_ONLY, false));

// Check if any datatype filters were specified.
getConfig().setDatatypeFilter(getOrDefaultSet(settings, QueryParameters.DATATYPE_FILTER_SET, getConfig().getDatatypeFilter()));

Expand Down Expand Up @@ -580,6 +588,7 @@ private IteratorSetting configureDiscoveryIterator(DiscoveryQueryConfiguration c
setting.addOption(SEPARATE_COUNTS_BY_COLVIS, Boolean.toString(config.getSeparateCountsByColVis()));
setting.addOption(SHOW_REFERENCE_COUNT, Boolean.toString(config.getShowReferenceCount()));
setting.addOption(SUM_COUNTS, Boolean.toString(config.getSumCounts()));
setting.addOption(VALUES_ONLY, Boolean.toString(config.getValuesOnly()));
return setting;
}

Expand Down Expand Up @@ -689,4 +698,12 @@ public boolean getSumCounts() {
public void setSumCounts(boolean sumCounts) {
getConfig().setSumCounts(sumCounts);
}

/**
 * Set the values-only flag on the underlying query configuration.
 *
 * @param valuesOnly
 *            the flag value to store in the configuration
 */
public void setValuesOnly(boolean valuesOnly) {
getConfig().setValuesOnly(valuesOnly);
}

/**
 * Return the values-only flag currently held by the underlying query configuration.
 *
 * @return the configuration's values-only flag
 */
public boolean getValuesOnly() {
return getConfig().getValuesOnly();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public class DiscoveryQueryConfiguration extends ShardIndexQueryConfiguration im
private boolean separateCountsByColVis = false;
private boolean showReferenceCount = false;
private boolean sumCounts = false;
private boolean valuesOnly = false;

public DiscoveryQueryConfiguration() {}

Expand Down Expand Up @@ -131,6 +132,10 @@ public boolean getSumCounts() {
return sumCounts;
}

/**
 * Return the values-only flag of this configuration.
 *
 * @return the current value of {@code valuesOnly}
 */
public boolean getValuesOnly() {
return valuesOnly;
}

public void setSeparateCountsByColVis(boolean separateCountsByColVis) {
this.separateCountsByColVis = separateCountsByColVis;
}
Expand All @@ -144,6 +149,10 @@ public void setSumCounts(boolean sumCounts) {
this.sumCounts = sumCounts;
}

/**
 * Set the values-only flag of this configuration.
 *
 * @param valuesOnly
 *            the new value of {@code valuesOnly}
 */
public void setValuesOnly(boolean valuesOnly) {
this.valuesOnly = valuesOnly;
}

@Override
public DiscoveryQueryConfiguration checkpoint() {
// Create a new config that only contains what is needed to execute the specified ranges
Expand Down Expand Up @@ -172,6 +181,6 @@ public int hashCode() {
public String toString() {
return new StringJoiner(", ", DiscoveryQueryConfiguration.class.getSimpleName() + "[", "]").add("literals=" + literals).add("patterns=" + patterns)
.add("ranges=" + ranges).add("separateCountsByColVis=" + separateCountsByColVis).add("showReferenceCount=" + showReferenceCount)
.add("sumCounts=" + sumCounts).toString();
.add("sumCounts=" + sumCounts).add("valuesOnly=" + valuesOnly).toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.hadoop.io.Writable;
Expand Down Expand Up @@ -58,9 +59,12 @@ public EventBase transform(DiscoveredThing thing) {

fields.add(this.makeField("VALUE", markings, "", 0L, thing.getTerm()));
/**
* Added query model to alias FIELD. The FIELD entry is only added when DiscoveredThing::field is neither null nor blank.
Comment thread
hoper-38709 marked this conversation as resolved.
*/
fields.add(this.makeField("FIELD", markings, "", 0L, myQueryModel.aliasFieldNameReverseModel(thing.getField())));
Optional<String> fieldOFThing = Optional.ofNullable(thing.getField());
fieldOFThing.filter(i -> !i.isBlank())
.ifPresent(i -> fields.add(this.makeField("FIELD", markings, "", 0L, myQueryModel.aliasFieldNameReverseModel(i))));

fields.add(this.makeField("DATE", markings, "", 0L, thing.getDate()));
fields.add(this.makeField("DATA TYPE", markings, "", 0L, thing.getType()));

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package datawave.query.discovery;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;

import org.apache.hadoop.io.MapWritable;
import org.junit.jupiter.api.Test;

/**
 * Equality and inequality checks for {@link DiscoveredThing}.
 */
public class DiscoveredThingTest {

    /**
     * Two things built from identical constructor arguments must compare equal.
     */
    @Test
    public void testDiscoveredThingSimpleEqualityTest() {
        // Fully populated things.
        assertEquals(new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()),
                        new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()));

        // All-empty strings with an empty map.
        assertEquals(new DiscoveredThing("", "", "", "", "", 0L, new MapWritable()), new DiscoveredThing("", "", "", "", "", 0L, new MapWritable()));

        // All-empty strings with a null map.
        assertEquals(new DiscoveredThing("", "", "", "", "", 0L, null), new DiscoveredThing("", "", "", "", "", 0L, null));
    }

    /**
     * Two things that differ in a single constructor argument must not compare equal.
     */
    @Test
    public void testDiscoveredThingSimpleInequalityTest() {
        // Differ only in the fourth argument ("20130101" vs "20130102").
        assertNotEquals(new DiscoveredThing("bbc", "NETWORK", "csv", "20130101", "FOO", 240L, new MapWritable()),
                        new DiscoveredThing("bbc", "NETWORK", "csv", "20130102", "FOO", 240L, new MapWritable()));

        // Differ only in the second argument, with an empty map.
        assertNotEquals(new DiscoveredThing("", "wanda", "", "", "", 0L, new MapWritable()),
                        new DiscoveredThing("", "panda", "", "", "", 0L, new MapWritable()));

        // Differ only in the map argument: null versus an empty map.
        assertNotEquals(new DiscoveredThing("", "wands", "", "", "", 0L, null), new DiscoveredThing("", "wands", "", "", "", 0L, new MapWritable()));

        // Differ only in the second argument, with a null map.
        assertNotEquals(new DiscoveredThing("", "wanda", "", "", "", 0L, null), new DiscoveredThing("", "panda", "", "", "", 0L, null));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,9 @@ private void assertQueryResults() throws Exception {
logic.setupQuery(config);
Iterator<DiscoveredThing> iterator = logic.iterator();
List<DiscoveredThing> actual = new ArrayList<>();
DiscoveredThingValuesOnlyConditionalTransformer dtvoct = new DiscoveredThingValuesOnlyConditionalTransformer(logic.getValuesOnly());
while (iterator.hasNext()) {
actual.add(iterator.next());
actual.add(dtvoct.apply(iterator.next()));
}

Assertions.assertThat(actual).hasSize(expected.size());
Expand Down Expand Up @@ -520,4 +521,20 @@ public void testSumCountsForReverse() throws Exception {

assertQueryResults();
}

/**
 * Run a literal query for "bbc OR onyx" over 20130101-20130102 with both the sum-counts and values-only parameters enabled, and check that the results
 * carry only a term and a column visibility (empty strings, zero count and an empty map for everything else).
 */
@Test
public void testValuesOnlyForLiterals() throws Exception {
givenQuery("bbc OR onyx");
givenStartDate("20130101");
givenEndDate("20130102");
givenParameter(DiscoveryLogic.SUM_COUNTS, "true");
givenParameter(DiscoveryLogic.VALUES_ONLY, "true");

// NOTE(review): "onyx"/"FOO" is expected twice. Presumably the two results come from different datatypes, which
// the reduced DiscoveredThing no longer shows - confirm against the test data.
expect(new DiscoveredThing("bbc", "", "", "", "FOO", 0L, new MapWritable()));
expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable()));
expect(new DiscoveredThing("onyx", "", "", "", "FOO", 0L, new MapWritable()));
expect(new DiscoveredThing("onyx", "", "", "", "BAR", 0L, new MapWritable()));

assertQueryResults();
}
}
Loading