Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,16 @@ public DiscoveredThing(String term, String field, String type, String date, Stri
this.countsByColumnVisibility = countsByColumnVisibility;
}

public DiscoveredThing(String term, String columnVisibility) {
this.term = term;
this.field = "";
this.type = "";
this.date = "";
this.columnVisibility = columnVisibility;
this.count = new VLongWritable(0L);
this.countsByColumnVisibility = new MapWritable();
}

public DiscoveredThing() {
count = new VLongWritable();
countsByColumnVisibility = new MapWritable();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package datawave.query.discovery;

import java.util.function.UnaryOperator;

public class DiscoveredThingValuesOnlyConditionalTransformer implements UnaryOperator<DiscoveredThing> {
Comment thread
hoper-38709 marked this conversation as resolved.

boolean valuesOnly;
Comment thread
hoper-38709 marked this conversation as resolved.
Outdated

DiscoveredThingValuesOnlyConditionalTransformer(boolean valuesOnly) {
this.valuesOnly = valuesOnly;
}

public DiscoveredThing apply(DiscoveredThing dt) {
if (valuesOnly) {
return new DiscoveredThing(dt.getTerm(), dt.getColumnVisibility());
}
return dt;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
Expand All @@ -19,6 +20,8 @@
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
Expand All @@ -41,11 +44,15 @@ public class DiscoveryIterator implements SortedKeyValueIterator<Key,Value> {

private Key key;
private Value value;
private Range lastRange;
private Collection<ByteSequence> columnFamilies;
private SortedKeyValueIterator<Key,Value> iterator;
private boolean separateCountsByColVis = false;
private boolean showReferenceCount = false;
private boolean reverseIndex = false;
private boolean sumCounts = false;
private boolean valuesOnly = false;
private DiscoveredThingValuesOnlyConditionalTransformer discoveredThingTransformer;

@Override
public DiscoveryIterator deepCopy(IteratorEnvironment env) {
Expand Down Expand Up @@ -84,15 +91,26 @@ private Multimap<String,TermEntry> getTermsByDatatype() throws IOException {
Multimap<String,TermEntry> terms = ArrayListMultimap.create();
Key start = new Key(iterator.getTopKey());
Key key;
// If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start. Otherwise, we only want
// to collect the term entries for the current field, term, and date of start.

// If we should sum up counts, we want to collect the term entries for each date seen for the current field and term of start.
// Otherwise, we only want to collect the term entries for the current field, term, and date of start.
BiFunction<Key,Key,Boolean> dateMatchingFunction = sumCounts ? (first, second) -> true : this::datesMatch;

// if 'values only' is selected, then we should match on row rather than column family.
PartialKey partialKey = valuesOnly ? PartialKey.ROW : PartialKey.ROW_COLFAM;

// Find all matching entries and parse term entries from them.
while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), PartialKey.ROW_COLFAM) && dateMatchingFunction.apply(start, key)) {
while (iterator.hasTop() && start.equals((key = iterator.getTopKey()), partialKey) && dateMatchingFunction.apply(start, key)) {
TermEntry termEntry = new TermEntry(key, iterator.getTopValue());
if (termEntry.isValid())
if (termEntry.isValid()) {
terms.put(termEntry.getDatatype(), termEntry);
else {
// if 'values only' is selected, then we only need a single TermEntry, the first one. The value of each
// term in TermEntry should be the identical as we iterate in this while. Therefore, the first one
// encountered will suffice.
if (valuesOnly) {
return terms;
Comment thread
hoper-38709 marked this conversation as resolved.
}
} else {
if (log.isTraceEnabled()) {
log.trace("Received invalid term entry from key: " + key);
}
Expand Down Expand Up @@ -179,26 +197,46 @@ private DiscoveredThing aggregate(Collection<TermEntry> termEntries) {
/**
* Set the top {@link Key} and {@link Value} of this iterator, created from the given list of {@link DiscoveredThing} instances.
*/
private void setTop(List<DiscoveredThing> things) {
// We want the key to be the last possible key for this date. Return the key as it is in the index (reversed if necessary) to ensure the keys are
// consistent with the initial seek range.
private void setTop(List<DiscoveredThing> things) throws IOException {

// We want the key to be the last possible key for this date. Return the key as it is in the index (reversed if
// necessary) to ensure the keys are consistent with the initial seek range.
DiscoveredThing thing = things.get(0);
String row = (this.reverseIndex ? new StringBuilder().append(thing.getTerm()).reverse().toString() : thing.getTerm());
Key newKey = new Key(row, thing.getField(), thing.getDate() + "\uffff");

// Conditionally trim DiscoveredThing. (Perhaps to term and visibility).
// List<DiscoveredThing> thingsConditional = things.stream().map(discoveredThingTransformer).collect(Collectors.toList());
Comment thread
hoper-38709 marked this conversation as resolved.
Outdated

// Create a value from the list of things.
ArrayWritable thingArray = new ArrayWritable(DiscoveredThing.class, things.toArray(new DiscoveredThing[0]));
Value newValue = new Value(WritableUtils.toByteArray(thingArray));
if (valuesOnly) {
Key skipKey = new Key(row, "\uffff");
if (!columnFamilies.isEmpty()) {
skipKey = new Key(row, thing.getField(), "\uffff");
Comment thread
hoper-38709 marked this conversation as resolved.
}

if (lastRange.contains(skipKey)) {
this.key = skipKey;
Range nextRange = new Range(skipKey, false, lastRange.getEndKey(), lastRange.isEndKeyInclusive());
this.iterator.seek(nextRange, columnFamilies, false);
}
} else {
this.key = newKey;

}

this.key = newKey;
this.value = newValue;
}

@Override
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
this.lastRange = range;
this.columnFamilies = columnFamilies;
this.iterator.seek(range, columnFamilies, inclusive);
if (log.isTraceEnabled()) {
log.trace("My source " + (this.iterator.hasTop() ? "does" : "does not") + " have a top.");
log.trace("My source " + ((Optional.ofNullable(iterator).isPresent() && this.iterator.hasTop()) ? "does" : "does not") + " have a top.");
Comment thread
hoper-38709 marked this conversation as resolved.
Outdated
}
next();
}
Expand All @@ -210,13 +248,19 @@ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> op
this.showReferenceCount = Boolean.parseBoolean(options.get(DiscoveryLogic.SHOW_REFERENCE_COUNT));
this.reverseIndex = Boolean.parseBoolean(options.get(DiscoveryLogic.REVERSE_INDEX));
this.sumCounts = Boolean.parseBoolean(options.get(DiscoveryLogic.SUM_COUNTS));
this.valuesOnly = Boolean.parseBoolean(options.get(DiscoveryLogic.VALUES_ONLY));
Comment thread
hoper-38709 marked this conversation as resolved.
this.discoveredThingTransformer = new DiscoveredThingValuesOnlyConditionalTransformer(valuesOnly);
if (valuesOnly) {
sumCounts = false;
}

if (log.isTraceEnabled()) {
log.trace("Source: " + source.getClass().getName());
log.trace("Separate counts by column visibility: " + this.separateCountsByColVis);
log.trace("Show reference counts only: " + this.showReferenceCount);
log.trace("Reverse index: " + this.reverseIndex);
log.trace("Sum counts: " + this.sumCounts);
log.trace("Values only: " + this.valuesOnly);
}
}

Expand All @@ -241,7 +285,7 @@ public Value getTopValue() {
private static class TermEntry {

private final String term;
private final String field;
private String field;
private String date;
private String datatype;
private ColumnVisibility visibility;
Expand Down Expand Up @@ -325,5 +369,32 @@ public long getUidListSize() {
public boolean isValid() {
return valid;
}

@Override
public boolean equals(Object o) {
if (o instanceof TermEntry) {
// @formatter:off
TermEntry other = (TermEntry) o;
return new EqualsBuilder().append(getTerm(), other.getTerm())
.append(getField(), other.getField())
.append(getVisibility(), other.getVisibility())
.append(getDate(), other.getDate())
.append(getDatatype(), other.getDatatype())
.append(getUidCount(), other.getUidCount())
.append(getUidListSize(), other.getUidListSize()).isEquals();
// @formatter:on
}
return false;
}

@Override
public int hashCode() {
// @formatter:off
return new HashCodeBuilder().append(getTerm())
.append(getField()).append(getVisibility())
.append(getDate()).append(getDatatype())
.append(getUidCount()).append(getUidListSize()).toHashCode();
// @formatter:on
}
Comment thread
hoper-38709 marked this conversation as resolved.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ public class DiscoveryLogic extends ShardIndexQueryTable {
*/
public static final String SUM_COUNTS = "sum.counts";

/**
* Used to specify a unique list of values not associated with a field.
*/
public static final String VALUES_ONLY = "values.only";

/**
* Used to specify whether to search against the reversed index.
*/
Expand Down Expand Up @@ -151,6 +156,9 @@ public GenericQueryConfiguration initialize(AccumuloClient client, Query setting
// Check if counts should be summed.
setSumCounts(getOrDefaultBoolean(settings, SUM_COUNTS, getSumCounts()));

// Specify values only. Treat associated field, data type, and the like as "don't care."
setValuesOnly(getOrDefaultBoolean(settings, VALUES_ONLY, false));

// Check if any datatype filters were specified.
getConfig().setDatatypeFilter(getOrDefaultSet(settings, QueryParameters.DATATYPE_FILTER_SET, getConfig().getDatatypeFilter()));

Expand Down Expand Up @@ -580,6 +588,7 @@ private IteratorSetting configureDiscoveryIterator(DiscoveryQueryConfiguration c
setting.addOption(SEPARATE_COUNTS_BY_COLVIS, Boolean.toString(config.getSeparateCountsByColVis()));
setting.addOption(SHOW_REFERENCE_COUNT, Boolean.toString(config.getShowReferenceCount()));
setting.addOption(SUM_COUNTS, Boolean.toString(config.getSumCounts()));
setting.addOption(VALUES_ONLY, Boolean.toString(config.getValuesOnly()));
return setting;
}

Expand Down Expand Up @@ -689,4 +698,12 @@ public boolean getSumCounts() {
public void setSumCounts(boolean sumCounts) {
getConfig().setSumCounts(sumCounts);
}

public void setValuesOnly(boolean valuesOnly) {
getConfig().setValuesOnly(valuesOnly);
}

public boolean getValuesOnly() {
return getConfig().getValuesOnly();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public class DiscoveryQueryConfiguration extends ShardIndexQueryConfiguration im
private boolean separateCountsByColVis = false;
private boolean showReferenceCount = false;
private boolean sumCounts = false;
private boolean valuesOnly = false;

public DiscoveryQueryConfiguration() {}

Expand Down Expand Up @@ -131,6 +132,10 @@ public boolean getSumCounts() {
return sumCounts;
}

public boolean getValuesOnly() {
return valuesOnly;
}

public void setSeparateCountsByColVis(boolean separateCountsByColVis) {
this.separateCountsByColVis = separateCountsByColVis;
}
Expand All @@ -144,6 +149,10 @@ public void setSumCounts(boolean sumCounts) {
this.sumCounts = sumCounts;
}

public void setValuesOnly(boolean valuesOnly) {
this.valuesOnly = valuesOnly;
}

@Override
public DiscoveryQueryConfiguration checkpoint() {
// Create a new config that only contains what is needed to execute the specified ranges
Expand Down Expand Up @@ -172,6 +181,6 @@ public int hashCode() {
public String toString() {
return new StringJoiner(", ", DiscoveryQueryConfiguration.class.getSimpleName() + "[", "]").add("literals=" + literals).add("patterns=" + patterns)
.add("ranges=" + ranges).add("separateCountsByColVis=" + separateCountsByColVis).add("showReferenceCount=" + showReferenceCount)
.add("sumCounts=" + sumCounts).toString();
.add("sumCounts=" + sumCounts).add("valuesOnly=" + valuesOnly).toString();
}
}
Loading
Loading