Skip to content
Open
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@

################################################################
# document:match query tests for EventQuery

# These tests validate both the JEXL document:match(...) form and the
# Lucene #DOCUMENT_MATCH(...) form against a known Wikipedia event whose
# REVISION_COMMENT d-column contains the string "Origins".

################################################################
# JEXL create

# Build the form data for the JEXL create call. The query text is URL-encoded and combines
# an equality term on PAGE_TITLE with document:match on the REVISION_COMMENT field.
setCurlData query=$( urlencode "PAGE_TITLE == 'Anarchism' && document:match('REVISION_COMMENT', 'Origins')" ) \
queryName=EventQueryDocumentMatchJexl \
begin=20130301 \
end=20130401 \
pagesize=1 \
auths=PUBLIC \
columnVisibility=PRIVATE \
query.syntax=JEXL

# NOTE(review): positional arguments appear to be test name, description, curl arguments,
# expected content type, and expected HTTP status - confirm against configureTest.
configureTest \
CreateDocumentMatchJexl \
"Creates a JEXL EventQuery using document:match against REVISION_COMMENT d-column content" \
"--header 'Content-Type: application/x-www-form-urlencoded' ${DW_CURL_DATA} -X POST ${URI_ROOT}/Query/EventQuery/create" \
"application/xml;charset=UTF-8" \
200

# NOTE(review): --set-query-id presumably stores the created query's id in DW_QUERY_ID,
# which the next/close tests below reference - confirm against runTest.
runTest --set-query-id

################################################################
# JEXL next

# Fetch one page (pagesize=1 above) of results for the query created in the previous test.
configureTest \
DocumentMatchJexlPage1 \
"Gets the first page of results for the JEXL document:match query in JSON format" \
"--header 'Accept: application/json' -X GET ${URI_ROOT}/Query/${DW_QUERY_ID}/next" \
application/json \
200

runTest

################################################################
# JEXL close

# Close the JEXL query before creating the Lucene one, since DW_QUERY_ID is reused below.
configureCloseQueryTest ${DW_QUERY_ID}

runTest

################################################################
# Lucene create

# Same request as the JEXL create above, expressed in Lucene syntax with #DOCUMENT_MATCH.
setCurlData query=$( urlencode "PAGE_TITLE:Anarchism AND #DOCUMENT_MATCH(REVISION_COMMENT, Origins)" ) \
queryName=EventQueryDocumentMatchLucene \
begin=20130301 \
end=20130401 \
pagesize=1 \
auths=PUBLIC \
columnVisibility=PRIVATE \
query.syntax=LUCENE

configureTest \
CreateDocumentMatchLucene \
"Creates a Lucene EventQuery using #DOCUMENT_MATCH against REVISION_COMMENT d-column content" \
"--header 'Content-Type: application/x-www-form-urlencoded' ${DW_CURL_DATA} -X POST ${URI_ROOT}/Query/EventQuery/create" \
"application/xml;charset=UTF-8" \
200

runTest --set-query-id

################################################################
# Lucene next

# Fetch one page of results for the Lucene query created in the previous test.
configureTest \
DocumentMatchLucenePage1 \
"Gets the first page of results for the Lucene #DOCUMENT_MATCH query in JSON format" \
"--header 'Accept: application/json' -X GET ${URI_ROOT}/Query/${DW_QUERY_ID}/next" \
application/json \
200

runTest

################################################################
# Lucene close

configureCloseQueryTest ${DW_QUERY_ID}

# No runTest call follows the final configured test on purpose:
# This last test is executed by run.sh, as usual
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<entry key="geo" value="datawave.query.jexl.functions.GeoFunctions"/>
<entry key="geowave" value="datawave.query.jexl.functions.GeoWaveFunctions"/>
<entry key="content" value="datawave.query.jexl.functions.ContentFunctions"/>
<entry key="document" value="datawave.query.jexl.functions.DocumentFunctions"/>
<entry key="normalize" value="datawave.query.jexl.functions.NormalizationFunctions"/>
<entry key="filter" value="datawave.query.jexl.functions.EvaluationPhaseFilterFunctions"/>
<entry key="grouping" value="datawave.query.jexl.functions.GroupingRequiredFilterFunctions"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<entry key="geo" value="datawave.query.jexl.functions.GeoFunctions"/>
<entry key="geowave" value="datawave.query.jexl.functions.GeoWaveFunctions"/>
<entry key="content" value="datawave.query.jexl.functions.ContentFunctions"/>
<entry key="document" value="datawave.query.jexl.functions.DocumentFunctions"/>
<entry key="normalize" value="datawave.query.jexl.functions.NormalizationFunctions"/>
<entry key="filter" value="datawave.query.jexl.functions.EvaluationPhaseFilterFunctions"/>
<entry key="grouping" value="datawave.query.jexl.functions.GroupingRequiredFilterFunctions"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import datawave.query.attributes.UniqueFields;
import datawave.query.common.grouping.GroupFields;
import datawave.query.config.annotation.AllHitsQueryConfig;
import datawave.query.function.DocumentMatchContext;
import datawave.query.function.DocumentPermutation;
import datawave.query.iterator.QueryIterator;
import datawave.query.iterator.ivarator.IvaratorCacheDirConfig;
Expand Down Expand Up @@ -279,6 +280,8 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
private Set<String> queryTermFrequencyFields = Collections.emptySet();
// Are we required to get term frequencies (i.e. does the query contain content functions)
private boolean termFrequenciesRequired = false;
// Are we required to gather document-match context (i.e. does the query contain document:match functions)
private boolean documentMatchContextRequired = false;
// Limit count of returned values for arbitrary fields.
private Set<String> limitFields = Collections.emptySet();
private Set<String> matchingFieldSets = Collections.emptySet();
Expand Down Expand Up @@ -502,6 +505,18 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
* Term Frequency aggregations that exceed this threshold in milliseconds are logged as a warning
*/
private int tfAggregationThresholdMs = -1;
/**
* Maximum encoded d-column payload size, in bytes, to inspect for document:match evaluation
*/
private int documentMatchMaxEncodedSize = DocumentMatchContext.DEFAULT_MAX_ENCODED_SIZE;
/**
* Maximum decoded d-column payload size, in bytes, to inspect for document:match evaluation
*/
private int documentMatchMaxDecodedSize = DocumentMatchContext.DEFAULT_MAX_DECODED_SIZE;
/**
* Maximum aggregate encoded d-column payload size, in bytes, to retain in memory for document:match evaluation
*/
private int documentMatchMaxEncodedContextSize = DocumentMatchContext.DEFAULT_MAX_ENCODED_CONTEXT_SIZE;

/**
* Flag to control query option pruning in the visitor function. Queries that see significant or varied pruning via the RangeStream may see a benefit from
Expand Down Expand Up @@ -718,6 +733,7 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setSortedUIDs(other.isSortedUIDs());
this.setQueryTermFrequencyFields(null == other.getQueryTermFrequencyFields() ? null : Sets.newHashSet(other.getQueryTermFrequencyFields()));
this.setTermFrequenciesRequired(other.isTermFrequenciesRequired());
this.setDocumentMatchContextRequired(other.isDocumentMatchContextRequired());
this.setLimitFields(null == other.getLimitFields() ? null : Sets.newHashSet(other.getLimitFields()));
this.setMatchingFieldSets(null == other.getMatchingFieldSets() ? null : Sets.newHashSet(other.getMatchingFieldSets()));
this.setLimitFieldsPreQueryEvaluation(other.isLimitFieldsPreQueryEvaluation());
Expand Down Expand Up @@ -836,6 +852,9 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setLazySetMechanismEnabled(other.isLazySetMechanismEnabled());
this.setDocAggregationThresholdMs(other.getDocAggregationThresholdMs());
this.setTfAggregationThresholdMs(other.getTfAggregationThresholdMs());
this.setDocumentMatchMaxEncodedSize(other.getDocumentMatchMaxEncodedSize());
this.setDocumentMatchMaxDecodedSize(other.getDocumentMatchMaxDecodedSize());
this.setDocumentMatchMaxEncodedContextSize(other.getDocumentMatchMaxEncodedContextSize());
this.setGroupFields(GroupFields.copyOf(other.getGroupFields()));
this.setPruneQueryOptions(other.getPruneQueryOptions());
this.setSortQueryPreIndexWithImpliedCounts(other.isSortQueryPreIndexWithImpliedCounts());
Expand Down Expand Up @@ -2344,6 +2363,14 @@ public void setTermFrequenciesRequired(boolean termFrequenciesRequired) {
this.termFrequenciesRequired = termFrequenciesRequired;
}

/**
 * Reports whether document-match context must be gathered for this query, i.e. whether the query contains document:match functions.
 *
 * @return the current value of the documentMatchContextRequired flag
 */
public boolean isDocumentMatchContextRequired() {
    return this.documentMatchContextRequired;
}

/**
 * Sets whether document-match context must be gathered for this query, i.e. whether the query contains document:match functions.
 *
 * @param documentMatchContextRequired
 *            the new value of the flag
 */
public void setDocumentMatchContextRequired(boolean documentMatchContextRequired) {
this.documentMatchContextRequired = documentMatchContextRequired;
}

/**
 * Stores the given value in the shouldLimitTermExpansionToModel flag.
 *
 * @param shouldLimitTermExpansionToModel
 *            the new value of the flag
 */
public void setLimitTermExpansionToModel(boolean shouldLimitTermExpansionToModel) {
this.shouldLimitTermExpansionToModel = shouldLimitTermExpansionToModel;
}
Expand Down Expand Up @@ -2891,6 +2918,30 @@ public void setTfAggregationThresholdMs(int tfAggregationThresholdMs) {
this.tfAggregationThresholdMs = tfAggregationThresholdMs;
}

/**
 * Returns the maximum encoded d-column payload size, in bytes, to inspect for document:match evaluation.
 *
 * @return the configured encoded-size limit
 */
public int getDocumentMatchMaxEncodedSize() {
    return this.documentMatchMaxEncodedSize;
}

/**
 * Sets the maximum encoded d-column payload size, in bytes, to inspect for document:match evaluation. The value is stored as given; no range check is
 * applied here.
 *
 * @param documentMatchMaxEncodedSize
 *            the encoded-size limit
 */
public void setDocumentMatchMaxEncodedSize(int documentMatchMaxEncodedSize) {
this.documentMatchMaxEncodedSize = documentMatchMaxEncodedSize;
}

/**
 * Returns the maximum decoded d-column payload size, in bytes, to inspect for document:match evaluation.
 *
 * @return the configured decoded-size limit
 */
public int getDocumentMatchMaxDecodedSize() {
    return this.documentMatchMaxDecodedSize;
}

/**
 * Sets the maximum decoded d-column payload size, in bytes, to inspect for document:match evaluation. The value is stored as given; no range check is
 * applied here.
 *
 * @param documentMatchMaxDecodedSize
 *            the decoded-size limit
 */
public void setDocumentMatchMaxDecodedSize(int documentMatchMaxDecodedSize) {
this.documentMatchMaxDecodedSize = documentMatchMaxDecodedSize;
}

/**
 * Returns the maximum aggregate encoded d-column payload size, in bytes, to retain in memory for document:match evaluation.
 *
 * @return the configured aggregate encoded-context limit
 */
public int getDocumentMatchMaxEncodedContextSize() {
    return this.documentMatchMaxEncodedContextSize;
}

/**
 * Sets the maximum aggregate encoded d-column payload size, in bytes, to retain in memory for document:match evaluation. The value is stored as given; no
 * range check is applied here.
 *
 * @param documentMatchMaxEncodedContextSize
 *            the aggregate encoded-context limit
 */
public void setDocumentMatchMaxEncodedContextSize(int documentMatchMaxEncodedContextSize) {
this.documentMatchMaxEncodedContextSize = documentMatchMaxEncodedContextSize;
}

/**
 * Returns the group fields held by this configuration. The stored reference is returned directly, without copying.
 *
 * @return the current groupFields value
 */
public GroupFields getGroupFields() {
    return this.groupFields;
}
Expand Down Expand Up @@ -3050,6 +3101,7 @@ public boolean equals(Object o) {
Float.compare(that.getCollapseDatePercentThreshold(), getCollapseDatePercentThreshold()) == 0 &&
isSortedUIDs() == that.isSortedUIDs() &&
isTermFrequenciesRequired() == that.isTermFrequenciesRequired() &&
isDocumentMatchContextRequired() == that.isDocumentMatchContextRequired() &&
isLimitFieldsPreQueryEvaluation() == that.isLimitFieldsPreQueryEvaluation() &&
isHitList() == that.isHitList() &&
isDateIndexTimeTravel() == that.isDateIndexTimeTravel() &&
Expand Down Expand Up @@ -3205,6 +3257,9 @@ public boolean equals(Object o) {
isLazySetMechanismEnabled() == that.isLazySetMechanismEnabled() &&
getDocAggregationThresholdMs() == that.getDocAggregationThresholdMs() &&
getTfAggregationThresholdMs() == that.getTfAggregationThresholdMs() &&
getDocumentMatchMaxEncodedSize() == that.getDocumentMatchMaxEncodedSize() &&
getDocumentMatchMaxDecodedSize() == that.getDocumentMatchMaxDecodedSize() &&
getDocumentMatchMaxEncodedContextSize() == that.getDocumentMatchMaxEncodedContextSize() &&
getPruneQueryOptions() == that.getPruneQueryOptions() &&
isSortQueryPreIndexWithImpliedCounts() == that.isSortQueryPreIndexWithImpliedCounts() &&
isSortQueryPreIndexWithFieldCounts() == that.isSortQueryPreIndexWithFieldCounts() &&
Expand Down Expand Up @@ -3338,6 +3393,7 @@ public int hashCode() {
isSortedUIDs(),
getQueryTermFrequencyFields(),
isTermFrequenciesRequired(),
isDocumentMatchContextRequired(),
getLimitFields(),
getMatchingFieldSets(),
isLimitFieldsPreQueryEvaluation(),
Expand Down Expand Up @@ -3443,6 +3499,9 @@ public int hashCode() {
isLazySetMechanismEnabled(),
getDocAggregationThresholdMs(),
getTfAggregationThresholdMs(),
getDocumentMatchMaxEncodedSize(),
getDocumentMatchMaxDecodedSize(),
getDocumentMatchMaxEncodedContextSize(),
getPruneQueryOptions(),
isSortQueryPreIndexWithImpliedCounts(),
isSortQueryPreIndexWithFieldCounts(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package datawave.query.function;

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;

import datawave.query.predicate.TimeFilter;

/**
 * Mutable settings holder used to build the document-match context lookup function that runs immediately before JEXL evaluation.
 * <p>
 * Every property starts at its Java default ({@code null} or {@code false}) and is stored and returned as-is; this class performs no validation or copying.
 */
public class DocumentMatchConfig {

    // ---- source ----------------------------------------------------

    /** Key/value iterator handed to the lookup function. NOTE(review): positioning and ownership expectations are not visible here. */
    private SortedKeyValueIterator<Key,Value> source;

    /**
     * @return the configured source iterator, or {@code null} if none has been set
     */
    public SortedKeyValueIterator<Key,Value> getSource() {
        return this.source;
    }

    /**
     * @param source
     *            the source iterator to use
     */
    public void setSource(SortedKeyValueIterator<Key,Value> source) {
        this.source = source;
    }

    // ---- time filter -----------------------------------------------

    /** Time filter made available to the lookup function; how it is applied is decided by the consumer of this config. */
    private TimeFilter timeFilter;

    /**
     * @return the configured time filter, or {@code null} if none has been set
     */
    public TimeFilter getTimeFilter() {
        return this.timeFilter;
    }

    /**
     * @param timeFilter
     *            the time filter to use
     */
    public void setTimeFilter(TimeFilter timeFilter) {
        this.timeFilter = timeFilter;
    }

    // ---- limits ----------------------------------------------------

    /** Limits object for the document-match context. */
    private DocumentMatchContext.Limits limits;

    /**
     * @return the configured limits, or {@code null} if none have been set
     */
    public DocumentMatchContext.Limits getLimits() {
        return this.limits;
    }

    /**
     * @param limits
     *            the limits to use
     */
    public void setLimits(DocumentMatchContext.Limits limits) {
        this.limits = limits;
    }

    // ---- tld -------------------------------------------------------

    /** TLD flag. NOTE(review): presumably "top-level document" query mode - confirm against the code that reads it. */
    private boolean tld;

    /**
     * @return the current value of the tld flag
     */
    public boolean isTld() {
        return this.tld;
    }

    /**
     * @param tld
     *            the new value of the tld flag
     */
    public void setTld(boolean tld) {
        this.tld = tld;
    }
}
Loading
Loading