Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
<module>query</module>
<module>utils</module>
<module>base-rest-responses</module>
<module>tables</module>
</modules>
<dependencies>
<dependency>
Expand Down
24 changes: 24 additions & 0 deletions core/tables/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>gov.nsa.datawave.core</groupId>
<artifactId>datawave-core-parent</artifactId>
<version>7.40.0-SNAPSHOT</version>
</parent>

<artifactId>datawave-core-tables</artifactId>

<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-api</artifactId>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package datawave.table.constants;

import org.apache.hadoop.io.Text;

/**
* Constants for Accumulo ColumnFamilies reserved by DataWave
*/
public class ColumnFamilyConstants {

public static final String TERM_FREQUENCY = "tf";
public static final String FULL_CONTENT = "d";

public static final Text TERM_FREQUENCY_TEXT = new Text(TERM_FREQUENCY);
public static final Text FULL_CONTENT_TEXT = new Text(FULL_CONTENT);

private ColumnFamilyConstants() {
// enforce static access
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package datawave.table.constants;

/**
* Constants for locality group names reserved by DataWave
*/
public class LocalityGroupConstants {

public static String FULL_CONTENT_LOCALITY = "fullcontent";

public static String TERM_FREQUENCY_LOCALITY = "termfrequency";

private LocalityGroupConstants() {
// enforce static access
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package datawave.table.constants;

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.apache.hadoop.io.Text;
import org.junit.jupiter.api.Test;

/**
* It may seem odd to have a unit test for a constant class. However, a failing unit test here is a hint that changes could affect child modules in an
* unexpected way.
* <p>
* Thus, great care should be taken when modifying core constants.
*/
public class ColumnFamilyConstantsTest {

@Test
public void testConstantsAsString() {
assertEquals("tf", ColumnFamilyConstants.TERM_FREQUENCY);
assertEquals("d", ColumnFamilyConstants.FULL_CONTENT);
}

@Test
public void testConstantsAsText() {
assertEquals(new Text("tf"), ColumnFamilyConstants.TERM_FREQUENCY_TEXT);
assertEquals(new Text("d"), ColumnFamilyConstants.FULL_CONTENT_TEXT);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package datawave.table.constants;

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

/**
* These constants should not change very often, if at all.
* <p>
* In the event that these need to change please ensure all child modules are updated
*/
public class LocalityGroupConstantsTest {

@Test
public void testLocalityGroupNames() {
assertEquals("fullcontent", LocalityGroupConstants.FULL_CONTENT_LOCALITY);
assertEquals("termfrequency", LocalityGroupConstants.TERM_FREQUENCY_LOCALITY);
}
}
4 changes: 4 additions & 0 deletions warehouse/ingest-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
<artifactId>datawave-core-common-util</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>gov.nsa.datawave.core</groupId>
<artifactId>datawave-core-tables</artifactId>
</dependency>
<dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,18 @@
public interface ExtendedDataTypeHandler<KEYIN,KEYOUT,VALUEOUT> extends DataTypeHandler<KEYIN> {

Value NULL_VALUE = new Value(new byte[0]);

@Deprecated(forRemoval = true, since = "7.40.0")
String FULL_CONTENT_LOCALITY_NAME = "fullcontent";

@Deprecated(forRemoval = true, since = "7.40.0")
String FULL_CONTENT_COLUMN_FAMILY = "d";
/* TODO Make a clearer definition of full content indexers */

@Deprecated(forRemoval = true, since = "7.40.0")
String TERM_FREQUENCY_LOCALITY_NAME = "termfrequency";

@Deprecated(forRemoval = true, since = "7.40.0")
Text TERM_FREQUENCY_COLUMN_FAMILY = new Text("tf");

long process(KEYIN key, RawRecordContainer event, Multimap<String,NormalizedContentInterface> fields,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import datawave.ingest.mapreduce.job.BulkIngestKey;
import datawave.ingest.mapreduce.job.writer.ContextWriter;
import datawave.marking.MarkingFunctions;
import datawave.table.constants.ColumnFamilyConstants;

/**
* Handler that take events with processing errors or fatal errors and dumps them into a processing error table. This table will be used for subsequent
Expand Down Expand Up @@ -311,8 +312,8 @@ record = record.copy();

// ShardId 'd' DataType\0UID\0Name for document content event using Event.Writable
String colq = record.getDataType().outputName() + '\0' + record.getId() + '\0' + EVENT_CONTENT_FIELD;
Key k = createKey(getShardId(record), new Text(ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY), new Text(colq), getVisibility(record, null),
record.getTimestamp(), this.helper.getDeleteMode());
Key k = createKey(getShardId(record), new Text(ColumnFamilyConstants.FULL_CONTENT), new Text(colq), getVisibility(record, null), record.getTimestamp(),
this.helper.getDeleteMode());
BulkIngestKey ebKey = new BulkIngestKey(getShardTableName(), k);
contextWriter.write(ebKey, value, context);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import datawave.ingest.data.tokenize.TokenizationHelper.HeartBeatThread;
import datawave.ingest.data.tokenize.TokenizationHelper.TokenizerTimeoutException;
import datawave.ingest.data.tokenize.TruncateAttribute;
import datawave.ingest.mapreduce.handler.ExtendedDataTypeHandler;
import datawave.ingest.mapreduce.handler.shard.AbstractColumnBasedHandler;
import datawave.ingest.mapreduce.handler.shard.ShardedDataTypeHandler;
import datawave.ingest.mapreduce.handler.shard.content.BoundedOffsetQueue;
Expand All @@ -52,6 +51,7 @@
import datawave.ingest.util.BloomFilterWrapper;
import datawave.ingest.util.Identity;
import datawave.ingest.util.TimeoutStrategy;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.util.TextUtil;

/**
Expand Down Expand Up @@ -603,9 +603,8 @@ protected void createTermFrequencyIndex(RawRecordContainer event, Multimap<BulkI
colq.append(this.eventDataTypeName).append('\u0000').append(this.eventUid).append('\u0000').append(nfv.getIndexedFieldValue()).append('\u0000')
.append(nfv.getIndexedFieldName());

BulkIngestKey bKey = new BulkIngestKey(new Text(this.getShardTableName()),
new Key(shardId, ExtendedDataTypeHandler.TERM_FREQUENCY_COLUMN_FAMILY.getBytes(), colq.toString().getBytes(), visibility,
event.getTimestamp(), helper.getDeleteMode()));
BulkIngestKey bKey = new BulkIngestKey(new Text(this.getShardTableName()), new Key(shardId, ColumnFamilyConstants.TERM_FREQUENCY_TEXT.getBytes(),
colq.toString().getBytes(), visibility, event.getTimestamp(), helper.getDeleteMode()));

values.put(bKey, value);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import datawave.ingest.mapreduce.job.BulkIngestKey;
import datawave.ingest.mapreduce.job.writer.ContextWriter;
import datawave.ingest.protobuf.TermWeight;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.util.TextUtil;

/**
Expand Down Expand Up @@ -561,8 +562,8 @@ protected void createShardIndexColumns(RawRecordContainer event, ContextWriter<K
}

/**
* Writes the document's content into the {@link #FULL_CONTENT_COLUMN_FAMILY} column family. The data is compressed (GZIP) and Base64 encoded before being
* placed into the value.
* Writes the document's content into the {@link ColumnFamilyConstants#FULL_CONTENT_TEXT} column family. The data is compressed (GZIP) and Base64 encoded
* before being placed into the value.
*
* @param event
* the event
Expand Down Expand Up @@ -591,8 +592,7 @@ protected void createContentRecord(RawRecordContainer event, ContextWriter<KEYOU
TaskInputOutputContext<KEYIN,? extends RawRecordContainer,KEYOUT,VALUEOUT> context, StatusReporter reporter, Text uid, byte[] visibility,
byte[] shardId, byte[] rawValue) throws IOException, InterruptedException, MutationsRejectedException {

Key k = createKey(shardId, new Text(ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY), uid, visibility, event.getTimestamp(),
this.ingestHelper.getDeleteMode());
Key k = createKey(shardId, new Text(ColumnFamilyConstants.FULL_CONTENT), uid, visibility, event.getTimestamp(), this.ingestHelper.getDeleteMode());

ByteArrayOutputStream baos = null;
Base64OutputStream b64os = null;
Expand Down Expand Up @@ -805,9 +805,8 @@ protected void createTermFrequencyIndex(RawRecordContainer event, ContextWriter<
colq.append(this.eventDataTypeName).append('\u0000').append(this.eventUid).append('\u0000').append(nfv.getIndexedFieldValue()).append('\u0000')
.append(nfv.getIndexedFieldName());

BulkIngestKey bKey = new BulkIngestKey(new Text(this.getShardTableName()),
new Key(shardId, ExtendedDataTypeHandler.TERM_FREQUENCY_COLUMN_FAMILY.getBytes(), colq.toString().getBytes(), visibility,
event.getTimestamp(), deleteMode));
BulkIngestKey bKey = new BulkIngestKey(new Text(this.getShardTableName()), new Key(shardId, ColumnFamilyConstants.TERM_FREQUENCY_TEXT.getBytes(),
colq.toString().getBytes(), visibility, event.getTimestamp(), deleteMode));

contextWriter.write(bKey, value, context);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;

import datawave.ingest.mapreduce.handler.ExtendedDataTypeHandler;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.table.constants.LocalityGroupConstants;

public class DateIndexTableConfigHelper extends AbstractTableConfigHelper {

Expand All @@ -38,9 +39,8 @@ public void setup(String tableName, Configuration config, Logger log) throws Ill
}

String localityGroupsConf = null;
localityGroupsConf = conf.get(LOCALITY_GROUPS,
ExtendedDataTypeHandler.FULL_CONTENT_LOCALITY_NAME + ':' + ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY + ','
+ ExtendedDataTypeHandler.TERM_FREQUENCY_LOCALITY_NAME + ':' + ExtendedDataTypeHandler.TERM_FREQUENCY_COLUMN_FAMILY);
localityGroupsConf = conf.get(LOCALITY_GROUPS, LocalityGroupConstants.FULL_CONTENT_LOCALITY + ':' + ColumnFamilyConstants.FULL_CONTENT + ','
+ LocalityGroupConstants.TERM_FREQUENCY_LOCALITY + ':' + ColumnFamilyConstants.TERM_FREQUENCY);
for (String localityGroupDefConf : StringUtils.split(localityGroupsConf)) {
String[] localityGroupDef = StringUtils.split(localityGroupDefConf, '\\', ':');
Set<Text> families = localityGroups.get(localityGroupDef[0]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;

import datawave.ingest.mapreduce.handler.ExtendedDataTypeHandler;
import datawave.ingest.mapreduce.handler.error.ErrorShardedDataTypeHandler;
import datawave.ingest.mapreduce.handler.shard.ShardedDataTypeHandler;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.table.constants.LocalityGroupConstants;

/**
* TableConfigHelper implementation for the "sharded" error tables. This class should perform the majority of the same operations that the
Expand Down Expand Up @@ -44,9 +45,8 @@ public void setup(String tableName, Configuration config, Logger log) throws Ill
String localityGroupsConf = null;
if (tableName.equals(shardTableName)) {
localityGroupsConf = conf.get(shardTableName + LOCALITY_GROUPS,
ExtendedDataTypeHandler.FULL_CONTENT_LOCALITY_NAME + ':' + ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY + ','
+ ExtendedDataTypeHandler.TERM_FREQUENCY_LOCALITY_NAME + ':'
+ ExtendedDataTypeHandler.TERM_FREQUENCY_COLUMN_FAMILY);
LocalityGroupConstants.FULL_CONTENT_LOCALITY + ':' + ColumnFamilyConstants.FULL_CONTENT + ','
+ LocalityGroupConstants.TERM_FREQUENCY_LOCALITY + ':' + ColumnFamilyConstants.TERM_FREQUENCY);
for (String localityGroupDefConf : StringUtils.split(localityGroupsConf)) {
String[] localityGroupDef = StringUtils.split(localityGroupDefConf, '\\', ':');
Set<Text> families = localityGroups.get(localityGroupDef[0]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;

import datawave.ingest.mapreduce.handler.ExtendedDataTypeHandler;
import datawave.ingest.mapreduce.handler.shard.ShardedDataTypeHandler;
import datawave.ingest.table.aggregator.BitSetCombiner;
import datawave.ingest.table.aggregator.CombinerConfiguration;
Expand All @@ -28,6 +27,8 @@
import datawave.ingest.table.balancer.ShardedTableTabletBalancer;
import datawave.ingest.table.bloomfilter.ShardIndexKeyFunctor;
import datawave.ingest.table.bloomfilter.ShardKeyFunctor;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.table.constants.LocalityGroupConstants;
import datawave.util.TableName;

public class ShardTableConfigHelper extends AbstractTableConfigHelper {
Expand Down Expand Up @@ -106,9 +107,8 @@ public void setup(String tableName, Configuration config, Logger log) throws Ill
String localityGroupsConf = null;
if (tableName.equals(shardTableName)) {
localityGroupsConf = conf.get(shardTableName + LOCALITY_GROUPS,
ExtendedDataTypeHandler.FULL_CONTENT_LOCALITY_NAME + ':' + ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY + ','
+ ExtendedDataTypeHandler.TERM_FREQUENCY_LOCALITY_NAME + ':'
+ ExtendedDataTypeHandler.TERM_FREQUENCY_COLUMN_FAMILY);
LocalityGroupConstants.FULL_CONTENT_LOCALITY + ':' + ColumnFamilyConstants.FULL_CONTENT + ','
+ LocalityGroupConstants.TERM_FREQUENCY_LOCALITY + ':' + ColumnFamilyConstants.TERM_FREQUENCY);
for (String localityGroupDefConf : StringUtils.split(localityGroupsConf)) {
String[] localityGroupDef = StringUtils.split(localityGroupDefConf, '\\', ':');
Set<Text> families = localityGroups.get(localityGroupDef[0]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import datawave.ingest.mapreduce.job.writer.AbstractContextWriter;
import datawave.ingest.test.StandaloneStatusReporter;
import datawave.ingest.test.StandaloneTaskAttemptContext;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.util.TableName;

public class EdgeHandlerTestUtil {
Expand All @@ -43,7 +44,7 @@ public class EdgeHandlerTestUtil {
private static Logger log = Logger.getLogger(EdgeHandlerTestUtil.class);

public static boolean isDocumentKey(Key k) {
return isShardKey(k) && k.getColumnFamily().toString().equals(ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY);
return isShardKey(k) && k.getColumnFamily().toString().equals(ColumnFamilyConstants.FULL_CONTENT);
}

public static boolean isShardKey(Key k) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import datawave.ingest.mapreduce.job.writer.AbstractContextWriter;
import datawave.ingest.test.StandaloneStatusReporter;
import datawave.ingest.test.StandaloneTaskAttemptContext;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.util.TableName;

/**
Expand All @@ -52,7 +53,7 @@ public class ColumnBasedHandlerTestUtil {
private static final Logger log = Logger.getLogger(ColumnBasedHandlerTestUtil.class);

public static boolean isDocumentKey(Key k) {
return isShardKey(k) && k.getColumnFamily().toString().equals(ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY);
return isShardKey(k) && k.getColumnFamily().toString().equals(ColumnFamilyConstants.FULL_CONTENT);
}

public static boolean isShardKey(Key k) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import datawave.ingest.mapreduce.job.writer.AbstractContextWriter;
import datawave.ingest.test.StandaloneStatusReporter;
import datawave.ingest.test.StandaloneTaskAttemptContext;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.util.TableName;

/**
Expand All @@ -50,7 +51,7 @@ public class ColumnBasedHandlerTestUtil {
private static Logger log = Logger.getLogger(ColumnBasedHandlerTestUtil.class);

public static boolean isDocumentKey(Key k) {
return isShardKey(k) && k.getColumnFamily().toString().equals(ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY);
return isShardKey(k) && k.getColumnFamily().toString().equals(ColumnFamilyConstants.FULL_CONTENT);
}

public static boolean isShardKey(Key k) {
Expand Down
5 changes: 5 additions & 0 deletions warehouse/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@
<artifactId>datawave-regression-testing</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>gov.nsa.datawave.core</groupId>
<artifactId>datawave-core-tables</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>gov.nsa.datawave.webservices</groupId>
<artifactId>datawave-ws-annotations</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@
import datawave.core.query.logic.QueryCheckpoint;
import datawave.core.query.logic.QueryKey;
import datawave.core.query.logic.QueryLogicTransformer;
import datawave.ingest.mapreduce.handler.ExtendedDataTypeHandler;
import datawave.microservice.query.Query;
import datawave.microservice.query.QueryImpl.Parameter;
import datawave.query.Constants;
import datawave.query.QueryParameters;
import datawave.query.config.ContentQueryConfiguration;
import datawave.query.tables.ScannerFactory;
import datawave.query.transformer.ContentQueryTransformer;
import datawave.table.constants.ColumnFamilyConstants;
import datawave.webservice.query.exception.QueryException;

/**
Expand Down Expand Up @@ -228,7 +228,7 @@ private Collection<Range> createRanges(final Query settings, final String endKey
log.debug("Received pieces: " + shardId + ", " + datatype + ", " + uid);

// Create and add a Range
final String cf = ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY;
final String cf = ColumnFamilyConstants.FULL_CONTENT;
final String cq = datatype + Constants.NULL_BYTE_STRING + uid;
final Key startKey = new Key(shardId, cf, cq + Constants.NULL_BYTE_STRING);
final Key endKey = new Key(shardId, cf, cq + endKeyTerminator);
Expand Down
Loading
Loading