20 changes: 17 additions & 3 deletions pom.xml
@@ -68,15 +68,15 @@
     <dependencies>
         <dependency>
             <groupId>com.facebook.presto.hadoop</groupId>
-            <artifactId>hadoop-cdh4</artifactId>
-            <version>0.3</version>
+            <artifactId>hadoop-apache</artifactId>
+            <version>3.4.1-1</version>
             <scope>provided</scope>
         </dependency>

         <dependency>
             <groupId>com.facebook.presto.hive</groupId>
             <artifactId>hive-apache</artifactId>
-            <version>0.8</version>
+            <version>4.0.1-1</version>
             <scope>provided</scope>
         </dependency>

@@ -114,6 +114,20 @@
             <artifactId>slice</artifactId>
         </dependency>

+        <dependency>
+            <groupId>javax.annotation</groupId>
+            <artifactId>javax.annotation-api</artifactId>
+            <version>1.3.2</version>
+            <scope>provided</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.35</version>
+            <scope>provided</scope>
+        </dependency>
+
         <!-- for testing -->
         <dependency>
             <groupId>junit</groupId>
3 changes: 1 addition & 2 deletions src/main/java/com/facebook/hive/orc/OrcInputFormat.java
@@ -21,7 +21,6 @@
 package com.facebook.hive.orc;

 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.List;

 import org.apache.hadoop.conf.Configuration;
@@ -168,7 +167,7 @@ private static boolean[] findIncludedColumns(List<OrcProto.Type> types,

   @Override
   public boolean validateInput(FileSystem fs, HiveConf conf,
-                               ArrayList<FileStatus> files
+                               List<FileStatus> files
                               ) throws IOException {
     if (files.size() <= 0) {
       return false;
5 changes: 3 additions & 2 deletions src/main/java/com/facebook/hive/orc/OrcSerde.java
@@ -27,6 +27,7 @@

 import org.apache.hadoop.conf.Configuration;
 import com.facebook.hive.orc.lazy.OrcLazyRowObjectInspector;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -40,7 +41,7 @@
  * A serde class for ORC.
  * It transparently passes the object to/from the ORC file reader/writer.
  */
-public class OrcSerde implements SerDe {
+public class OrcSerde extends AbstractSerDe {
   private final OrcSerdeRow row = new OrcSerdeRow();
   private ObjectInspector inspector = null;

@@ -68,7 +69,7 @@ Object getRow() {
   }

   @Override
-  public void initialize(Configuration conf, Properties table) {
+  public void initialize(Configuration conf, Properties table, Properties partition) {
     // Read the configuration parameters
     String columnNameProperty = table.getProperty("columns");
     // NOTE: if "columns.types" is missing, all columns will be of String type
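A note on the OrcSerde change above: the class now extends AbstractSerDe, and in Hive 4 AbstractSerDe.initialize receives table and partition properties as separate arguments. Below is a minimal sketch of that contract as this PR exercises it; ExampleSerde and its trivial Text round-trip are hypothetical, not code from this repository.

    import java.util.Properties;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.AbstractSerDe;
    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.hive.serde2.SerDeStats;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    // Hypothetical ExampleSerde: the smallest shape a Hive 4 SerDe can take.
    public class ExampleSerde extends AbstractSerDe {
      private ObjectInspector inspector;

      // Hive 4 passes partition properties as a third argument; pre-4.x code
      // overrode initialize(Configuration, Properties) from the SerDe interface.
      @Override
      public void initialize(Configuration conf, Properties table, Properties partition)
          throws SerDeException {
        // Column metadata still arrives through table properties, as OrcSerde expects.
        if (table.getProperty("columns") == null) {
          throw new SerDeException("table is missing the 'columns' property");
        }
        inspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
      }

      @Override
      public Class<? extends Writable> getSerializedClass() {
        return Text.class;
      }

      @Override
      public Writable serialize(Object obj, ObjectInspector oi) {
        return new Text(String.valueOf(obj));
      }

      @Override
      public Object deserialize(Writable blob) {
        return blob.toString();
      }

      @Override
      public ObjectInspector getObjectInspector() {
        return inspector;
      }

      @Override
      public SerDeStats getSerDeStats() {
        return null;
      }
    }

Callers follow the same pattern the updated tests use below: serde.initialize(conf, properties, null) when no partition properties apply.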
9 changes: 5 additions & 4 deletions src/main/java/com/facebook/hive/orc/OrcStruct.java
@@ -76,7 +76,7 @@ public List<String> getFieldNames() {
   /**
    * Change the names and number of fields in the struct. No effect if the number of
    * fields is the same. The old field values are copied to the new array.
-   * @param numFields the new number of fields
+   * @param fieldNames the field names
    */
   public void setFieldNames(List<String> fieldNames) {
     this.fieldNames = fieldNames;
@@ -178,7 +178,8 @@ public String getFieldComment() {
       return null;
     }

-    public int getOffset() {
+    @Override
+    public int getFieldID() {
       return offset;
     }
   }
@@ -187,8 +188,8 @@ public static class OrcStructInspector extends SettableStructObjectInspector {
     private final List<StructField> fields;

     public OrcStructInspector(StructTypeInfo info) {
-      ArrayList<String> fieldNames = info.getAllStructFieldNames();
-      ArrayList<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
+      List<String> fieldNames = info.getAllStructFieldNames();
+      List<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
       fields = new ArrayList<StructField>(fieldNames.size());
       for(int i=0; i < fieldNames.size(); ++i) {
         fields.add(new Field(fieldNames.get(i),
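The getOffset-to-getFieldID rename above lines the inner Field class up with the StructField interface, which declares getFieldID() in current Hive; callers can then resolve a field's position without casting to the concrete Field type, as the OrcLazy inspectors further down now do. A small illustrative sketch (FieldIdSketch and lookupFieldIndex are hypothetical, not from this PR):

    import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

    public final class FieldIdSketch {
      private FieldIdSketch() {}

      // With getFieldID() declared on StructField itself, a field's ordinal
      // is reachable through the interface, no downcast required.
      public static int lookupFieldIndex(StructObjectInspector oi, String name) {
        StructField ref = oi.getStructFieldRef(name);
        return ref.getFieldID();
      }
    }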
10 changes: 5 additions & 5 deletions src/main/java/com/facebook/hive/orc/WriterImpl.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -52,7 +53,6 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
-import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -1707,7 +1707,7 @@ void recordPosition(PositionRecorder recorder) throws IOException {

   public static final int MILLIS_PER_SECOND = 1000;
   public static final long BASE_TIMESTAMP =
-      Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND;
+      Timestamp.valueOf("2015-01-01 00:00:00").toEpochMilli() / MILLIS_PER_SECOND;

   private static class TimestampTreeWriter extends TreeWriter {
     private final RunLengthIntegerWriter seconds;
@@ -1736,9 +1736,9 @@ void write(Object obj) throws IOException {
       super.write(obj, RawDatasizeConst.TIMESTAMP_SIZE);
       if (obj != null) {
         Timestamp val =
-            ((TimestampObjectInspector) inspector).
-                getPrimitiveJavaObject(obj);
-        seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
+          ((TimestampObjectInspector) inspector).
+              getPrimitiveJavaObject(obj);
+        seconds.write(val.toEpochMilli() / MILLIS_PER_SECOND - BASE_TIMESTAMP);
         nanos.write(formatNanos(val.getNanos()));
       }
     }
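For the timestamp change above: WriterImpl moves from java.sql.Timestamp.getTime() to org.apache.hadoop.hive.common.type.Timestamp.toEpochMilli(). One nuance worth flagging: Hive's Timestamp is timezone-free and toEpochMilli() interprets the wall-clock value as UTC, while java.sql.Timestamp.valueOf(...).getTime() goes through the JVM's default zone, so the numeric value of BASE_TIMESTAMP can shift. A self-contained sketch of the seconds arithmetic (TimestampEncodeSketch is illustrative; the constants mirror WriterImpl):

    import org.apache.hadoop.hive.common.type.Timestamp;

    public final class TimestampEncodeSketch {
      static final int MILLIS_PER_SECOND = 1000;
      // Whole seconds since the epoch for the writer's 2015-01-01 base point.
      static final long BASE_TIMESTAMP =
          Timestamp.valueOf("2015-01-01 00:00:00").toEpochMilli() / MILLIS_PER_SECOND;

      // Value put on the seconds stream: whole seconds relative to the base.
      static long encodeSeconds(Timestamp val) {
        return val.toEpochMilli() / MILLIS_PER_SECOND - BASE_TIMESTAMP;
      }

      public static void main(String[] args) {
        Timestamp t = Timestamp.valueOf("2015-01-02 00:00:00");
        System.out.println(encodeSeconds(t)); // 86400: exactly one day past the base
      }
    }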
src/main/java/com/facebook/hive/orc/lazy/LazyTimestampTreeReader.java
@@ -21,11 +21,11 @@
 package com.facebook.hive.orc.lazy;

 import java.io.IOException;
-import java.sql.Timestamp;
 import java.util.List;
 import java.util.Map;

-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;

 import com.facebook.hive.orc.InStream;
 import com.facebook.hive.orc.OrcProto;
@@ -72,12 +72,12 @@ public int loadIndeces(List<RowIndexEntry> rowIndexEntries, int startIndex) {

   @Override
   public Object next(Object previous) throws IOException {
-    TimestampWritable result = null;
+    TimestampWritableV2 result = null;
     if (valuePresent) {
       if (previous == null) {
-        result = new TimestampWritable();
+        result = new TimestampWritableV2();
       } else {
-        result = (TimestampWritable) previous;
+        result = (TimestampWritableV2) previous;
       }
       long millis = (data.next() + WriterImpl.BASE_TIMESTAMP) *
           WriterImpl.MILLIS_PER_SECOND;
@@ -89,7 +89,7 @@ public Object next(Object previous) throws IOException {
         millis -= newNanos / 1000000;
       }
       Timestamp timestamp = result.getTimestamp();
-      timestamp.setTime(millis);
+      timestamp.setTimeInMillis(millis);
       timestamp.setNanos(newNanos);
       result.set(timestamp);
     }
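Reading runs the same arithmetic backwards before layering the nanoseconds on: setTimeInMillis and setNanos on the Hive Timestamp stand in one-for-one for java.sql.Timestamp's setTime and setNanos. A sketch under the same assumptions (decode is illustrative and skips the reader's negative-millis adjustment for sub-second values):

    import org.apache.hadoop.hive.common.type.Timestamp;
    import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;

    public final class TimestampDecodeSketch {
      static final int MILLIS_PER_SECOND = 1000;
      static final long BASE_TIMESTAMP =
          Timestamp.valueOf("2015-01-01 00:00:00").toEpochMilli() / MILLIS_PER_SECOND;

      // Rebuild a writable from the stored (relative seconds, nanos) pair.
      static TimestampWritableV2 decode(long storedSeconds, int nanos) {
        long millis = (storedSeconds + BASE_TIMESTAMP) * MILLIS_PER_SECOND;
        Timestamp ts = Timestamp.ofEpochMilli(millis); // replaces new java.sql.Timestamp(millis)
        ts.setNanos(nanos);                            // fractional second carried separately
        return new TimestampWritableV2(ts);
      }
    }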
src/main/java/com/facebook/hive/orc/lazy/OrcLazyRowObjectInspector.java
@@ -33,8 +33,8 @@
 public class OrcLazyRowObjectInspector extends OrcLazyStructObjectInspector {

   public OrcLazyRowObjectInspector(StructTypeInfo info) {
-    ArrayList<String> fieldNames = info.getAllStructFieldNames();
-    ArrayList<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
+    List<String> fieldNames = info.getAllStructFieldNames();
+    List<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
     for(int i=0; i < fieldNames.size(); ++i) {
       fields.add(new Field(fieldNames.get(i),
           OrcLazyObjectInspectorUtils.createLazyObjectInspector(fieldTypes.get(i)), i));
@@ -53,7 +53,7 @@ public OrcLazyRowObjectInspector(int columnId, List<OrcProto.Type> types) {

   @Override
   public Object getStructFieldData(Object data, StructField fieldRef) {
-    int offset = ((Field) fieldRef).getOffset();
+    int offset = fieldRef.getFieldID();

     try {
       OrcLazyObject obj = ((OrcLazyRow) data).getFieldValue(offset);
src/main/java/com/facebook/hive/orc/lazy/OrcLazyStructObjectInspector.java
@@ -41,8 +41,8 @@ protected OrcLazyStructObjectInspector() {
   }

   public OrcLazyStructObjectInspector(StructTypeInfo info) {
-    ArrayList<String> fieldNames = info.getAllStructFieldNames();
-    ArrayList<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
+    List<String> fieldNames = info.getAllStructFieldNames();
+    List<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
     fields = new ArrayList<StructField>(fieldNames.size());
     for(int i=0; i < fieldNames.size(); ++i) {
       fields.add(new Field(fieldNames.get(i),
@@ -72,7 +72,7 @@ public Object getStructFieldData(Object data, StructField fieldRef) {
       return null;
     }

-    int offset = ((Field) fieldRef).getOffset();
+    int offset = fieldRef.getFieldID();
     OrcStruct struct;
     try {
       struct = (OrcStruct) ((OrcLazyStruct) data).materialize();
src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestamp.java
@@ -20,7 +20,7 @@

 package com.facebook.hive.orc.lazy;

-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;

 public class OrcLazyTimestamp extends OrcLazyObject {

@@ -30,6 +30,6 @@ public OrcLazyTimestamp(LazyTimestampTreeReader treeReader) {

   public OrcLazyTimestamp(OrcLazyTimestamp copy) {
     super(copy);
-    previous = new TimestampWritable(((TimestampWritable)copy.previous));
+    previous = new TimestampWritableV2(((TimestampWritableV2)copy.previous));
   }
 }
src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestampObjectInspector.java
@@ -20,24 +20,23 @@

 package com.facebook.hive.orc.lazy;

-import java.sql.Timestamp;
-
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

 public class OrcLazyTimestampObjectInspector extends
-    OrcLazyPrimitiveObjectInspector<OrcLazyTimestamp, TimestampWritable> implements TimestampObjectInspector {
+    OrcLazyPrimitiveObjectInspector<OrcLazyTimestamp, TimestampWritableV2> implements TimestampObjectInspector {

   protected OrcLazyTimestampObjectInspector() {
     super(PrimitiveObjectInspectorUtils.timestampTypeEntry);
   }

   @Override
   public Timestamp getPrimitiveJavaObject(Object o) {
-    TimestampWritable writable = getPrimitiveWritableObject(o);
+    TimestampWritableV2 writable = getPrimitiveWritableObject(o);
     return writable == null ? null : writable.getTimestamp();
   }

17 changes: 8 additions & 9 deletions src/test/java/com/facebook/hive/orc/TestInputOutputFormat.java
@@ -33,7 +33,6 @@
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
-import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -100,7 +99,7 @@ public void testInOutFormat() throws Exception {
         ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
             ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    SerDe serde = new OrcSerde();
+    OrcSerde serde = new OrcSerde();
     HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
     FileSinkOperator.RecordWriter writer =
         outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
@@ -112,7 +111,7 @@
     serde = new OrcSerde();
     properties.setProperty("columns", "x,y");
     properties.setProperty("columns.types", "int:int");
-    serde.initialize(conf, properties);
+    serde.initialize(conf, properties, null);
     assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
     inspector = (StructObjectInspector) serde.getObjectInspector();
     assertEquals("struct<x:int,y:int>", inspector.getTypeName());
@@ -215,7 +214,7 @@ public void testMROutput() throws Exception {
         ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
             ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    SerDe serde = new OrcSerde();
+    OrcSerde serde = new OrcSerde();
     OutputFormat<?, ?> outFormat = new OrcOutputFormat();
     RecordWriter writer =
         outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
@@ -230,7 +229,7 @@
     serde = new OrcSerde();
     properties.setProperty("columns", "z,r");
     properties.setProperty("columns.types", "int:struct<x:int,y:int>");
-    serde.initialize(conf, properties);
+    serde.initialize(conf, properties, null);
     inspector = (StructObjectInspector) serde.getObjectInspector();
     InputFormat<?,?> in = new OrcInputFormat();
     FileInputFormat.setInputPaths(conf, testFilePath.toString());
@@ -273,8 +272,8 @@ public void testEmptyFile() throws Exception {
     writer.close(true);
     properties.setProperty("columns", "x,y");
     properties.setProperty("columns.types", "int:int");
-    SerDe serde = new OrcSerde();
-    serde.initialize(conf, properties);
+    OrcSerde serde = new OrcSerde();
+    serde.initialize(conf, properties, null);
     InputFormat<?,?> in = new OrcInputFormat();
     FileInputFormat.setInputPaths(conf, testFilePath.toString());
     InputSplit[] splits = in.getSplits(conf, 1);
@@ -321,7 +320,7 @@ public void testDefaultTypes() throws Exception {
         ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
             ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
-    SerDe serde = new OrcSerde();
+    OrcSerde serde = new OrcSerde();
     HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
     FileSinkOperator.RecordWriter writer =
         outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
@@ -335,7 +334,7 @@
     writer.close(true);
     serde = new OrcSerde();
     properties.setProperty("columns", "str,str2");
-    serde.initialize(conf, properties);
+    serde.initialize(conf, properties, null);
     inspector = (StructObjectInspector) serde.getObjectInspector();
     assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
     InputFormat<?,?> in = new OrcInputFormat();