diff --git a/pom.xml b/pom.xml index 83f642d..766eefb 100644 --- a/pom.xml +++ b/pom.xml @@ -68,15 +68,15 @@ com.facebook.presto.hadoop - hadoop-cdh4 - 0.3 + hadoop-apache + 3.4.1-1 provided com.facebook.presto.hive hive-apache - 0.8 + 4.0.1-1 provided @@ -114,6 +114,20 @@ slice + + javax.annotation + javax.annotation-api + 1.3.2 + provided + + + + org.slf4j + slf4j-api + 1.7.35 + provided + + junit diff --git a/src/main/java/com/facebook/hive/orc/OrcInputFormat.java b/src/main/java/com/facebook/hive/orc/OrcInputFormat.java index 016f9c3..81d4f84 100644 --- a/src/main/java/com/facebook/hive/orc/OrcInputFormat.java +++ b/src/main/java/com/facebook/hive/orc/OrcInputFormat.java @@ -21,7 +21,6 @@ package com.facebook.hive.orc; import java.io.IOException; -import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -168,7 +167,7 @@ private static boolean[] findIncludedColumns(List types, @Override public boolean validateInput(FileSystem fs, HiveConf conf, - ArrayList files + List files ) throws IOException { if (files.size() <= 0) { return false; diff --git a/src/main/java/com/facebook/hive/orc/OrcSerde.java b/src/main/java/com/facebook/hive/orc/OrcSerde.java index 27f64b3..d54c48c 100644 --- a/src/main/java/com/facebook/hive/orc/OrcSerde.java +++ b/src/main/java/com/facebook/hive/orc/OrcSerde.java @@ -27,6 +27,6 @@ import org.apache.hadoop.conf.Configuration; import com.facebook.hive.orc.lazy.OrcLazyRowObjectInspector; +import org.apache.hadoop.hive.serde2.AbstractSerDe; -import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; @@ -40,7 +41,7 @@ * A serde class for ORC. * It transparently passes the object to/from the ORC file reader/writer. 
*/ -public class OrcSerde implements SerDe { +public class OrcSerde extends AbstractSerDe { private final OrcSerdeRow row = new OrcSerdeRow(); private ObjectInspector inspector = null; @@ -68,7 +69,7 @@ Object getRow() { } @Override - public void initialize(Configuration conf, Properties table) { + public void initialize(Configuration conf, Properties table, Properties partition) { // Read the configuration parameters String columnNameProperty = table.getProperty("columns"); // NOTE: if "columns.types" is missing, all columns will be of String type diff --git a/src/main/java/com/facebook/hive/orc/OrcStruct.java b/src/main/java/com/facebook/hive/orc/OrcStruct.java index c3bcd84..c69f6a8 100644 --- a/src/main/java/com/facebook/hive/orc/OrcStruct.java +++ b/src/main/java/com/facebook/hive/orc/OrcStruct.java @@ -76,7 +76,7 @@ public List getFieldNames() { /** * Change the names and number of fields in the struct. No effect if the number of * fields is the same. The old field values are copied to the new array. 
- * @param numFields the new number of fields + * @param fieldNames the field names */ public void setFieldNames(List fieldNames) { this.fieldNames = fieldNames; @@ -178,7 +178,8 @@ public String getFieldComment() { return null; } - public int getOffset() { + @Override + public int getFieldID() { return offset; } } @@ -187,8 +188,8 @@ public static class OrcStructInspector extends SettableStructObjectInspector { private final List fields; public OrcStructInspector(StructTypeInfo info) { - ArrayList fieldNames = info.getAllStructFieldNames(); - ArrayList fieldTypes = info.getAllStructFieldTypeInfos(); + List fieldNames = info.getAllStructFieldNames(); + List fieldTypes = info.getAllStructFieldTypeInfos(); fields = new ArrayList(fieldNames.size()); for(int i=0; i < fieldNames.size(); ++i) { fields.add(new Field(fieldNames.get(i), diff --git a/src/main/java/com/facebook/hive/orc/WriterImpl.java b/src/main/java/com/facebook/hive/orc/WriterImpl.java index 3e79993..0e2adcb 100644 --- a/src/main/java/com/facebook/hive/orc/WriterImpl.java +++ b/src/main/java/com/facebook/hive/orc/WriterImpl.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -52,7 +53,6 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; -import java.sql.Timestamp; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -1707,7 +1707,7 @@ void recordPosition(PositionRecorder recorder) throws IOException { public static final int MILLIS_PER_SECOND = 1000; public static final long BASE_TIMESTAMP = - Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND; + 
Timestamp.valueOf("2015-01-01 00:00:00").toEpochMilli() / MILLIS_PER_SECOND; private static class TimestampTreeWriter extends TreeWriter { private final RunLengthIntegerWriter seconds; @@ -1736,9 +1736,9 @@ void write(Object obj) throws IOException { super.write(obj, RawDatasizeConst.TIMESTAMP_SIZE); if (obj != null) { Timestamp val = - ((TimestampObjectInspector) inspector). - getPrimitiveJavaObject(obj); - seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP); + ((TimestampObjectInspector) inspector). + getPrimitiveJavaObject(obj); + seconds.write(val.toEpochMilli() / MILLIS_PER_SECOND - BASE_TIMESTAMP); nanos.write(formatNanos(val.getNanos())); } } diff --git a/src/main/java/com/facebook/hive/orc/lazy/LazyTimestampTreeReader.java b/src/main/java/com/facebook/hive/orc/lazy/LazyTimestampTreeReader.java index 3419b69..a1189ae 100644 --- a/src/main/java/com/facebook/hive/orc/lazy/LazyTimestampTreeReader.java +++ b/src/main/java/com/facebook/hive/orc/lazy/LazyTimestampTreeReader.java @@ -21,11 +21,11 @@ package com.facebook.hive.orc.lazy; import java.io.IOException; -import java.sql.Timestamp; import java.util.List; import java.util.Map; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import com.facebook.hive.orc.InStream; import com.facebook.hive.orc.OrcProto; @@ -72,12 +72,12 @@ public int loadIndeces(List rowIndexEntries, int startIndex) { @Override public Object next(Object previous) throws IOException { - TimestampWritable result = null; + TimestampWritableV2 result = null; if (valuePresent) { if (previous == null) { - result = new TimestampWritable(); + result = new TimestampWritableV2(); } else { - result = (TimestampWritable) previous; + result = (TimestampWritableV2) previous; } long millis = (data.next() + WriterImpl.BASE_TIMESTAMP) * WriterImpl.MILLIS_PER_SECOND; @@ -89,7 +89,7 @@ public Object next(Object previous) 
throws IOException { millis -= newNanos / 1000000; } Timestamp timestamp = result.getTimestamp(); - timestamp.setTime(millis); + timestamp.setTimeInMillis(millis); timestamp.setNanos(newNanos); result.set(timestamp); } diff --git a/src/main/java/com/facebook/hive/orc/lazy/OrcLazyRowObjectInspector.java b/src/main/java/com/facebook/hive/orc/lazy/OrcLazyRowObjectInspector.java index 3b53ac2..2651949 100644 --- a/src/main/java/com/facebook/hive/orc/lazy/OrcLazyRowObjectInspector.java +++ b/src/main/java/com/facebook/hive/orc/lazy/OrcLazyRowObjectInspector.java @@ -33,8 +33,8 @@ public class OrcLazyRowObjectInspector extends OrcLazyStructObjectInspector { public OrcLazyRowObjectInspector(StructTypeInfo info) { - ArrayList fieldNames = info.getAllStructFieldNames(); - ArrayList fieldTypes = info.getAllStructFieldTypeInfos(); + List fieldNames = info.getAllStructFieldNames(); + List fieldTypes = info.getAllStructFieldTypeInfos(); for(int i=0; i < fieldNames.size(); ++i) { fields.add(new Field(fieldNames.get(i), OrcLazyObjectInspectorUtils.createLazyObjectInspector(fieldTypes.get(i)), i)); @@ -53,7 +53,7 @@ public OrcLazyRowObjectInspector(int columnId, List types) { @Override public Object getStructFieldData(Object data, StructField fieldRef) { - int offset = ((Field) fieldRef).getOffset(); + int offset = fieldRef.getFieldID(); try { OrcLazyObject obj = ((OrcLazyRow) data).getFieldValue(offset); diff --git a/src/main/java/com/facebook/hive/orc/lazy/OrcLazyStructObjectInspector.java b/src/main/java/com/facebook/hive/orc/lazy/OrcLazyStructObjectInspector.java index 36f2de9..2bf58ee 100644 --- a/src/main/java/com/facebook/hive/orc/lazy/OrcLazyStructObjectInspector.java +++ b/src/main/java/com/facebook/hive/orc/lazy/OrcLazyStructObjectInspector.java @@ -41,8 +41,8 @@ protected OrcLazyStructObjectInspector() { } public OrcLazyStructObjectInspector(StructTypeInfo info) { - ArrayList fieldNames = info.getAllStructFieldNames(); - ArrayList fieldTypes = 
info.getAllStructFieldTypeInfos(); + List fieldNames = info.getAllStructFieldNames(); + List fieldTypes = info.getAllStructFieldTypeInfos(); fields = new ArrayList(fieldNames.size()); for(int i=0; i < fieldNames.size(); ++i) { fields.add(new Field(fieldNames.get(i), @@ -72,7 +72,7 @@ public Object getStructFieldData(Object data, StructField fieldRef) { return null; } - int offset = ((Field) fieldRef).getOffset(); + int offset = fieldRef.getFieldID(); OrcStruct struct; try { struct = (OrcStruct) ((OrcLazyStruct) data).materialize(); diff --git a/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestamp.java b/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestamp.java index 691ddf9..57366aa 100644 --- a/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestamp.java +++ b/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestamp.java @@ -20,7 +20,7 @@ package com.facebook.hive.orc.lazy; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; public class OrcLazyTimestamp extends OrcLazyObject { @@ -30,6 +30,6 @@ public OrcLazyTimestamp(LazyTimestampTreeReader treeReader) { public OrcLazyTimestamp(OrcLazyTimestamp copy) { super(copy); - previous = new TimestampWritable(((TimestampWritable)copy.previous)); + previous = new TimestampWritableV2(((TimestampWritableV2)copy.previous)); } } diff --git a/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestampObjectInspector.java b/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestampObjectInspector.java index eae37e1..e1a48ef 100644 --- a/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestampObjectInspector.java +++ b/src/main/java/com/facebook/hive/orc/lazy/OrcLazyTimestampObjectInspector.java @@ -20,16 +20,15 @@ package com.facebook.hive.orc.lazy; -import java.sql.Timestamp; - -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.common.type.Timestamp; +import 
org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; public class OrcLazyTimestampObjectInspector extends - OrcLazyPrimitiveObjectInspector implements TimestampObjectInspector { + OrcLazyPrimitiveObjectInspector implements TimestampObjectInspector { protected OrcLazyTimestampObjectInspector() { super(PrimitiveObjectInspectorUtils.timestampTypeEntry); @@ -37,7 +36,7 @@ protected OrcLazyTimestampObjectInspector() { @Override public Timestamp getPrimitiveJavaObject(Object o) { - TimestampWritable writable = getPrimitiveWritableObject(o); + TimestampWritableV2 writable = getPrimitiveWritableObject(o); return writable == null ? null : writable.getTimestamp(); } diff --git a/src/test/java/com/facebook/hive/orc/TestInputOutputFormat.java b/src/test/java/com/facebook/hive/orc/TestInputOutputFormat.java index da39b42..f2d1aa3 100644 --- a/src/test/java/com/facebook/hive/orc/TestInputOutputFormat.java +++ b/src/test/java/com/facebook/hive/orc/TestInputOutputFormat.java @@ -33,7 +33,6 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; -import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -100,7 +99,7 @@ public void testInOutFormat() throws Exception { ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } - SerDe serde = new OrcSerde(); + OrcSerde serde = new OrcSerde(); 
HiveOutputFormat outFormat = new OrcOutputFormat(); FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, @@ -112,7 +111,7 @@ public void testInOutFormat() throws Exception { serde = new OrcSerde(); properties.setProperty("columns", "x,y"); properties.setProperty("columns.types", "int:int"); - serde.initialize(conf, properties); + serde.initialize(conf, properties, null); assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass()); inspector = (StructObjectInspector) serde.getObjectInspector(); assertEquals("struct", inspector.getTypeName()); @@ -215,7 +214,7 @@ public void testMROutput() throws Exception { ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } - SerDe serde = new OrcSerde(); + OrcSerde serde = new OrcSerde(); OutputFormat outFormat = new OrcOutputFormat(); RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), @@ -230,7 +229,7 @@ public void testMROutput() throws Exception { serde = new OrcSerde(); properties.setProperty("columns", "z,r"); properties.setProperty("columns.types", "int:struct"); - serde.initialize(conf, properties); + serde.initialize(conf, properties, null); inspector = (StructObjectInspector) serde.getObjectInspector(); InputFormat in = new OrcInputFormat(); FileInputFormat.setInputPaths(conf, testFilePath.toString()); @@ -273,8 +272,8 @@ public void testEmptyFile() throws Exception { writer.close(true); properties.setProperty("columns", "x,y"); properties.setProperty("columns.types", "int:int"); - SerDe serde = new OrcSerde(); - serde.initialize(conf, properties); + OrcSerde serde = new OrcSerde(); + serde.initialize(conf, properties, null); InputFormat in = new OrcInputFormat(); FileInputFormat.setInputPaths(conf, testFilePath.toString()); InputSplit[] splits = in.getSplits(conf, 1); @@ -321,7 +320,7 @@ public void testDefaultTypes() throws Exception { 
ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } - SerDe serde = new OrcSerde(); + OrcSerde serde = new OrcSerde(); HiveOutputFormat outFormat = new OrcOutputFormat(); FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class, @@ -335,7 +334,7 @@ public void testDefaultTypes() throws Exception { writer.close(true); serde = new OrcSerde(); properties.setProperty("columns", "str,str2"); - serde.initialize(conf, properties); + serde.initialize(conf, properties, null); inspector = (StructObjectInspector) serde.getObjectInspector(); assertEquals("struct", inspector.getTypeName()); InputFormat in = new OrcInputFormat(); diff --git a/src/test/java/com/facebook/hive/orc/TestOrcFile.java b/src/test/java/com/facebook/hive/orc/TestOrcFile.java index ef92461..e36181e 100644 --- a/src/test/java/com/facebook/hive/orc/TestOrcFile.java +++ b/src/test/java/com/facebook/hive/orc/TestOrcFile.java @@ -33,7 +33,6 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; -import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -43,10 +42,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -811,14 +811,14 @@ public void testUnionAndTimestamp() throws Exception { 
assertEquals("struct>", inspector.getTypeName()); assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"), - ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp()); + ((TimestampWritableV2) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp()); union = (OrcUnion) ((OrcLazyUnion) row.getFieldValue(1)).materialize(); assertEquals(0, union.getTag()); assertEquals(new IntWritable(42), union.getObject()); lazyRow = (OrcLazyStruct) rows.next(lazyRow); row = (OrcStruct) lazyRow.materialize(); assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"), - ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp()); + ((TimestampWritableV2) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp()); ((OrcLazyUnion) row.getFieldValue(1)).materialize(); assertEquals(1, union.getTag()); assertEquals(new Text("hello"), union.getObject()); @@ -841,14 +841,14 @@ public void testUnionAndTimestamp() throws Exception { lazyRow = (OrcLazyStruct) rows.next(lazyRow); row = (OrcStruct) lazyRow.materialize(); assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"), - ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp()); + ((TimestampWritableV2) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp()); ((OrcLazyUnion) row.getFieldValue(1)).materialize(); assertEquals(new IntWritable(200000), union.getObject()); for(int i=1900; i < 2200; ++i) { lazyRow = (OrcLazyStruct) rows.next(lazyRow); row = (OrcStruct) lazyRow.materialize(); assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i), - ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp()); + ((TimestampWritableV2) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp()); ((OrcLazyUnion) row.getFieldValue(1)).materialize(); if ((i & 1) == 0) { assertEquals(0, union.getTag());