From de22c65be490e80b1c52989a6c5cb5871d3ec3a6 Mon Sep 17 00:00:00 2001 From: frisch Date: Tue, 7 Jun 2022 15:00:32 +0200 Subject: [PATCH] fix(csv): correct orc with csv field --- pom.xml | 2 +- .../java/com/cloudera/frisch/randomdatagen/model/Row.java | 7 +++++++ .../cloudera/frisch/randomdatagen/model/type/CsvField.java | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c977842..482529c 100644 --- a/pom.xml +++ b/pom.xml @@ -12,7 +12,7 @@ 11 11 - 7.1.7.1000-141 + 7.1.7.0-551 diff --git a/src/main/java/com/cloudera/frisch/randomdatagen/model/Row.java b/src/main/java/com/cloudera/frisch/randomdatagen/model/Row.java index 6d822b4..d076643 100644 --- a/src/main/java/com/cloudera/frisch/randomdatagen/model/Row.java +++ b/src/main/java/com/cloudera/frisch/randomdatagen/model/Row.java @@ -1,6 +1,7 @@ package com.cloudera.frisch.randomdatagen.model; import com.cloudera.frisch.randomdatagen.model.type.CityField; +import com.cloudera.frisch.randomdatagen.model.type.CsvField; import com.cloudera.frisch.randomdatagen.model.type.Field; import com.cloudera.frisch.randomdatagen.sink.KafkaSink; import com.cloudera.frisch.randomdatagen.sink.storedobjects.OzoneObject; @@ -195,6 +196,12 @@ public void fillinOrcVector(int rowNumber, Map v CityField.City valueAsCity = (CityField.City) values.get(field); bytesColumnVectorCity.setVal(rowNumber, valueAsCity.getName().getBytes(StandardCharsets.UTF_8)); break; + case "CsvField": + BytesColumnVector bytesColumnVectorCsv = (BytesColumnVector) cv; + Map<String, String> valueAsCsv = (Map<String, String>) values.get(field); + CsvField csvField = (CsvField) model.getFieldFromName(field); + bytesColumnVectorCsv.setVal(rowNumber, valueAsCsv.get(csvField.getMainField()).getBytes(StandardCharsets.UTF_8)); + break; case "BirthdateField": BytesColumnVector bytesColumnVectorDate = (BytesColumnVector) cv; LocalDate valueDate = (LocalDate) values.get(field); diff --git a/src/main/java/com/cloudera/frisch/randomdatagen/model/type/CsvField.java
b/src/main/java/com/cloudera/frisch/randomdatagen/model/type/CsvField.java index d84b32d..c39d59a 100644 --- a/src/main/java/com/cloudera/frisch/randomdatagen/model/type/CsvField.java +++ b/src/main/java/com/cloudera/frisch/randomdatagen/model/type/CsvField.java @@ -1,5 +1,6 @@ package com.cloudera.frisch.randomdatagen.model.type; +import lombok.Getter; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -24,6 +25,7 @@ public class CsvField extends Field<Map<String, String>> { // We suppose that each row of the CSV read will fit in a map of string to string (everything is converted to a string) private String file; + @Getter private String mainField; private LinkedList columnNames;