Skip to content

Commit

Permalink
fix(csv): correct orc with csv field
Browse files Browse the repository at this point in the history
  • Loading branch information
frischHWC committed Jun 7, 2022
1 parent a72c801 commit de22c65
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<!-- Check your EXACT version of CDP, as all platform jars depend on it -->
<cdp.version>7.1.7.1000-141</cdp.version>
<cdp.version>7.1.7.0-551</cdp.version>
</properties>

<!-- Cloudera repository needed to get good CDP versions of jars -->
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.cloudera.frisch.randomdatagen.model;

import com.cloudera.frisch.randomdatagen.model.type.CityField;
import com.cloudera.frisch.randomdatagen.model.type.CsvField;
import com.cloudera.frisch.randomdatagen.model.type.Field;
import com.cloudera.frisch.randomdatagen.sink.KafkaSink;
import com.cloudera.frisch.randomdatagen.sink.storedobjects.OzoneObject;
Expand Down Expand Up @@ -195,6 +196,12 @@ public void fillinOrcVector(int rowNumber, Map<String, ? extends ColumnVector> v
CityField.City valueAsCity = (CityField.City) values.get(field);
bytesColumnVectorCity.setVal(rowNumber, valueAsCity.getName().getBytes(StandardCharsets.UTF_8));
break;
case "CsvField":
BytesColumnVector bytesColumnVectorCsv = (BytesColumnVector) cv;
Map<String, String> valueAsCsv = (Map<String, String>) values.get(field);
CsvField csvField = (CsvField) model.getFieldFromName(field);
bytesColumnVectorCsv.setVal(rowNumber, valueAsCsv.get(csvField.getMainField()).getBytes(StandardCharsets.UTF_8));
break;
case "BirthdateField":
BytesColumnVector bytesColumnVectorDate = (BytesColumnVector) cv;
LocalDate valueDate = (LocalDate) values.get(field);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.cloudera.frisch.randomdatagen.model.type;

import lombok.Getter;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
Expand All @@ -24,6 +25,7 @@ public class CsvField extends Field<Map<String, String>> {

// We assume that each row read from the CSV fits in a map of string to string (every value is converted to a string)
private String file;
@Getter
private String mainField;
private LinkedList<String> columnNames;

Expand Down

0 comments on commit de22c65

Please sign in to comment.