diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java
index c05eed9cd..84046688f 100644
--- a/core/src/main/java/com/scalar/db/common/error/CoreError.java
+++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java
@@ -749,6 +749,13 @@ public enum CoreError implements ScalarDbError {
       ""),
   DATA_LOADER_MISSING_COLUMN(
       Category.USER_ERROR, "0168", "Missing field or column mapping for %s", "", ""),
+  DATA_LOADER_MISSING_SOURCE_FIELD(
+      Category.USER_ERROR,
+      "0169",
+      "The data mapping source field '%s' for table '%s' is missing in the JSON data record",
+      "",
+      ""),
+
   //
   // Errors for the concurrency error category
   //
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java
new file mode 100644
index 000000000..d90fd49b6
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java
@@ -0,0 +1,14 @@
+package com.scalar.db.dataloader.core;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
+
+public class DataLoaderObjectMapper extends ObjectMapper {
+
+  public DataLoaderObjectMapper() {
+    super();
+    this.setSerializationInclusion(JsonInclude.Include.NON_NULL);
+    this.registerModule(new JavaTimeModule());
+  }
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java
new file mode 100644
index 000000000..10157569b
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java
@@ -0,0 +1,23 @@
+package com.scalar.db.dataloader.core.dataimport;
+
+import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus;
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult;
+import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult;
+import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus;
+
+public interface ImportEventListener {
+
+  void onDataChunkStarted(ImportDataChunkStatus status);
+
+  void addOrUpdateDataChunkStatus(ImportDataChunkStatus status);
+
+  void onDataChunkCompleted(ImportDataChunkStatus status);
+
+  void onAllDataChunksCompleted();
+
+  void onTransactionBatchStarted(ImportTransactionBatchStatus batchStatus);
+
+  void onTransactionBatchCompleted(ImportTransactionBatchResult batchResult);
+
+  void onTaskComplete(ImportTaskResult taskResult);
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java
new file mode 100644
index 000000000..1815c9bf1
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java
@@ -0,0 +1,139 @@
+package com.scalar.db.dataloader.core.dataimport;
+
+import com.scalar.db.api.*;
+import com.scalar.db.dataloader.core.ScalarDBMode;
+import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao;
+import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus;
+import com.scalar.db.dataloader.core.dataimport.processor.ImportProcessor;
+import com.scalar.db.dataloader.core.dataimport.processor.ImportProcessorFactory;
+import com.scalar.db.dataloader.core.dataimport.processor.ImportProcessorParams;
+import com.scalar.db.dataloader.core.dataimport.processor.TableColumnDataTypes;
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult;
+import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult;
+import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus;
+import java.io.BufferedReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import lombok.AllArgsConstructor;
+import lombok.NonNull;
+
+@AllArgsConstructor
+public class ImportManager implements ImportEventListener {
+
+  @NonNull private final Map<String, TableMetadata> tableMetadata;
+  @NonNull private final BufferedReader importFileReader;
+  @NonNull private final ImportOptions importOptions;
+  private final ImportProcessorFactory importProcessorFactory;
+  private final List<ImportEventListener> listeners = new ArrayList<>();
+  private final ScalarDBMode scalarDBMode;
+  private final DistributedStorage distributedStorage;
+  private final DistributedTransactionManager distributedTransactionManager;
+  private final List<ImportDataChunkStatus> importDataChunkStatusList = new ArrayList<>();
+
+  /**
+   * Start the import process
+   *
+   * @return list of import data chunk status objects
+   */
+  public List<ImportDataChunkStatus> startImport() {
+    ImportProcessorParams params =
+        ImportProcessorParams.builder()
+            .scalarDBMode(scalarDBMode)
+            .importOptions(importOptions)
+            .tableMetadataByTableName(tableMetadata)
+            .dao(new ScalarDBDao())
+            .distributedTransactionManager(distributedTransactionManager)
+            .distributedStorage(distributedStorage)
+            .tableColumnDataTypes(getTableColumnDataTypes())
+            .build();
+    ImportProcessor processor = importProcessorFactory.createImportProcessor(params);
+    processor.addListener(this);
+    // If the data chunk size is 0, then process the entire file in a single data chunk
+    int dataChunkSize =
+        importOptions.getDataChunkSize() == 0
+            ?
Integer.MAX_VALUE + : importOptions.getDataChunkSize(); + return processor.process( + dataChunkSize, importOptions.getTransactionBatchSize(), importFileReader); + } + + public void addListener(ImportEventListener listener) { + listeners.add(listener); + } + + public void removeListener(ImportEventListener listener) { + listeners.remove(listener); + } + + @Override + public void onDataChunkStarted(ImportDataChunkStatus status) { + for (ImportEventListener listener : listeners) { + listener.onDataChunkStarted(status); + } + } + + @Override + public void addOrUpdateDataChunkStatus(ImportDataChunkStatus status) { + synchronized (importDataChunkStatusList) { + for (int i = 0; i < importDataChunkStatusList.size(); i++) { + if (importDataChunkStatusList.get(i).getDataChunkId() == status.getDataChunkId()) { + // Object found, replace it with the new one + importDataChunkStatusList.set(i, status); + return; + } + } + // If object is not found, add it to the list + importDataChunkStatusList.add(status); + } + } + + @Override + public void onDataChunkCompleted(ImportDataChunkStatus status) { + for (ImportEventListener listener : listeners) { + listener.onDataChunkCompleted(status); + } + } + + @Override + public void onTransactionBatchStarted(ImportTransactionBatchStatus status) { + for (ImportEventListener listener : listeners) { + listener.onTransactionBatchStarted(status); + } + } + + @Override + public void onTransactionBatchCompleted(ImportTransactionBatchResult batchResult) { + for (ImportEventListener listener : listeners) { + listener.onTransactionBatchCompleted(batchResult); + } + } + + @Override + public void onTaskComplete(ImportTaskResult taskResult) { + for (ImportEventListener listener : listeners) { + listener.onTaskComplete(taskResult); + } + } + + @Override + public void onAllDataChunksCompleted() { + for (ImportEventListener listener : listeners) { + listener.onAllDataChunksCompleted(); + } + } + + public List getImportDataChunkStatusList() { + return importDataChunkStatusList; + } + + public TableColumnDataTypes getTableColumnDataTypes() { + TableColumnDataTypes tableColumnDataTypes = new TableColumnDataTypes(); + tableMetadata.forEach( + (name, metadata) -> + metadata + .getColumnDataTypes() + .forEach((k, v) -> tableColumnDataTypes.addColumnDataType(name, k, v))); + return tableColumnDataTypes; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java new file mode 100644 index 000000000..01f1dbcf1 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -0,0 +1,141 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.dataloader.core.DataLoaderObjectMapper; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import java.io.BufferedReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import 
java.util.concurrent.atomic.AtomicInteger; + +public class CsvImportProcessor extends ImportProcessor { + private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); + private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); + + public CsvImportProcessor(ImportProcessorParams params) { + super(params); + } + + /** + * Process the data from the import file + * + * @param dataChunkSize size of data chunk + * @param transactionBatchSize size of transaction batch + * @param reader reader which reads the source file + * @return process data chunk status list + */ + @Override + public List process( + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + int numCores = Runtime.getRuntime().availableProcessors(); + ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + // Create a queue to hold data batches + Queue dataChunkQueue = new LinkedList<>(); + Thread readerThread = + new Thread( + () -> { + try { + String header = params.getImportOptions().getCustomHeaderRow(); + String delimiter = Character.toString(params.getImportOptions().getDelimiter()); + if (delimiter.trim().isEmpty()) { + delimiter = ","; + } + if (header == null) { + header = reader.readLine(); + } + String[] headerArray = header.split(delimiter); + String line; + int rowNumber = 1; + List currentDataChunk = new ArrayList<>(); + while ((line = reader.readLine()) != null) { + String[] dataArray = line.split(delimiter); + if (headerArray.length != dataArray.length) { + // Throw a custom exception for related issue + throw new RuntimeException(); + } + JsonNode jsonNode = combineHeaderAndData(headerArray, dataArray); + if (jsonNode == null || jsonNode.isEmpty()) { + continue; + } + + ImportRow importRow = new ImportRow(rowNumber, jsonNode); + currentDataChunk.add(importRow); + // If the data chunk is full, add it to the queue + if (currentDataChunk.size() == dataChunkSize) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + currentDataChunk = new ArrayList<>(); + } + rowNumber++; + } + + // Add the last data chunk to the queue + if (!currentDataChunk.isEmpty()) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + } + + } catch (IOException e) { + throw new RuntimeException(); + } + }); + + readerThread.start(); + try { + // Wait for readerThread to finish + readerThread.join(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + // Process data chunks in parallel + List> dataChunkFutures = new ArrayList<>(); + while (!dataChunkQueue.isEmpty()) { + ImportDataChunk dataChunk = dataChunkQueue.poll(); + Future dataChunkFuture = + dataChunkExecutor.submit( + () -> processDataChunk(dataChunk, transactionBatchSize, numCores)); + dataChunkFutures.add(dataChunkFuture); + } + + List importDataChunkStatusList = new ArrayList<>(); + // Wait for all data chunk threads to complete + for (Future dataChunkFuture : dataChunkFutures) { + try { + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + } catch (Exception e) { + // TODO: handle the exception + e.printStackTrace(); + } + } + dataChunkExecutor.shutdown(); + 
notifyAllDataChunksCompleted();
+    return importDataChunkStatusList;
+  }
+
+  private JsonNode combineHeaderAndData(String[] header, String[] data) {
+    ObjectNode objectNode = OBJECT_MAPPER.createObjectNode();
+    for (int i = 0; i < header.length; i++) {
+      objectNode.put(header[i], data[i]);
+    }
+    return objectNode;
+  }
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java
new file mode 100644
index 000000000..30c1c2608
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java
@@ -0,0 +1,29 @@
+package com.scalar.db.dataloader.core.dataimport.processor;
+
+public class DefaultImportProcessorFactory implements ImportProcessorFactory {
+
+  /**
+   * Create an import processor based on the file format specified in the import params
+   *
+   * @param params import processor params object
+   * @return generated import processor object
+   */
+  @Override
+  public ImportProcessor createImportProcessor(ImportProcessorParams params) {
+    ImportProcessor importProcessor;
+    switch (params.getImportOptions().getFileFormat()) {
+      case JSONL:
+        importProcessor = new JsonLinesImportProcessor(params);
+        break;
+      case JSON:
+        importProcessor = new JsonImportProcessor(params);
+        break;
+      case CSV:
+        importProcessor = new CsvImportProcessor(params);
+        break;
+      default:
+        importProcessor = null;
+    }
+    return importProcessor;
+  }
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java
new file mode 100644
index 000000000..2d16b9d18
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java
@@ -0,0 +1,414 @@
+package com.scalar.db.dataloader.core.dataimport.processor;
+
+import com.scalar.db.api.DistributedTransaction;
+import com.scalar.db.dataloader.core.ScalarDBMode;
+import com.scalar.db.dataloader.core.dataimport.ImportEventListener;
+import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk;
+import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus;
+import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatusState;
+import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow;
+import com.scalar.db.dataloader.core.dataimport.task.ImportStorageTask;
+import com.scalar.db.dataloader.core.dataimport.task.ImportTaskParams;
+import com.scalar.db.dataloader.core.dataimport.task.ImportTransactionalTask;
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTargetResultStatus;
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult;
+import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatch;
+import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult;
+import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus;
+import com.scalar.db.exception.transaction.TransactionException;
+import java.io.BufferedReader;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; +import lombok.RequiredArgsConstructor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@RequiredArgsConstructor +public abstract class ImportProcessor { + + final ImportProcessorParams params; + private static final Logger LOGGER = LoggerFactory.getLogger(ImportProcessor.class); + private final List listeners = new ArrayList<>(); + + /** + * * Process the source data from import file + * + * @param dataChunkSize size of data chunk + * @param transactionBatchSize size of transaction batch + * @param reader reader which reads the source file + * @return list of import data chunk status objects + */ + public List process( + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + return Collections.emptyList(); + } + + /** + * Add import event listener to listener list + * + * @param listener import event listener + */ + public void addListener(ImportEventListener listener) { + listeners.add(listener); + } + + /** + * Remove import event listener from listener list + * + * @param listener import event listener + */ + public void removeListener(ImportEventListener listener) { + listeners.remove(listener); + } + + /** + * Notify once the task is completed + * + * @param result task result object + */ + protected void notifyStorageRecordCompleted(ImportTaskResult result) { + // Add data to summary, success logs with/without raw data + for (ImportEventListener listener : listeners) { + listener.onTaskComplete(result); + } + } + + /** + * Notify once the data chunk process is started + * + * @param status data chunk status object + */ + protected void notifyDataChunkStarted(ImportDataChunkStatus status) { + for (ImportEventListener listener : listeners) { + listener.onDataChunkStarted(status); + listener.addOrUpdateDataChunkStatus(status); + } + } + + /** + * Notify once the data chunk process is completed + * + * @param status data chunk status object + */ + protected void notifyDataChunkCompleted(ImportDataChunkStatus status) { + for (ImportEventListener listener : listeners) { + listener.onDataChunkCompleted(status); + listener.addOrUpdateDataChunkStatus(status); + } + } + + /** + * Notify once the import transaction batch is started + * + * @param batchStatus import transaction batch status object + */ + protected void notifyTransactionBatchStarted(ImportTransactionBatchStatus batchStatus) { + for (ImportEventListener listener : listeners) { + listener.onTransactionBatchStarted(batchStatus); + } + } + + /** + * Notify once the import transaction batch is completed + * + * @param batchResult import transaction batch result object + */ + protected void notifyTransactionBatchCompleted(ImportTransactionBatchResult batchResult) { + for (ImportEventListener listener : listeners) { + listener.onTransactionBatchCompleted(batchResult); + } + } + + /** Notify when all data chunks processes are completed */ + protected void notifyAllDataChunksCompleted() { + for (ImportEventListener listener : listeners) { + listener.onAllDataChunksCompleted(); + } + } + + /** + * Split the data chunk into transaction batches + * + * @param dataChunk data chunk object + * @param batchSize batch size + * @return created list of transaction batches + */ + private List splitIntoTransactionBatches( + ImportDataChunk dataChunk, int batchSize) { + List transactionBatches = new ArrayList<>(); + AtomicInteger transactionBatchIdCounter = new AtomicInteger(0); + + List 
importRows = dataChunk.getSourceData(); + for (int i = 0; i < importRows.size(); i += batchSize) { + int endIndex = Math.min(i + batchSize, importRows.size()); + List transactionBatchData = importRows.subList(i, endIndex); + int transactionBatchId = transactionBatchIdCounter.getAndIncrement(); + ImportTransactionBatch transactionBatch = + ImportTransactionBatch.builder() + .transactionBatchId(transactionBatchId) + .sourceData(transactionBatchData) + .build(); + transactionBatches.add(transactionBatch); + } + return transactionBatches; + } + + /** + * To process a transaction batch and return the result + * + * @param dataChunk data chunk object + * @param transactionBatch transaction batch object + * @return processed transaction batch result + */ + private ImportTransactionBatchResult processTransactionBatch( + ImportDataChunk dataChunk, ImportTransactionBatch transactionBatch) { + ImportTransactionBatchStatus status = + ImportTransactionBatchStatus.builder() + .dataChunkId(dataChunk.getDataChunkId()) + .transactionBatchId(transactionBatch.getTransactionBatchId()) + .build(); + notifyTransactionBatchStarted(status); + List importRecordResult = new ArrayList<>(); + boolean isSuccess; + String error = ""; + try { + // Create the ScalarDB transaction + DistributedTransaction transaction = params.getDistributedTransactionManager().start(); + + // Loop over the transaction batch and process each record + for (ImportRow importRow : transactionBatch.getSourceData()) { + ImportTaskParams taskParams = + ImportTaskParams.builder() + .sourceRecord(importRow.getSourceData()) + .dataChunkId(dataChunk.getDataChunkId()) + .rowNumber(importRow.getRowNumber()) + .importOptions(params.getImportOptions()) + .tableColumnDataTypes(params.getTableColumnDataTypes()) + .tableMetadataByTableName(params.getTableMetadataByTableName()) + .dao(params.getDao()) + .build(); + importRecordResult.add(new ImportTransactionalTask(taskParams, transaction).execute()); + } + isSuccess = + importRecordResult.stream() + .allMatch( + importTaskResult -> + importTaskResult.getTargets().stream() + .allMatch( + targetResult -> + targetResult.getStatus().equals(ImportTargetResultStatus.SAVED))); + + // Check and Commit the transaction + if (isSuccess) { + transaction.commit(); + } else { + transaction.abort(); + error = "All transactions are aborted"; + } + + } catch (TransactionException e) { + isSuccess = false; + LOGGER.error(e.getMessage()); + } + ImportTransactionBatchResult importTransactionBatchResult = + ImportTransactionBatchResult.builder() + .transactionBatchId(transactionBatch.getTransactionBatchId()) + .success(isSuccess) + .dataChunkId(dataChunk.getDataChunkId()) + .records(importRecordResult) + .errors(Collections.singletonList(error)) + .build(); + notifyTransactionBatchCompleted(importTransactionBatchResult); + return importTransactionBatchResult; + } + + /** + * @param dataChunk data chunk object + * @param importRow data row object + * @return thr task result after processing the row data + */ + private ImportTaskResult processStorageRecord(ImportDataChunk dataChunk, ImportRow importRow) { + ImportTaskParams taskParams = + ImportTaskParams.builder() + .sourceRecord(importRow.getSourceData()) + .dataChunkId(dataChunk.getDataChunkId()) + .rowNumber(importRow.getRowNumber()) + .importOptions(params.getImportOptions()) + .tableColumnDataTypes(params.getTableColumnDataTypes()) + .tableMetadataByTableName(params.getTableMetadataByTableName()) + .dao(params.getDao()) + .build(); + ImportTaskResult importRecordResult = + 
new ImportStorageTask(taskParams, params.getDistributedStorage()).execute(); + + ImportTaskResult modifiedTaskResult = + ImportTaskResult.builder() + .rowNumber(importRecordResult.getRowNumber()) + .rawRecord(importRecordResult.getRawRecord()) + .targets(importRecordResult.getTargets()) + .dataChunkId(dataChunk.getDataChunkId()) + .build(); + notifyStorageRecordCompleted(modifiedTaskResult); + return modifiedTaskResult; + } + + /** + * Process data chunk data + * + * @param dataChunk data chunk object + * @param transactionBatchSize transaction batch size + * @param numCores num of cpu cores + * @return import data chunk status object after processing the data chunk + */ + protected ImportDataChunkStatus processDataChunk( + ImportDataChunk dataChunk, int transactionBatchSize, int numCores) { + ImportDataChunkStatus status = + ImportDataChunkStatus.builder() + .dataChunkId(dataChunk.getDataChunkId()) + .startTime(Instant.now()) + .status(ImportDataChunkStatusState.IN_PROGRESS) + .build(); + notifyDataChunkStarted(status); + ImportDataChunkStatus importDataChunkStatus; + if (params.getScalarDBMode() == ScalarDBMode.TRANSACTION) { + importDataChunkStatus = + processDataChunkWithTransactions(dataChunk, transactionBatchSize, numCores); + } else { + importDataChunkStatus = processDataChunkWithoutTransactions(dataChunk, numCores); + } + notifyDataChunkCompleted(importDataChunkStatus); + return importDataChunkStatus; + } + + /** + * Process data chunk data with transactions + * + * @param dataChunk data chunk object + * @param transactionBatchSize transaction batch size + * @param numCores num of cpu cores + * @return import data chunk status object after processing the data chunk + */ + private ImportDataChunkStatus processDataChunkWithTransactions( + ImportDataChunk dataChunk, int transactionBatchSize, int numCores) { + Instant startTime = Instant.now(); + List transactionBatches = + splitIntoTransactionBatches(dataChunk, transactionBatchSize); + ExecutorService transactionBatchExecutor = + Executors.newFixedThreadPool(Math.min(transactionBatches.size(), numCores)); + List> transactionBatchFutures = new ArrayList<>(); + AtomicInteger successCount = new AtomicInteger(0); + AtomicInteger failureCount = new AtomicInteger(0); + for (ImportTransactionBatch transactionBatch : transactionBatches) { + Future transactionBatchFuture = + transactionBatchExecutor.submit( + () -> processTransactionBatch(dataChunk, transactionBatch)); + transactionBatchFutures.add(transactionBatchFuture); + } + + waitForFuturesToComplete(transactionBatchFutures); + transactionBatchExecutor.shutdown(); + transactionBatchFutures.forEach( + batchResult -> { + try { + ImportTransactionBatchResult importTransactionBatchResult = + (ImportTransactionBatchResult) batchResult.get(); + importTransactionBatchResult + .getRecords() + .forEach( + batchRecords -> { + if (batchRecords.getTargets().stream() + .allMatch( + targetResult -> + targetResult + .getStatus() + .equals(ImportTargetResultStatus.SAVED))) { + successCount.incrementAndGet(); + } else { + failureCount.incrementAndGet(); + } + }); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + Instant endTime = Instant.now(); + int totalDuration = (int) Duration.between(startTime, endTime).toMillis(); + return ImportDataChunkStatus.builder() + .dataChunkId(dataChunk.getDataChunkId()) + .failureCount(failureCount.get()) + .successCount(successCount.get()) + .totalRecords(dataChunk.getSourceData().size()) + 
.batchCount(transactionBatches.size()) + .status(ImportDataChunkStatusState.COMPLETE) + .startTime(startTime) + .endTime(endTime) + .totalDurationInMilliSeconds(totalDuration) + .build(); + } + + /** + * Process data chunk data without transactions + * + * @param dataChunk data chunk object + * @param numCores num of cpu cores + * @return import data chunk status object after processing the data chunk + */ + private ImportDataChunkStatus processDataChunkWithoutTransactions( + ImportDataChunk dataChunk, int numCores) { + Instant startTime = Instant.now(); + AtomicInteger successCount = new AtomicInteger(0); + AtomicInteger failureCount = new AtomicInteger(0); + ExecutorService recordExecutor = Executors.newFixedThreadPool(numCores); + List> recordFutures = new ArrayList<>(); + for (ImportRow importRow : dataChunk.getSourceData()) { + Future recordFuture = + recordExecutor.submit(() -> processStorageRecord(dataChunk, importRow)); + recordFutures.add(recordFuture); + } + waitForFuturesToComplete(recordFutures); + recordExecutor.shutdown(); + recordFutures.forEach( + r -> { + try { + ImportTaskResult result = (ImportTaskResult) r.get(); + boolean allSaved = + result.getTargets().stream() + .allMatch(t -> t.getStatus().equals(ImportTargetResultStatus.SAVED)); + if (allSaved) successCount.incrementAndGet(); + else failureCount.incrementAndGet(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + Instant endTime = Instant.now(); + int totalDuration = (int) Duration.between(startTime, endTime).toMillis(); + return ImportDataChunkStatus.builder() + .dataChunkId(dataChunk.getDataChunkId()) + .totalRecords(dataChunk.getSourceData().size()) + .successCount(successCount.get()) + .failureCount(failureCount.get()) + .startTime(startTime) + .endTime(endTime) + .totalDurationInMilliSeconds(totalDuration) + .status(ImportDataChunkStatusState.COMPLETE) + .build(); + } + + private void waitForFuturesToComplete(List> futures) { + for (Future future : futures) { + try { + future.get(); + } catch (Exception e) { + LOGGER.error(e.getMessage()); + } + } + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java new file mode 100644 index 000000000..e953b1222 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java @@ -0,0 +1,5 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +public interface ImportProcessorFactory { + ImportProcessor createImportProcessor(ImportProcessorParams params); +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java new file mode 100644 index 000000000..632b1dc24 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java @@ -0,0 +1,23 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedTransactionManager; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.ScalarDBMode; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; 
+import java.util.Map; +import lombok.Builder; +import lombok.Value; + +@Builder +@Value +public class ImportProcessorParams { + ScalarDBMode scalarDBMode; + ImportOptions importOptions; + Map tableMetadataByTableName; + TableColumnDataTypes tableColumnDataTypes; + ScalarDBDao dao; + DistributedStorage distributedStorage; + DistributedTransactionManager distributedTransactionManager; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java new file mode 100644 index 000000000..c02fa625b --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -0,0 +1,136 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.dataloader.core.DataLoaderObjectMapper; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import java.io.BufferedReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; + +public class JsonImportProcessor extends ImportProcessor { + + private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); + private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); + + public JsonImportProcessor(ImportProcessorParams params) { + super(params); + } + + /** + * Process the data from the import file + * + * @param dataChunkSize size of data chunk + * @param transactionBatchSize size of transaction batch + * @param reader reader which reads the source file + * @return process data chunk status list + */ + @Override + public List process( + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + // Set the number of threads based on the available CPU cores + int numCores = Runtime.getRuntime().availableProcessors(); + + // Create a thread pool for processing data batches + ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + + // Create a queue to hold data batches + Queue dataChunkQueue = new LinkedList<>(); + + // Create a thread to read JSON lines and populate data batches + Thread readerThread = + new Thread( + () -> { + try (JsonParser jsonParser = new JsonFactory().createParser(reader)) { + if (jsonParser.nextToken() != JsonToken.START_ARRAY) { + throw new IOException("Expected content to be an array"); + } + + List currentDataChunk = new ArrayList<>(); + int rowNumber = 1; + while (jsonParser.nextToken() != JsonToken.END_ARRAY) { + JsonNode jsonNode = OBJECT_MAPPER.readTree(jsonParser); + // TODO: do something with the null jsonNode + if (jsonNode == null || jsonNode.isEmpty()) { + continue; + } + + ImportRow importRow = new ImportRow(rowNumber, jsonNode); + + currentDataChunk.add(importRow); + + // If the data chunk is full, add it to the queue + if (currentDataChunk.size() == dataChunkSize) { + int dataChunkId 
= dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + currentDataChunk = new ArrayList<>(); + } + + rowNumber++; + } + + // Add the last data chunk to the queue + if (!currentDataChunk.isEmpty()) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + } + } catch (IOException e) { + // TODO: handle this exception + throw new RuntimeException(e); + } + }); + readerThread.start(); + + try { + // Wait for readerThread to finish + readerThread.join(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + System.err.println("Main thread was interrupted."); + } + + // Process data chunks in parallel + List> dataChunkFutures = new ArrayList<>(); + while (!dataChunkQueue.isEmpty()) { + ImportDataChunk dataChunk = dataChunkQueue.poll(); + Future dataChunkFuture = + dataChunkExecutor.submit( + () -> processDataChunk(dataChunk, transactionBatchSize, numCores)); + dataChunkFutures.add(dataChunkFuture); + } + List importDataChunkStatusList = new ArrayList<>(); + // Wait for all data chunk threads to complete + for (Future dataChunkFuture : dataChunkFutures) { + try { + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + } catch (Exception e) { + e.printStackTrace(); + } + } + + dataChunkExecutor.shutdown(); + notifyAllDataChunksCompleted(); + return importDataChunkStatusList; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java new file mode 100644 index 000000000..b63f897cb --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -0,0 +1,126 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.dataloader.core.DataLoaderObjectMapper; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import java.io.BufferedReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; + +public class JsonLinesImportProcessor extends ImportProcessor { + + private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); + private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); + + public JsonLinesImportProcessor(ImportProcessorParams params) { + super(params); + } + + /** + * Process the data from the import file + * + * @param dataChunkSize size of data chunk + * @param transactionBatchSize size of transaction batch + * @param reader reader which reads the source file + * @return process data chunk status list + */ + @Override + public List process( + int dataChunkSize, int transactionBatchSize, 
BufferedReader reader) { + int numCores = Runtime.getRuntime().availableProcessors(); + + // Create a thread pool for processing data batches + ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + + // Create a queue to hold data batches + Queue dataChunkQueue = new LinkedList<>(); + + // Create a thread to read JSON lines and populate data batches + Thread readerThread = + new Thread( + () -> { + try { + List currentDataChunk = new ArrayList<>(); + int rowNumber = 1; + String line; + while ((line = reader.readLine()) != null) { + JsonNode jsonNode = OBJECT_MAPPER.readTree(line); + // TODO: do something with the null jsonNode + if (jsonNode == null || jsonNode.isEmpty()) { + continue; + } + + ImportRow importRow = new ImportRow(rowNumber, jsonNode); + currentDataChunk.add(importRow); + + // If the data chunk is full, add it to the queue + if (currentDataChunk.size() == dataChunkSize) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + currentDataChunk = new ArrayList<>(); + } + rowNumber++; + } + + // Add the last data chunk to the queue + if (!currentDataChunk.isEmpty()) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + } + } catch (IOException e) { + // TODO: handle this exception + throw new RuntimeException(e); + } + }); + readerThread.start(); + try { + // Wait for readerThread to finish + readerThread.join(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + System.err.println("Main thread was interrupted."); + } + // Process data chunks in parallel + List> dataChunkFutures = new ArrayList<>(); + while (!dataChunkQueue.isEmpty()) { + ImportDataChunk dataChunk = dataChunkQueue.poll(); + Future dataChunkFuture = + dataChunkExecutor.submit( + () -> processDataChunk(dataChunk, transactionBatchSize, numCores)); + dataChunkFutures.add(dataChunkFuture); + } + + List importDataChunkStatusList = new ArrayList<>(); + // Wait for all data chunk threads to complete + for (Future dataChunkFuture : dataChunkFutures) { + try { + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + } catch (Exception e) { + // TODO: handle the exception + e.printStackTrace(); + } + } + dataChunkExecutor.shutdown(); + notifyAllDataChunksCompleted(); + return importDataChunkStatusList; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java new file mode 100644 index 000000000..54268b2cc --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java @@ -0,0 +1,31 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.scalar.db.io.DataType; +import java.util.HashMap; +import java.util.Map; + +public class TableColumnDataTypes { + private final Map> dataTypesByColumnsByTable; + + public TableColumnDataTypes() { + this.dataTypesByColumnsByTable = new HashMap<>(); + } + + public void addColumnDataType(String tableName, String columnName, DataType dataType) { + dataTypesByColumnsByTable + 
.computeIfAbsent(tableName, key -> new HashMap<>())
+        .put(columnName, dataType);
+  }
+
+  public DataType getDataType(String tableName, String columnName) {
+    Map<String, DataType> columnDataTypes = dataTypesByColumnsByTable.get(tableName);
+    if (columnDataTypes != null) {
+      return columnDataTypes.get(columnName);
+    }
+    return null;
+  }
+
+  public Map<String, DataType> getColumnDataTypes(String tableName) {
+    return dataTypesByColumnsByTable.get(tableName);
+  }
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java
new file mode 100644
index 000000000..2211f054b
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java
@@ -0,0 +1,36 @@
+package com.scalar.db.dataloader.core.dataimport.task;
+
+import com.scalar.db.api.DistributedStorage;
+import com.scalar.db.api.Result;
+import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException;
+import com.scalar.db.io.Column;
+import com.scalar.db.io.Key;
+import java.util.*;
+
+public class ImportStorageTask extends ImportTask {
+
+  private final DistributedStorage storage;
+
+  public ImportStorageTask(ImportTaskParams params, DistributedStorage storage) {
+    super(params);
+    this.storage = storage;
+  }
+
+  @Override
+  protected Optional<Result> getDataRecord(
+      String namespace, String tableName, Key partitionKey, Key clusteringKey)
+      throws ScalarDBDaoException {
+    return params.getDao().get(namespace, tableName, partitionKey, clusteringKey, this.storage);
+  }
+
+  @Override
+  protected void saveRecord(
+      String namespace,
+      String tableName,
+      Key partitionKey,
+      Key clusteringKey,
+      List<Column<?>> columns)
+      throws ScalarDBDaoException {
+    params.getDao().put(namespace, tableName, partitionKey, clusteringKey, columns, this.storage);
+  }
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java
new file mode 100644
index 000000000..ed54e742f
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java
@@ -0,0 +1,360 @@
+package com.scalar.db.dataloader.core.dataimport.task;
+
+import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.*;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.scalar.db.api.Result;
+import com.scalar.db.api.TableMetadata;
+import com.scalar.db.common.error.CoreError;
+import com.scalar.db.dataloader.core.dataimport.ImportMode;
+import com.scalar.db.dataloader.core.dataimport.ImportOptions;
+import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFile;
+import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable;
+import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTableFieldMapping;
+import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException;
+import com.scalar.db.dataloader.core.dataimport.processor.TableColumnDataTypes;
+import com.scalar.db.dataloader.core.dataimport.task.mapping.ImportDataMapping;
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTargetResult;
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTargetResultStatus;
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult;
+import com.scalar.db.dataloader.core.dataimport.task.validation.ImportSourceRecordValidationResult;
+import com.scalar.db.dataloader.core.dataimport.task.validation.ImportSourceRecordValidator; +import com.scalar.db.dataloader.core.exception.Base64Exception; +import com.scalar.db.dataloader.core.exception.ColumnParsingException; +import com.scalar.db.dataloader.core.util.ColumnUtils; +import com.scalar.db.dataloader.core.util.KeyUtils; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import com.scalar.db.io.Column; +import com.scalar.db.io.DataType; +import com.scalar.db.io.Key; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public abstract class ImportTask { + + protected final ImportTaskParams params; + + public ImportTaskResult execute() { + + ObjectNode mutableSourceRecord = params.getSourceRecord().deepCopy(); + ImportOptions importOptions = params.getImportOptions(); + + // Single table import + if (importOptions.getControlFile() == null) { + String tableLookupKey = + TableMetadataUtil.getTableLookupKey( + importOptions.getNamespace(), importOptions.getTableName()); + ImportTargetResult singleTargetResult = + importIntoSingleTable( + importOptions.getNamespace(), + importOptions.getTableName(), + params.getTableMetadataByTableName().get(tableLookupKey), + params.getTableColumnDataTypes().getColumnDataTypes(tableLookupKey), + null, + mutableSourceRecord); + // Add the single target result to the list of targets and return the result + return ImportTaskResult.builder() + .rawRecord(params.getSourceRecord()) + .rowNumber(params.getRowNumber()) + .targets(Collections.singletonList(singleTargetResult)) + .build(); + } + + // Multi-table import + List multiTargetResults = + startMultiTableImportProcess( + importOptions.getControlFile(), + params.getTableMetadataByTableName(), + params.getTableColumnDataTypes(), + mutableSourceRecord); + + return ImportTaskResult.builder() + .targets(multiTargetResults) + .rawRecord(params.getSourceRecord()) + .rowNumber(params.getRowNumber()) + .build(); + } + + private List startMultiTableImportProcess( + ControlFile controlFile, + Map tableMetadataByTableName, + TableColumnDataTypes tableColumnDataTypes, + ObjectNode mutableSourceRecord) { + + List targetResults = new ArrayList<>(); + + // Import for every table mapping specified in the control file + for (ControlFileTable controlFileTable : controlFile.getTables()) { + for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) { + if (!mutableSourceRecord.has(mapping.getSourceField()) + && !mutableSourceRecord.has(mapping.getTargetColumn())) { + String errorMessage = + CoreError.DATA_LOADER_MISSING_SOURCE_FIELD.buildMessage( + mapping.getSourceField(), controlFileTable.getTableName()); + + ImportTargetResult targetResult = + ImportTargetResult.builder() + .namespace(controlFileTable.getNamespace()) + .tableName(controlFileTable.getTableName()) + .errors(Collections.singletonList(errorMessage)) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .build(); + return Collections.singletonList(targetResult); + } + } + + // Import into a single table + String tableLookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + TableMetadata tableMetadata = tableMetadataByTableName.get(tableLookupKey); + Map dataTypesByColumns = + tableColumnDataTypes.getColumnDataTypes(tableLookupKey); + // Copied data to an object node data was overwritten by following operations and data check + 
// fails when same object is referenced again in logic before + ObjectNode copyNode = mutableSourceRecord.deepCopy(); + ImportTargetResult result = + importIntoSingleTable( + controlFileTable.getNamespace(), + controlFileTable.getTableName(), + tableMetadata, + dataTypesByColumns, + controlFileTable, + copyNode); + targetResults.add(result); + } + return targetResults; + } + + private ImportTargetResult importIntoSingleTable( + String namespace, + String tableName, + TableMetadata tableMetadata, + Map dataTypeByColumnName, + ControlFileTable controlFileTable, + ObjectNode mutableSourceRecord) { + + ImportOptions importOptions = params.getImportOptions(); + + if (dataTypeByColumnName == null || tableMetadata == null) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(Collections.singletonList(ERROR_TABLE_METADATA_MISSING)) + .build(); + } + + LinkedHashSet partitionKeyNames = tableMetadata.getPartitionKeyNames(); + LinkedHashSet clusteringKeyNames = tableMetadata.getClusteringKeyNames(); + LinkedHashSet columnNames = tableMetadata.getColumnNames(); + + applyDataMapping(controlFileTable, mutableSourceRecord); + + boolean checkForMissingColumns = shouldCheckForMissingColumns(importOptions); + + ImportSourceRecordValidationResult validationResult = + validateSourceRecord( + partitionKeyNames, + clusteringKeyNames, + columnNames, + mutableSourceRecord, + checkForMissingColumns); + + if (!validationResult.isValid()) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(validationResult.getErrorMessages()) + .build(); + } + + Optional optionalPartitionKey = + KeyUtils.createPartitionKeyFromSource( + partitionKeyNames, dataTypeByColumnName, mutableSourceRecord); + if (!optionalPartitionKey.isPresent()) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(Collections.singletonList(ERROR_COULD_NOT_FIND_PARTITION_KEY)) + .build(); + } + Optional optionalClusteringKey = Optional.empty(); + if (!clusteringKeyNames.isEmpty()) { + optionalClusteringKey = + KeyUtils.createClusteringKeyFromSource( + clusteringKeyNames, dataTypeByColumnName, mutableSourceRecord); + if (!optionalClusteringKey.isPresent()) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(Collections.singletonList(ERROR_COULD_NOT_FIND_CLUSTERING_KEY)) + .build(); + } + } + + Optional optionalScalarDBResult; + + try { + optionalScalarDBResult = + getDataRecord( + namespace, tableName, optionalPartitionKey.get(), optionalClusteringKey.orElse(null)); + } catch (ScalarDBDaoException e) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.RETRIEVAL_FAILED) + .errors(Collections.singletonList(e.getMessage())) + .build(); + } + ImportTaskAction importAction = + optionalScalarDBResult.isPresent() ? 
ImportTaskAction.UPDATE : ImportTaskAction.INSERT; + + if (importAction == ImportTaskAction.INSERT + && shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { + ImportSourceRecordValidationResult validationResultForMissingColumns = + new ImportSourceRecordValidationResult(); + ImportSourceRecordValidator.checkMissingColumns( + mutableSourceRecord, columnNames, validationResultForMissingColumns); + if (!validationResultForMissingColumns.isValid()) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.MISSING_COLUMNS) + .errors(Collections.singletonList(ERROR_UPSERT_INSERT_MISSING_COLUMNS)) + .build(); + } + } + + if (shouldFailForExistingData(importAction, importOptions)) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .importedRecord(mutableSourceRecord) + .importAction(importAction) + .status(ImportTargetResultStatus.DATA_ALREADY_EXISTS) + .errors(Collections.singletonList(ERROR_DATA_ALREADY_EXISTS)) + .build(); + } + + if (shouldFailForMissingData(importAction, importOptions)) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .importedRecord(mutableSourceRecord) + .importAction(importAction) + .status(ImportTargetResultStatus.DATA_NOT_FOUND) + .errors(Collections.singletonList(ERROR_DATA_NOT_FOUND)) + .build(); + } + + List> columns; + + try { + columns = + ColumnUtils.getColumnsFromResult( + optionalScalarDBResult.orElse(null), + mutableSourceRecord, + importOptions.isIgnoreNullValues(), + partitionKeyNames, + clusteringKeyNames, + columnNames, + dataTypeByColumnName); + } catch (Base64Exception | ColumnParsingException e) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(Collections.singletonList(e.getMessage())) + .build(); + } + + // Time to save the record + try { + saveRecord( + namespace, + tableName, + optionalPartitionKey.get(), + optionalClusteringKey.orElse(null), + columns); + + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .importAction(importAction) + .importedRecord(mutableSourceRecord) + .status(ImportTargetResultStatus.SAVED) + .build(); + + } catch (ScalarDBDaoException e) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .importAction(importAction) + .status(ImportTargetResultStatus.SAVE_FAILED) + .errors(Collections.singletonList(e.getMessage())) + .build(); + } + } + + private void applyDataMapping(ControlFileTable controlFileTable, ObjectNode mutableSourceRecord) { + if (controlFileTable != null) { + ImportDataMapping.apply(mutableSourceRecord, controlFileTable); + } + } + + private boolean shouldCheckForMissingColumns(ImportOptions importOptions) { + return importOptions.getImportMode() == ImportMode.INSERT + || importOptions.isRequireAllColumns(); + } + + private ImportSourceRecordValidationResult validateSourceRecord( + LinkedHashSet partitionKeyNames, + LinkedHashSet clusteringKeyNames, + LinkedHashSet columnNames, + ObjectNode mutableSourceRecord, + boolean checkForMissingColumns) { + return ImportSourceRecordValidator.validateSourceRecord( + partitionKeyNames, + clusteringKeyNames, + columnNames, + mutableSourceRecord, + checkForMissingColumns); + } + + private boolean shouldRevalidateMissingColumns( + ImportOptions importOptions, boolean checkForMissingColumns) { + return !checkForMissingColumns && 
importOptions.getImportMode() == ImportMode.UPSERT; + } + + private boolean shouldFailForExistingData( + ImportTaskAction importAction, ImportOptions importOptions) { + return importAction == ImportTaskAction.UPDATE + && importOptions.getImportMode() == ImportMode.INSERT; + } + + private boolean shouldFailForMissingData( + ImportTaskAction importAction, ImportOptions importOptions) { + return importAction == ImportTaskAction.INSERT + && importOptions.getImportMode() == ImportMode.UPDATE; + } + + protected abstract Optional getDataRecord( + String namespace, String tableName, Key partitionKey, Key clusteringKey) + throws ScalarDBDaoException; + + protected abstract void saveRecord( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + List> columns) + throws ScalarDBDaoException; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java new file mode 100644 index 000000000..f85671140 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java @@ -0,0 +1,24 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import com.scalar.db.dataloader.core.dataimport.processor.TableColumnDataTypes; +import java.util.Map; +import lombok.Builder; +import lombok.NonNull; +import lombok.Value; + +@Builder +@Value +public class ImportTaskParams { + + @NonNull JsonNode sourceRecord; + int dataChunkId; + int rowNumber; + @NonNull ImportOptions importOptions; + @NonNull Map tableMetadataByTableName; + @NonNull TableColumnDataTypes tableColumnDataTypes; + @NonNull ScalarDBDao dao; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java new file mode 100644 index 000000000..71e0d3ae2 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java @@ -0,0 +1,54 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +import com.scalar.db.api.DistributedTransaction; +import com.scalar.db.api.Result; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.exception.transaction.AbortException; +import com.scalar.db.exception.transaction.TransactionException; +import com.scalar.db.io.Column; +import com.scalar.db.io.Key; +import java.util.List; +import java.util.Optional; + +public class ImportTransactionalTask extends ImportTask { + + private final DistributedTransaction transaction; + + public ImportTransactionalTask(ImportTaskParams params, DistributedTransaction transaction) { + super(params); + this.transaction = transaction; + } + + @Override + protected Optional getDataRecord( + String namespace, String tableName, Key partitionKey, Key clusteringKey) + throws ScalarDBDaoException { + return params.getDao().get(namespace, tableName, partitionKey, clusteringKey, transaction); + } + + @Override + protected void saveRecord( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + List> columns) + throws ScalarDBDaoException { + 
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java
new file mode 100644
index 000000000..71e0d3ae2
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java
@@ -0,0 +1,54 @@
+package com.scalar.db.dataloader.core.dataimport.task;
+
+import com.scalar.db.api.DistributedTransaction;
+import com.scalar.db.api.Result;
+import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException;
+import com.scalar.db.exception.transaction.AbortException;
+import com.scalar.db.exception.transaction.TransactionException;
+import com.scalar.db.io.Column;
+import com.scalar.db.io.Key;
+import java.util.List;
+import java.util.Optional;
+
+public class ImportTransactionalTask extends ImportTask {
+
+  private final DistributedTransaction transaction;
+
+  public ImportTransactionalTask(ImportTaskParams params, DistributedTransaction transaction) {
+    super(params);
+    this.transaction = transaction;
+  }
+
+  @Override
+  protected Optional<Result> getDataRecord(
+      String namespace, String tableName, Key partitionKey, Key clusteringKey)
+      throws ScalarDBDaoException {
+    return params.getDao().get(namespace, tableName, partitionKey, clusteringKey, transaction);
+  }
+
+  @Override
+  protected void saveRecord(
+      String namespace,
+      String tableName,
+      Key partitionKey,
+      Key clusteringKey,
+      List<Column<?>> columns)
+      throws ScalarDBDaoException {
+    params.getDao().put(namespace, tableName, partitionKey, clusteringKey, columns, transaction);
+  }
+
+  /**
+   * Aborts the active ScalarDB transaction.
+   *
+   * @throws TransactionException if something goes wrong while aborting the transaction
+   */
+  private void abortActiveTransaction(DistributedTransaction tx) throws TransactionException {
+    if (tx != null) {
+      try {
+        tx.abort();
+      } catch (AbortException e) {
+        throw new TransactionException(e.getMessage(), tx.getId());
+      }
+    }
+  }
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatch.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatch.java
new file mode 100644
index 000000000..a922fd8af
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatch.java
@@ -0,0 +1,14 @@
+package com.scalar.db.dataloader.core.dataimport.transactionbatch;
+
+import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow;
+import java.util.List;
+import lombok.Builder;
+import lombok.Value;
+
+/** Transaction batch details */
+@Builder
+@Value
+public class ImportTransactionBatch {
+  int transactionBatchId;
+  List<ImportRow> sourceData;
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchResult.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchResult.java
new file mode 100644
index 000000000..0e44b6695
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchResult.java
@@ -0,0 +1,32 @@
+package com.scalar.db.dataloader.core.dataimport.transactionbatch;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult;
+import java.util.List;
+import lombok.Builder;
+import lombok.Value;
+
+/** Transaction batch result */
+@Builder
+@Value
+@JsonDeserialize(builder = ImportTransactionBatchResult.ImportTransactionBatchResultBuilder.class)
+public class ImportTransactionBatchResult {
+  @JsonProperty("dataChunkId")
+  int dataChunkId;
+
+  @JsonProperty("transactionBatchId")
+  int transactionBatchId;
+
+  @JsonProperty("transactionId")
+  String transactionId;
+
+  @JsonProperty("records")
+  List<ImportTaskResult> records;
+
+  @JsonProperty("errors")
+  List<String> errors;
+
+  @JsonProperty("success")
+  boolean success;
+}
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchStatus.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchStatus.java
new file mode 100644
index 000000000..1b7bae34c
--- /dev/null
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchStatus.java
@@ -0,0 +1,18 @@
+package com.scalar.db.dataloader.core.dataimport.transactionbatch;
+
+import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult;
+import java.util.List;
+import lombok.Builder;
+import lombok.Value;
+
+/** Batch status details */
+@Builder
+@Value
+public class ImportTransactionBatchStatus {
+  int dataChunkId;
+  int transactionBatchId;
+  String transactionId;
+  List<ImportTaskResult> records;
+  List<String> errors;
+  boolean success;
+}
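As a small illustration of how the new batch result type is meant to surface in logs or summary output, the sketch below builds an ImportTransactionBatchResult with the Lombok-generated builder and serializes it with a plain Jackson ObjectMapper. The transaction id and ids are made-up values for illustration only.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult;
import java.util.Collections;

class TransactionBatchResultSketch {

  public static void main(String[] args) throws Exception {
    // A successful batch with no per-record results attached; "tx-123" is a placeholder id.
    ImportTransactionBatchResult result =
        ImportTransactionBatchResult.builder()
            .dataChunkId(0)
            .transactionBatchId(1)
            .transactionId("tx-123")
            .records(Collections.emptyList())
            .errors(Collections.emptyList())
            .success(true)
            .build();

    // The @JsonProperty annotations determine the field names in the emitted JSON.
    System.out.println(new ObjectMapper().writeValueAsString(result));
  }
}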
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java
index 58f10d0f8..91008df3d 100644
--- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java
@@ -1,7 +1,12 @@
 package com.scalar.db.dataloader.core.util;
 
+import static com.scalar.db.dataloader.core.util.TableMetadataUtil.isMetadataColumn;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.scalar.db.api.Result;
 import com.scalar.db.common.error.CoreError;
 import com.scalar.db.dataloader.core.ColumnInfo;
+import com.scalar.db.dataloader.core.exception.Base64Exception;
 import com.scalar.db.dataloader.core.exception.ColumnParsingException;
 import com.scalar.db.io.BigIntColumn;
 import com.scalar.db.io.BlobColumn;
@@ -12,7 +17,7 @@
 import com.scalar.db.io.FloatColumn;
 import com.scalar.db.io.IntColumn;
 import com.scalar.db.io.TextColumn;
-import java.util.Base64;
+import java.util.*;
 import javax.annotation.Nullable;
 
 /**
@@ -88,4 +93,140 @@ public static Column<?> createColumnFromValue(
           e);
     }
   }
+
+  /**
+   * Get columns from result data
+   *
+   * @param scalarDBResult result record
+   * @param sourceRecord source data
+   * @param ignoreNullValues ignore null values or not
+   * @param partitionKeyNames partition key names
+   * @param clusteringKeyNames clustering key names
+   * @param columnNames column names
+   * @param dataTypesByColumns data types of columns
+   * @return list of columns
+   * @throws Base64Exception if an error occurs while base64 decoding
+   */
+  public static List<Column<?>> getColumnsFromResult(
+      Result scalarDBResult,
+      JsonNode sourceRecord,
+      boolean ignoreNullValues,
+      Set<String> partitionKeyNames,
+      Set<String> clusteringKeyNames,
+      Set<String> columnNames,
+      Map<String, DataType> dataTypesByColumns)
+      throws Base64Exception, ColumnParsingException {
+
+    List<Column<?>> columns = new ArrayList<>();
+    Set<String> columnsToIgnore = getColumnsToIgnore(partitionKeyNames, clusteringKeyNames);
+
+    for (String columnName : columnNames) {
+      if (isMetadataColumn(columnName, columnsToIgnore, columnNames)) {
+        continue;
+      }
+
+      Column<?> column =
+          getColumn(scalarDBResult, sourceRecord, columnName, ignoreNullValues, dataTypesByColumns);
+
+      if (column != null) {
+        columns.add(column);
+      }
+    }
+
+    return columns;
+  }
+
+  /**
+   * Create a set of columns to ignore
+   *
+   * @param partitionKeyNames a set of partition key names
+   * @param clusteringKeyNames a set of clustering key names
+   * @return a set of columns to ignore
+   */
+  private static Set<String> getColumnsToIgnore(
+      Set<String> partitionKeyNames, Set<String> clusteringKeyNames) {
+    Set<String> columnsToIgnore = new HashSet<>(TableMetadataUtil.getMetadataColumns());
+    columnsToIgnore.addAll(partitionKeyNames);
+    columnsToIgnore.addAll(clusteringKeyNames);
+    return columnsToIgnore;
+  }
+
+  /**
+   * Checks if a column is a metadata column
+   *
+   * @param columnName column name
+   * @param columnsToIgnore set of columns to ignore
+   * @param columnNames set of column names
+   * @return if column is a metadata column or not
+   */
+  private static boolean isMetadataColumn(
+      String columnName, Set<String> columnsToIgnore, Set<String> columnNames) {
+    return TableMetadataUtil.isMetadataColumn(columnName, columnsToIgnore, columnNames);
+  }
+
+  /**
+   * Get a single column from the result data or the source record
+   *
+   * @param scalarDBResult result record
+   * @param sourceRecord source data
+   * @param columnName column name
+   * @param ignoreNullValues ignore null values or not
+   * @param dataTypesByColumns data types of columns
+   * @return column data
+   * @throws Base64Exception if an error occurs while base64 decoding
+   */
+  private static Column<?> getColumn(
+      Result scalarDBResult,
+      JsonNode sourceRecord,
+      String columnName,
+      boolean ignoreNullValues,
+      Map<String, DataType> dataTypesByColumns)
+      throws Base64Exception, ColumnParsingException {
+    if (scalarDBResult != null && !sourceRecord.has(columnName)) {
+      return getColumnFromResult(scalarDBResult, columnName);
+    } else {
+      return getColumnFromSourceRecord(
+          sourceRecord, columnName, ignoreNullValues, dataTypesByColumns);
+    }
+  }
+
+  /**
+   * Get column from result
+   *
+   * @param scalarDBResult result record
+   * @param columnName column name
+   * @return column data
+   */
+  private static Column<?> getColumnFromResult(Result scalarDBResult, String columnName) {
+    Map<String, Column<?>> columnValues = scalarDBResult.getColumns();
+    return columnValues.get(columnName);
+  }
+
+  /**
+   * Get column from the source record
+   *
+   * @param sourceRecord source data
+   * @param columnName column name
+   * @param ignoreNullValues ignore null values or not
+   * @param dataTypesByColumns data types of columns
+   * @return column data
+   * @throws Base64Exception if an error occurs while base64 decoding
+   */
+  private static Column<?> getColumnFromSourceRecord(
+      JsonNode sourceRecord,
+      String columnName,
+      boolean ignoreNullValues,
+      Map<String, DataType> dataTypesByColumns)
+      throws Base64Exception, ColumnParsingException {
+    DataType dataType = dataTypesByColumns.get(columnName);
+    String columnValue =
+        sourceRecord.has(columnName) && !sourceRecord.get(columnName).isNull()
+            ? sourceRecord.get(columnName).asText()
+            : null;
+    if (!ignoreNullValues || columnValue != null) {
+      ColumnInfo columnInfo = ColumnInfo.builder().columnName(columnName).build();
+      return createColumnFromValue(dataType, columnInfo, columnValue);
+    }
+    return null;
+  }
 }
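The column assembly above prefers a value from the JSON source record and only falls back to the stored Result when the record does not contain the column, while partition and clustering key columns are excluded because they are built separately into a Key. A minimal sketch of the insert-style call (no existing Result, so the first argument is null) follows; the column names and types are illustrative only.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.scalar.db.dataloader.core.util.ColumnUtils;
import com.scalar.db.io.Column;
import com.scalar.db.io.DataType;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

class ColumnsFromSourceSketch {

  public static void main(String[] args) throws Exception {
    ObjectNode record = new ObjectMapper().createObjectNode();
    record.put("id", 1).put("name", "foo").put("credits", (String) null);

    Set<String> partitionKeys = Collections.singleton("id");
    Set<String> clusteringKeys = Collections.emptySet();
    Set<String> columnNames = new HashSet<>(Arrays.asList("id", "name", "credits"));

    Map<String, DataType> dataTypes = new HashMap<>();
    dataTypes.put("id", DataType.BIGINT);
    dataTypes.put("name", DataType.TEXT);
    dataTypes.put("credits", DataType.INT);

    // Insert case: no existing Result, so every non-key column comes from the source record.
    // With ignoreNullValues=true the null "credits" value is skipped instead of written as null.
    List<Column<?>> columns =
        ColumnUtils.getColumnsFromResult(
            null, record, true, partitionKeys, clusteringKeys, columnNames, dataTypes);
    columns.forEach(column -> System.out.println(column.getName()));
  }
}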
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java
index c2491df0f..e46311545 100644
--- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java
@@ -1,14 +1,18 @@
 package com.scalar.db.dataloader.core.util;
 
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.scalar.db.api.TableMetadata;
 import com.scalar.db.common.error.CoreError;
 import com.scalar.db.dataloader.core.ColumnInfo;
 import com.scalar.db.dataloader.core.ColumnKeyValue;
+import com.scalar.db.dataloader.core.exception.Base64Exception;
 import com.scalar.db.dataloader.core.exception.ColumnParsingException;
 import com.scalar.db.dataloader.core.exception.KeyParsingException;
 import com.scalar.db.io.Column;
 import com.scalar.db.io.DataType;
 import com.scalar.db.io.Key;
+import java.util.*;
 import javax.annotation.Nullable;
 
 /**
@@ -22,6 +26,22 @@ public final class KeyUtils {
   /** Restrict instantiation via private constructor */
   private KeyUtils() {}
 
+  public static Optional<Key> createClusteringKeyFromSource(
+      Set<String> clusteringKeyNames,
+      Map<String, DataType> dataTypeByColumnName,
+      ObjectNode sourceRecord) {
+    return clusteringKeyNames.isEmpty()
+        ? Optional.empty()
+        : createKeyFromSource(clusteringKeyNames, dataTypeByColumnName, sourceRecord);
+  }
+
+  public static Optional<Key> createPartitionKeyFromSource(
+      Set<String> partitionKeyNames,
+      Map<String, DataType> dataTypeByColumnName,
+      ObjectNode sourceRecord) {
+    return createKeyFromSource(partitionKeyNames, dataTypeByColumnName, sourceRecord);
+  }
+
   /**
    * Converts a key-value pair, in the format of <key>=<value>, into a ScalarDB Key instance for a
    * specific ScalarDB table.
@@ -85,4 +105,51 @@ public static Key createKey(DataType dataType, ColumnInfo columnInfo, String value)
       throw new KeyParsingException(e.getMessage(), e);
     }
   }
+
+  /**
+   * Create a new composite ScalarDB key.
+   *
+   * @param dataTypes List of data types for the columns
+   * @param columnNames List of column names
+   * @param values List of key values
+   * @return ScalarDB Key instance, or empty if the provided arrays are not of the same length
+   * @throws Base64Exception if there is an error creating the key values
+   */
+  public static Optional<Key> createCompositeKey(
+      List<DataType> dataTypes, List<String> columnNames, List<String> values)
+      throws Base64Exception, ColumnParsingException {
+    if (!CollectionUtil.areSameLength(dataTypes, columnNames, values)) {
+      return Optional.empty();
+    }
+    Key.Builder builder = Key.newBuilder();
+    for (int i = 0; i < dataTypes.size(); i++) {
+      ColumnInfo columnInfo = ColumnInfo.builder().columnName(columnNames.get(i)).build();
+      Column<?> keyValue =
+          ColumnUtils.createColumnFromValue(dataTypes.get(i), columnInfo, values.get(i));
+      builder.add(keyValue);
+    }
+    return Optional.of(builder.build());
+  }
+
+  private static Optional<Key> createKeyFromSource(
+      Set<String> keyNames, Map<String, DataType> columnDataTypes, JsonNode sourceRecord) {
+    List<DataType> dataTypes = new ArrayList<>();
+    List<String> columnNames = new ArrayList<>();
+    List<String> values = new ArrayList<>();
+
+    for (String keyName : keyNames) {
+      if (!columnDataTypes.containsKey(keyName) || !sourceRecord.has(keyName)) {
+        return Optional.empty();
+      }
+      dataTypes.add(columnDataTypes.get(keyName));
+      columnNames.add(keyName);
+      values.add(sourceRecord.get(keyName).asText());
+    }
+
+    try {
+      return createCompositeKey(dataTypes, columnNames, values);
+    } catch (Base64Exception | ColumnParsingException e) {
+      return Optional.empty();
+    }
+  }
 }
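To make the intended use of the new source-based key helpers easier to follow, the sketch below derives a partition key from a JSON record; a key column that is absent from either the record or the data-type map yields Optional.empty(), mirroring the behavior above. The column names and types here are illustrative only.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.scalar.db.dataloader.core.util.KeyUtils;
import com.scalar.db.io.DataType;
import com.scalar.db.io.Key;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

class SourceKeySketch {

  public static void main(String[] args) {
    ObjectNode record = new ObjectMapper().createObjectNode().put("id", 42);

    Set<String> partitionKeyNames = Collections.singleton("id");
    Map<String, DataType> dataTypes = Collections.singletonMap("id", DataType.BIGINT);

    // Present in both the record and the type map, so a key is built,
    // e.g. Optional[Key{BigIntColumn{name=id, value=42, hasNullValue=false}}].
    Optional<Key> key = KeyUtils.createPartitionKeyFromSource(partitionKeyNames, dataTypes, record);
    System.out.println(key);

    // A key column that the record does not contain yields Optional.empty().
    Optional<Key> missing =
        KeyUtils.createPartitionKeyFromSource(Collections.singleton("other"), dataTypes, record);
    System.out.println(missing.isPresent()); // false
  }
}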
diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java
new file mode 100644
index 000000000..e78b019dd
--- /dev/null
+++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java
@@ -0,0 +1,60 @@
+package com.scalar.db.dataloader.core.dataimport.processor;
+
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+
+import com.scalar.db.dataloader.core.FileFormat;
+import com.scalar.db.dataloader.core.dataimport.ImportOptions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+class DefaultImportProcessorFactoryTest {
+
+  private DefaultImportProcessorFactory factory;
+
+  @BeforeEach
+  void setUp() {
+    factory = new DefaultImportProcessorFactory();
+  }
+
+  @Test
+  void createImportProcessor_givenFileFormatIsJsonl_shouldReturnJsonLinesImportProcessor() {
+    // Arrange
+    ImportOptions importOptions = ImportOptions.builder().fileFormat(FileFormat.JSONL).build();
+    ImportProcessorParams params =
+        ImportProcessorParams.builder().importOptions(importOptions).build();
+
+    // Act
+    ImportProcessor result = factory.createImportProcessor(params);
+
+    // Assert
+    assertInstanceOf(JsonLinesImportProcessor.class, result);
+  }
+
+  @Test
+  void createImportProcessor_givenFileFormatIsJson_shouldReturnJsonImportProcessor() {
+    // Arrange
+    ImportOptions importOptions = ImportOptions.builder().fileFormat(FileFormat.JSON).build();
+    ImportProcessorParams params =
+        ImportProcessorParams.builder().importOptions(importOptions).build();
+
+    // Act
+    ImportProcessor result = factory.createImportProcessor(params);
+
+    // Assert
+    assertInstanceOf(JsonImportProcessor.class, result);
+  }
+
+  @Test
+  void createImportProcessor_givenFileFormatIsCsv_shouldReturnCsvImportProcessor() {
+    // Arrange
+    ImportOptions importOptions = ImportOptions.builder().fileFormat(FileFormat.CSV).build();
+    ImportProcessorParams params =
+        ImportProcessorParams.builder().importOptions(importOptions).build();
+
+    // Act
+    ImportProcessor result = factory.createImportProcessor(params);
+
+    // Assert
+    assertInstanceOf(CsvImportProcessor.class, result);
+  }
+}
diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java
new file mode 100644
index 000000000..2d72827f4
--- /dev/null
+++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java
@@ -0,0 +1,33 @@
+package com.scalar.db.dataloader.core.dataimport.processor;
+
+import com.scalar.db.io.DataType;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+class TableColumnDataTypesTest {
+
+  TableColumnDataTypes tableColumnDataTypes;
+
+  @Test
+  void addColumnDataType_withValidData_shouldAddColumnDataType() {
+    tableColumnDataTypes = new TableColumnDataTypes();
+    tableColumnDataTypes.addColumnDataType("table", "id", DataType.BIGINT);
+    tableColumnDataTypes.addColumnDataType("table", "name", DataType.TEXT);
+    Assertions.assertEquals(
+        DataType.BIGINT, tableColumnDataTypes.getColumnDataTypes("table").get("id"));
+  }
+
+  @Test
+  void getDataType_withValidTableAndColumnName_shouldReturnCorrectDataType() {
+    tableColumnDataTypes = new TableColumnDataTypes();
+    tableColumnDataTypes.addColumnDataType("table", "id", DataType.BIGINT);
+    tableColumnDataTypes.addColumnDataType("table", "name", DataType.TEXT);
+    Assertions.assertEquals(DataType.TEXT, tableColumnDataTypes.getDataType("table", "name"));
+  }
+
+  @Test
+  void getDataType_withInvalidTableAndColumnName_shouldReturnNull() {
+    tableColumnDataTypes = new TableColumnDataTypes();
+    Assertions.assertNull(tableColumnDataTypes.getDataType("table", "name"));
+  }
+}
diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java
index cd47243b1..cefb0dcb6 100644
--- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java
+++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java
@@ -2,8 +2,14 @@
 
 import static org.junit.jupiter.api.Assertions.*;
 
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.scalar.db.api.Result;
+import com.scalar.db.api.TableMetadata;
+import com.scalar.db.common.ResultImpl;
 import com.scalar.db.common.error.CoreError;
 import com.scalar.db.dataloader.core.ColumnInfo;
+import com.scalar.db.dataloader.core.UnitTestUtils;
+import com.scalar.db.dataloader.core.exception.Base64Exception;
 import com.scalar.db.dataloader.core.exception.ColumnParsingException;
 import com.scalar.db.io.BigIntColumn;
 import com.scalar.db.io.BlobColumn;
@@ -16,7 +22,10 @@
 import com.scalar.db.io.TextColumn;
 import java.nio.charset.StandardCharsets;
 import java.util.Base64;
+import java.util.List;
+import java.util.Map;
 import java.util.stream.Stream;
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
@@ -25,6 +34,11 @@
 class ColumnUtilsTest {
 
   private static final float FLOAT_VALUE = 2.78f;
+  private static final TableMetadata mockMetadata = UnitTestUtils.createTestTableMetadata();
+  private static final ObjectNode sourceRecord = UnitTestUtils.getOutputDataWithMetadata();
+  private static final Map<String, DataType> dataTypesByColumns = UnitTestUtils.getColumnData();
+  private static final Map<String, Column<?>> values = UnitTestUtils.createTestValues();
+  private static final Result scalarDBResult = new ResultImpl(values, mockMetadata);
 
   private static Stream<Arguments> provideColumnsForCreateColumnFromValue() {
     return Stream.of(
@@ -105,4 +119,19 @@ void createColumnFromValue_invalidBase64_throwsBase64Exception() {
             columnName, "table", "ns"),
         exception.getMessage());
   }
+
+  @Test
+  void getColumnsFromResult_withValidData_shouldReturnColumns()
+      throws Base64Exception, ColumnParsingException {
+    List<Column<?>> columns =
+        ColumnUtils.getColumnsFromResult(
+            scalarDBResult,
+            sourceRecord,
+            false,
+            mockMetadata.getPartitionKeyNames(),
+            mockMetadata.getClusteringKeyNames(),
+            mockMetadata.getColumnNames(),
+            dataTypesByColumns);
+    Assertions.assertEquals(4, columns.size());
+  }
 }
diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java
index f2fe68049..5c1a04cc2 100644
--- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java
+++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java
@@ -3,10 +3,12 @@
 import static org.junit.jupiter.api.Assertions.*;
 import static org.mockito.Mockito.*;
 
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.scalar.db.api.TableMetadata;
 import com.scalar.db.common.error.CoreError;
 import com.scalar.db.dataloader.core.ColumnInfo;
 import com.scalar.db.dataloader.core.ColumnKeyValue;
+import com.scalar.db.dataloader.core.UnitTestUtils;
 import com.scalar.db.dataloader.core.exception.KeyParsingException;
 import com.scalar.db.io.BigIntColumn;
 import com.scalar.db.io.BlobColumn;
@@ -18,7 +20,8 @@
 import com.scalar.db.io.Key;
 import com.scalar.db.io.TextColumn;
 import java.nio.charset.StandardCharsets;
-import java.util.Base64;
+import java.util.*;
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.Mock;
@@ -28,6 +31,8 @@
 class KeyUtilsTest {
 
   @Mock private TableMetadata tableMetadata;
+  private static final Map<String, DataType> dataTypeByColumnName = UnitTestUtils.getColumnData();
+  private static final ObjectNode sourceRecord = UnitTestUtils.getOutputDataWithMetadata();
 
   @Test
   void parseKeyValue_nullKeyValue_returnsNull() throws KeyParsingException {
@@ -146,4 +151,42 @@ void createKey_invalidBase64_throwsBase64Exception() {
     assertThrows(
         KeyParsingException.class, () -> KeyUtils.createKey(DataType.BLOB, columnInfo, value));
   }
+
+  @Test
+  void createClusteringKeyFromSource_withEmptyClusteringKeySet_shouldReturnEmpty() {
+    Optional<Key> key = KeyUtils.createClusteringKeyFromSource(Collections.emptySet(), null, null);
+    Assertions.assertEquals(Optional.empty(), key);
+  }
+
+  @Test
+  void createClusteringKeyFromSource_withValidClusteringKeySet_shouldReturnValidKey() {
+    Set<String> clusterKeySet = new HashSet<>();
+    clusterKeySet.add(UnitTestUtils.TEST_COLUMN_2_CK);
+    clusterKeySet.add(UnitTestUtils.TEST_COLUMN_3_CK);
+    Optional<Key> key =
+        KeyUtils.createClusteringKeyFromSource(clusterKeySet, dataTypeByColumnName, sourceRecord);
+    Assertions.assertEquals(
+        "Optional[Key{IntColumn{name=col2, value=2147483647, hasNullValue=false}, BooleanColumn{name=col3, value=true, hasNullValue=false}}]",
+        key.toString());
+  }
+
+  @Test
+  void createPartitionKeyFromSource_withInvalidData_shouldReturnEmpty() {
+    Set<String> partitionKeySet = new HashSet<>();
+    partitionKeySet.add("id1");
+    Optional<Key> key =
+        KeyUtils.createPartitionKeyFromSource(partitionKeySet, dataTypeByColumnName, sourceRecord);
+    Assertions.assertEquals(Optional.empty(), key);
+  }
+
+  @Test
+  void createPartitionKeyFromSource_withValidData_shouldReturnValidKey() {
+    Set<String> partitionKeySet = new HashSet<>();
+    partitionKeySet.add(UnitTestUtils.TEST_COLUMN_1_PK);
+    Optional<Key> key =
+        KeyUtils.createPartitionKeyFromSource(partitionKeySet, dataTypeByColumnName, sourceRecord);
+    Assertions.assertEquals(
+        "Optional[Key{BigIntColumn{name=col1, value=9007199254740992, hasNullValue=false}}]",
+        key.toString());
+  }
 }
diff --git a/gradle/spotbugs-exclude.xml b/gradle/spotbugs-exclude.xml
index 23254eb3a..bab1669d8 100644
--- a/gradle/spotbugs-exclude.xml
+++ b/gradle/spotbugs-exclude.xml
@@ -37,7 +37,7 @@
-
+