Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add table metadata service #2434

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package com.scalar.db.dataloader.core.dataimport.controlfile;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;

/**
* Represents the configuration for a single table in the control file, including its namespace,
* table name, and field mappings. This class is used to define how data from a control file maps to
* a specific table in ScalarDB.
*/
@Getter
@Setter
public class ControlFileTable {

/** The namespace of the table in ScalarDB. */
@JsonProperty("namespace")
private String namespace;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[minor] This seems the name of a namespace as well as the name of a table below. So, this should be namespaceName for consistency?


/** The name of the table in ScalarDB. */
@JsonProperty("table_name")
private String tableName;

/**
* A list of mappings defining the correspondence between control file fields and table columns.
*/
@JsonProperty("mappings")
private final List<ControlFileTableFieldMapping> mappings;

/**
* Creates a new {@code ControlFileTable} instance with the specified namespace and table name.
* The mappings list is initialized as an empty list.
*
* @param namespace The namespace of the table in ScalarDB.
* @param tableName The name of the table in ScalarDB.
*/
public ControlFileTable(String namespace, String tableName) {
this.namespace = namespace;
this.tableName = tableName;
this.mappings = new ArrayList<>();
}

/**
* Constructs a {@code ControlFileTable} instance using data from a serialized JSON object. This
* constructor is used for deserialization of API requests or control files.
*
* @param namespace The namespace of the table in ScalarDB.
* @param tableName The name of the table in ScalarDB.
* @param mappings A list of mappings that define the relationship between control file fields and
* table columns.
*/
@JsonCreator
public ControlFileTable(
@JsonProperty("namespace") String namespace,
@JsonProperty("table_name") String tableName,
@JsonProperty("mappings") List<ControlFileTableFieldMapping> mappings) {
this.namespace = namespace;
this.tableName = tableName;
this.mappings = mappings;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package com.scalar.db.dataloader.core.dataimport.controlfile;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Getter;
import lombok.Setter;

/**
* Represents the mapping of a single field in the control file to a column in a ScalarDB table.
* This class defines how data from a specific field in the input source should be mapped to the
* corresponding column in the database.
*/
@Getter
@Setter
public class ControlFileTableFieldMapping {

/** The name of the field in the input source (e.g., JSON or CSV). */
@JsonProperty("source_field")
private String sourceField;

/** The name of the column in the ScalarDB table that the field maps to. */
@JsonProperty("target_column")
private String targetColumn;

/**
* Constructs a {@code ControlFileTableFieldMapping} instance using data from a serialized JSON
* object. This constructor is primarily used for deserialization of control file mappings.
*
* @param sourceField The name of the field in the input source (e.g., JSON or CSV).
* @param targetColumn The name of the corresponding column in the ScalarDB table.
*/
@JsonCreator
public ControlFileTableFieldMapping(
@JsonProperty("source_field") String sourceField,
@JsonProperty("target_column") String targetColumn) {
this.sourceField = sourceField;
this.targetColumn = targetColumn;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.scalar.db.dataloader.core.tablemetadata;

/** A custom exception that encapsulates errors thrown by the TableMetaDataService */
public class TableMetadataException extends Exception {

/**
* Class constructor
*
* @param message error message
* @param cause reason for exception
*/
public TableMetadataException(String message, Throwable cause) {
super(message, cause);
}

/**
* Class constructor
*
* @param message error message
*/
public TableMetadataException(String message) {
super(message);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.scalar.db.dataloader.core.tablemetadata;

import lombok.Getter;

/** Represents the request for metadata for a single ScalarDB table */
@Getter
public class TableMetadataRequest {

private final String namespace;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[minor] Same as above

private final String tableName;

/**
* Class constructor
*
* @param namespace ScalarDB namespace
* @param tableName ScalarDB table name
*/
public TableMetadataRequest(String namespace, String tableName) {
this.namespace = namespace;
this.tableName = tableName;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package com.scalar.db.dataloader.core.tablemetadata;

import com.scalar.db.api.DistributedStorageAdmin;
import com.scalar.db.api.TableMetadata;
import com.scalar.db.dataloader.core.util.TableMetadataUtil;
import com.scalar.db.exception.storage.ExecutionException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import lombok.RequiredArgsConstructor;

/**
* Service for retrieving {@link TableMetadata} from ScalarDB. Provides methods to fetch metadata
* for individual tables or a collection of tables.
*/
@RequiredArgsConstructor
public class TableMetadataService {

private static final String ERROR_MISSING_NAMESPACE_OR_TABLE =
"Missing namespace or table: %s, %s";

private final DistributedStorageAdmin storageAdmin;

/**
* Retrieves the {@link TableMetadata} for a specific namespace and table name.
*
* @param namespace The ScalarDB namespace.
* @param tableName The name of the table within the specified namespace.
* @return The {@link TableMetadata} object containing schema details of the specified table.
* @throws TableMetadataException If the table or namespace does not exist, or if an error occurs
* while fetching the metadata.
*/
public TableMetadata getTableMetadata(String namespace, String tableName)
throws TableMetadataException {
try {
TableMetadata tableMetadata = storageAdmin.getTableMetadata(namespace, tableName);
if (tableMetadata == null) {
throw new TableMetadataException(
String.format(ERROR_MISSING_NAMESPACE_OR_TABLE, namespace, tableName));
}
return tableMetadata;
} catch (ExecutionException e) {
throw new TableMetadataException(
String.format(ERROR_MISSING_NAMESPACE_OR_TABLE, namespace, tableName), e.getCause());
}
}

/**
* Retrieves the {@link TableMetadata} for a collection of table metadata requests.
*
* <p>Each request specifies a namespace and table name. The method consolidates the metadata into
* a map keyed by a unique lookup key generated for each table.
*
* @param requests A collection of {@link TableMetadataRequest} objects specifying the tables to
* retrieve metadata for.
* @return A map where the keys are unique lookup keys (namespace + table name) and the values are
* the corresponding {@link TableMetadata} objects.
* @throws TableMetadataException If any of the requested tables or namespaces are missing, or if
* an error occurs while fetching the metadata.
*/
public Map<String, TableMetadata> getTableMetadata(Collection<TableMetadataRequest> requests)
throws TableMetadataException {
Map<String, TableMetadata> metadataMap = new HashMap<>();

for (TableMetadataRequest request : requests) {
String namespace = request.getNamespace();
String tableName = request.getTableName();
TableMetadata tableMetadata = getTableMetadata(namespace, tableName);
String key = TableMetadataUtil.getTableLookupKey(namespace, tableName);
metadataMap.put(key, tableMetadata);
}

return metadataMap;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package com.scalar.db.dataloader.core.util;

import com.scalar.db.api.TableMetadata;
import com.scalar.db.dataloader.core.Constants;
import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable;
import com.scalar.db.io.DataType;
import com.scalar.db.transaction.consensuscommit.Attribute;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;

/** Utility class for handling ScalarDB table metadata operations. */
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class TableMetadataUtil {

/**
* Determines whether a given column is a metadata column based on predefined criteria.
*
* @param columnName The name of the table column to check.
* @param metadataColumns A set of predefined metadata columns.
* @param columnNames A set of all column names in the table.
* @return {@code true} if the column is a metadata column; {@code false} otherwise.
*/
public static boolean isMetadataColumn(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use ConsensusCommitUtils.isTransactionMetaColumn():

public static boolean isTransactionMetaColumn(String columnName, TableMetadata tableMetadata) {

String columnName, Set<String> metadataColumns, Set<String> columnNames) {
if (metadataColumns.contains(columnName)) {
return true;
}
return columnName.startsWith(Attribute.BEFORE_PREFIX)
&& !columnNames.contains(Attribute.BEFORE_PREFIX + columnName);
}

/**
* Determines whether a given column is a metadata column using table metadata.
*
* @param columnName The name of the ScalarDB table column to check.
* @param tableMetadata The metadata of the table.
* @return {@code true} if the column is a metadata column; {@code false} otherwise.
*/
public static boolean isMetadataColumn(String columnName, TableMetadata tableMetadata) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

Set<String> metadataColumns = getMetadataColumns();
LinkedHashSet<String> columnNames = tableMetadata.getColumnNames();
return isMetadataColumn(columnName, metadataColumns, columnNames);
}

/**
* Retrieves a set of fixed metadata column names used in ScalarDB.
*
* @return A set of predefined metadata column names.
*/
public static Set<String> getMetadataColumns() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about using com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils#getTransactionMetaColumns to avoid duplicated logic?

Some other methods might be replaced with ConsensusCommitUtils methods.

return Stream.of(
Attribute.ID,
Attribute.STATE,
Attribute.VERSION,
Attribute.PREPARED_AT,
Attribute.COMMITTED_AT,
Attribute.BEFORE_ID,
Attribute.BEFORE_STATE,
Attribute.BEFORE_VERSION,
Attribute.BEFORE_PREPARED_AT,
Attribute.BEFORE_COMMITTED_AT)
.collect(Collectors.toCollection(HashSet::new));
}

/**
* Extracts a mapping of column names to their data types from the table metadata.
*
* @param tableMetadata The metadata of the ScalarDB table.
* @return A map where keys are column names and values are their corresponding {@link DataType}.
*/
public static Map<String, DataType> extractColumnDataTypes(TableMetadata tableMetadata) {
Map<String, DataType> definitions = new HashMap<>();
for (String columnName : tableMetadata.getColumnNames()) {
definitions.put(columnName, tableMetadata.getColumnDataType(columnName));
}
return definitions;
}

/**
* Generates a unique lookup key for a table within a namespace.
*
* @param namespace The namespace of the table.
* @param tableName The name of the table.
* @return A formatted string representing the table lookup key.
*/
public static String getTableLookupKey(String namespace, String tableName) {
return String.format(Constants.TABLE_LOOKUP_KEY_FORMAT, namespace, tableName);
}

/**
* Generates a unique lookup key for a table using control file table data.
*
* @param controlFileTable The control file table object containing namespace and table name.
* @return A formatted string representing the table lookup key.
*/
public static String getTableLookupKey(ControlFileTable controlFileTable) {
return String.format(
Constants.TABLE_LOOKUP_KEY_FORMAT,
controlFileTable.getNamespace(),
controlFileTable.getTableName());
}

/**
* Adds metadata columns to a list of projection columns for a ScalarDB table.
*
* @param tableMetadata The metadata of the ScalarDB table.
* @param projections A list of projection column names.
* @return A new list containing projection columns along with metadata columns.
*/
public static List<String> populateProjectionsWithMetadata(
TableMetadata tableMetadata, List<String> projections) {
List<String> projectionMetadata = new ArrayList<>();
projections.forEach(
projection -> {
projectionMetadata.add(projection);
if (!isKeyColumn(projection, tableMetadata)) {
projectionMetadata.add(Attribute.BEFORE_PREFIX + projection);
}
});
projectionMetadata.addAll(getMetadataColumns());
return projectionMetadata;
}

/**
* Checks whether a column is a key column (partition key or clustering key) in the table.
*
* @param column The name of the column to check.
* @param tableMetadata The metadata of the ScalarDB table.
* @return {@code true} if the column is a key column; {@code false} otherwise.
*/
private static boolean isKeyColumn(String column, TableMetadata tableMetadata) {
return tableMetadata.getPartitionKeyNames().contains(column)
|| tableMetadata.getClusteringKeyNames().contains(column);
}
}
Loading
Loading