Skip to content

Commit

Permalink
Implement DictionaryColumn and support copyPositions and getPositions…
Browse files Browse the repository at this point in the history
… method in Column
  • Loading branch information
Wei-hao-Li authored Jan 9, 2025
1 parent 0fb1d96 commit fe2200c
Show file tree
Hide file tree
Showing 19 changed files with 1,365 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,23 @@ default TsPrimitiveType getTsPrimitiveType(int position) {
/**
* Creates a copy of the original column with a different array offset.
*
* @param fromIndex presumably the position in this column at which the copy starts — TODO
*     confirm against the implementations
* @return the copied column
*/
Column subColumnCopy(int fromIndex);

/**
* Creates a new column from the current column by keeping only the elements at the given
* positions. The positions to keep are stored in a subarray of {@code positions} that starts at
* {@code offset} and has a length of {@code length}. The implementation may return a view over
* the data in this column or may return a copy, and the implementation is allowed to retain the
* positions array for use in the view.
*
* @param positions array holding the positions to keep
* @param offset index within {@code positions} of the first position to use
* @param length number of entries of {@code positions} to use
* @return a column containing the selected elements, either as a view or as a copy
*/
Column getPositions(int[] positions, int offset, int length);

/**
* Returns a column containing the specified positions. Positions to copy are stored in a subarray
* within the {@code positions} array that starts at {@code offset} and has a length of
* {@code length}. All specified positions must be valid for this column.
*
* <p>The returned column must be a compact representation of the original column.
*
* @param positions array holding the positions to copy
* @param offset index within {@code positions} of the first position to use
* @param length number of entries of {@code positions} to use
* @return a compact column containing the selected elements
*/
Column copyPositions(int[] positions, int offset, int length);

/** Reverses the column. */
void reverse();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ public enum ColumnEncoding {
/** TEXT. */
BINARY_ARRAY((byte) 3),
/** All data types. */
RLE((byte) 4);
RLE((byte) 4),
/** All data types. */
DICTIONARY((byte) 5);

private final byte value;

Expand Down Expand Up @@ -61,6 +63,8 @@ private static ColumnEncoding getColumnEncoding(byte value) {
return BINARY_ARRAY;
case 4:
return RLE;
case 5:
return DICTIONARY;
default:
throw new IllegalArgumentException("Invalid value: " + value);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import java.util.Arrays;
import java.util.Optional;

import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray;
Expand Down Expand Up @@ -197,6 +199,34 @@ public void reverse() {
}
}

/**
 * Selects positions of this column through a {@link DictionaryColumn}.
 *
 * <p>After {@code checkArrayRange} has been applied to the arguments, this column, the
 * {@code positions} array as passed in (it is not copied here), {@code offset}, {@code length}
 * and a freshly generated random {@link DictionaryId} are handed to
 * {@code DictionaryColumn.createInternal}.
 *
 * @param positions array holding the positions to select
 * @param offset index within {@code positions} of the first position to use
 * @param length number of entries of {@code positions} to use
 * @return the column produced by {@code DictionaryColumn.createInternal}
 */
@Override
public Column getPositions(int[] positions, int offset, int length) {
  checkArrayRange(positions, offset, length);

  final Column dictionaryBacked =
      DictionaryColumn.createInternal(
          offset, length, this, positions, DictionaryId.randomDictionaryId());
  return dictionaryBacked;
}

/**
 * Copies the selected positions of this column into a new {@link BinaryColumn}.
 *
 * <p>For each of the {@code length} entries of {@code positions} starting at {@code offset}, the
 * entry is checked with {@code checkReadablePosition} and the value stored at that position
 * (shifted by this column's {@code arrayOffset}) is copied. Null flags are copied as well, but
 * only when this column has a {@code valueIsNull} array; otherwise the new column is created
 * with a {@code null} flag array.
 *
 * @param positions array holding the positions to copy
 * @param offset index within {@code positions} of the first position to use
 * @param length number of entries of {@code positions} to use
 * @return a new {@link BinaryColumn} with array offset 0 and {@code length} positions
 */
@Override
public Column copyPositions(int[] positions, int offset, int length) {
  checkArrayRange(positions, offset, length);

  final boolean[] copiedNulls = (valueIsNull == null) ? null : new boolean[length];
  final Binary[] copiedValues = new Binary[length];

  int target = 0;
  while (target < length) {
    final int source = positions[offset + target];
    checkReadablePosition(this, source);
    // Logical positions are translated to physical indices via arrayOffset.
    final int physicalIndex = source + arrayOffset;
    if (copiedNulls != null) {
      copiedNulls[target] = valueIsNull[physicalIndex];
    }
    copiedValues[target] = values[physicalIndex];
    target++;
  }
  return new BinaryColumn(0, length, copiedNulls, copiedValues);
}

@Override
public int getInstanceSize() {
return INSTANCE_SIZE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import java.util.Arrays;
import java.util.Optional;

import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;

Expand Down Expand Up @@ -195,6 +197,34 @@ public void reverse() {
}
}

/**
 * Selects positions of this column through a {@link DictionaryColumn}.
 *
 * <p>The arguments are first passed to {@code checkArrayRange}. Then this column, the
 * {@code positions} array as given (no copy is made here), {@code offset}, {@code length} and a
 * newly generated random {@link DictionaryId} are forwarded to
 * {@code DictionaryColumn.createInternal}.
 *
 * @param positions array holding the positions to select
 * @param offset index within {@code positions} of the first position to use
 * @param length number of entries of {@code positions} to use
 * @return the column produced by {@code DictionaryColumn.createInternal}
 */
@Override
public Column getPositions(int[] positions, int offset, int length) {
  checkArrayRange(positions, offset, length);

  final Column selection =
      DictionaryColumn.createInternal(
          offset, length, this, positions, DictionaryId.randomDictionaryId());
  return selection;
}

/**
 * Copies the selected positions of this column into a new {@link BooleanColumn}.
 *
 * <p>Each of the {@code length} entries of {@code positions} starting at {@code offset} is
 * checked with {@code checkReadablePosition}; the value at that position (shifted by this
 * column's {@code arrayOffset}) is then copied. Null flags are copied only when this column has
 * a {@code valueIsNull} array; otherwise the new column receives a {@code null} flag array.
 *
 * @param positions array holding the positions to copy
 * @param offset index within {@code positions} of the first position to use
 * @param length number of entries of {@code positions} to use
 * @return a new {@link BooleanColumn} with array offset 0 and {@code length} positions
 */
@Override
public Column copyPositions(int[] positions, int offset, int length) {
  checkArrayRange(positions, offset, length);

  final boolean[] nullFlags = (valueIsNull == null) ? null : new boolean[length];
  final boolean[] picked = new boolean[length];

  int out = 0;
  while (out < length) {
    final int logical = positions[offset + out];
    checkReadablePosition(this, logical);
    // Logical positions are translated to physical indices via arrayOffset.
    final int physical = logical + arrayOffset;
    if (nullFlags != null) {
      nullFlags[out] = valueIsNull[physical];
    }
    picked[out] = values[physical];
    out++;
  }
  return new BooleanColumn(0, length, nullFlags, picked);
}

@Override
public int getInstanceSize() {
return INSTANCE_SIZE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ private ColumnEncoderFactory() {
encodingToEncoder.put(ColumnEncoding.BYTE_ARRAY, new ByteArrayColumnEncoder());
encodingToEncoder.put(ColumnEncoding.BINARY_ARRAY, new BinaryArrayColumnEncoder());
encodingToEncoder.put(ColumnEncoding.RLE, new RunLengthColumnEncoder());
encodingToEncoder.put(ColumnEncoding.DICTIONARY, new DictionaryColumnEncoder());
}

public static ColumnEncoder get(ColumnEncoding columnEncoding) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

package org.apache.tsfile.read.common.block.column;

import org.apache.tsfile.block.column.Column;

import java.util.Arrays;

import static java.lang.Math.ceil;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
Expand Down Expand Up @@ -68,6 +72,17 @@ static void checkValidPosition(int position, int positionCount) {
}
}

/**
 * Checks {@code position} against the position count reported by {@code column}, delegating the
 * actual check to {@code checkValidPosition}.
 *
 * @param column column whose position count bounds the check
 * @param position position to check
 */
static void checkReadablePosition(Column column, int position) {
  final int positionCount = column.getPositionCount();
  checkValidPosition(position, positionCount);
}

/**
 * Returns an array holding exactly the {@code length} elements of {@code array} that start at
 * {@code index}.
 *
 * <p>When the requested range already spans the whole array, the same array instance is returned
 * without copying; otherwise a new array is created with {@link Arrays#copyOfRange}.
 *
 * @param array source array
 * @param index index of the first element to keep
 * @param length number of elements to keep
 * @return {@code array} itself if the range covers it entirely, otherwise a copy of the range
 */
static int[] compactArray(int[] array, int index, int length) {
  if (index != 0 || length != array.length) {
    final int endExclusive = index + length;
    return Arrays.copyOfRange(array, index, endExclusive);
  }
  // The range is the whole array, so no copy is needed.
  return array;
}

static int calculateNewArraySize(int currentSize) {
// grow array by 50%
long newSize = (long) currentSize + (currentSize >> 1);
Expand Down
Loading

0 comments on commit fe2200c

Please sign in to comment.