Skip to content

Commit

Permalink
[CALCITE-6236] EnumerableBatchNestedLoopJoin::estimateRowCount return…
Browse files Browse the repository at this point in the history
…s wrong value
  • Loading branch information
kramerul committed Feb 2, 2024
1 parent 351ddeb commit fe44f65
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.metadata.RelMdCollation;
import org.apache.calcite.rel.metadata.RelMdUtil;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.BuiltInMethod;
Expand All @@ -55,6 +56,7 @@
public class EnumerableBatchNestedLoopJoin extends Join implements EnumerableRel {

private final ImmutableBitSet requiredColumns;
private final double rightSideFilterSelectivity;
protected EnumerableBatchNestedLoopJoin(
RelOptCluster cluster,
RelTraitSet traits,
Expand All @@ -63,9 +65,11 @@ protected EnumerableBatchNestedLoopJoin(
RexNode condition,
Set<CorrelationId> variablesSet,
ImmutableBitSet requiredColumns,
JoinRelType joinType) {
JoinRelType joinType,
double rightSideFilterSelectivity) {
super(cluster, traits, ImmutableList.of(), left, right, condition, variablesSet, joinType);
this.requiredColumns = requiredColumns;
this.rightSideFilterSelectivity = rightSideFilterSelectivity;
}

public static EnumerableBatchNestedLoopJoin create(
Expand All @@ -74,7 +78,8 @@ public static EnumerableBatchNestedLoopJoin create(
RexNode condition,
ImmutableBitSet requiredColumns,
Set<CorrelationId> variablesSet,
JoinRelType joinType) {
JoinRelType joinType,
double rightSideFilterSelectivity) {
final RelOptCluster cluster = left.getCluster();
final RelMetadataQuery mq = cluster.getMetadataQuery();
final RelTraitSet traitSet =
Expand All @@ -89,7 +94,19 @@ public static EnumerableBatchNestedLoopJoin create(
condition,
variablesSet,
requiredColumns,
joinType);
joinType,
rightSideFilterSelectivity);
}

/**
 * Creates an {@code EnumerableBatchNestedLoopJoin} without an explicit
 * right-side filter selectivity.
 *
 * <p>Delegates to the seven-argument {@code create} overload, passing
 * {@code 1.0} as the {@code rightSideFilterSelectivity} argument.
 *
 * @deprecated Use
 * {@link #create(RelNode, RelNode, RexNode, ImmutableBitSet, Set, JoinRelType, double)}
 * and supply the selectivity explicitly.
 */
@Deprecated
public static EnumerableBatchNestedLoopJoin create(
    RelNode left,
    RelNode right,
    RexNode condition,
    ImmutableBitSet requiredColumns,
    Set<CorrelationId> variablesSet,
    JoinRelType joinType) {
  // Selectivity handed to the full overload when the caller supplies none.
  final double defaultSelectivity = 1.0;
  return create(left, right, condition, requiredColumns, variablesSet, joinType,
      defaultSelectivity);
}

@Override public @Nullable Pair<RelTraitSet, List<RelTraitSet>> passThroughTraits(
Expand All @@ -115,14 +132,14 @@ public static EnumerableBatchNestedLoopJoin create(
@Override public EnumerableBatchNestedLoopJoin copy(RelTraitSet traitSet,
RexNode condition, RelNode left, RelNode right, JoinRelType joinType,
boolean semiJoinDone) {
return new EnumerableBatchNestedLoopJoin(getCluster(), traitSet,
left, right, condition, variablesSet, requiredColumns, joinType);
return new EnumerableBatchNestedLoopJoin(getCluster(), traitSet, left, right, condition,
variablesSet, requiredColumns, joinType, rightSideFilterSelectivity);
}

@Override public @Nullable RelOptCost computeSelfCost(
final RelOptPlanner planner,
final RelMetadataQuery mq) {
double rowCount = mq.getRowCount(this);
double rowCount = estimateRowCount(mq);

final double rightRowCount = right.estimateRowCount(mq);
final double leftRowCount = left.estimateRowCount(mq);
Expand All @@ -144,6 +161,18 @@ public static EnumerableBatchNestedLoopJoin create(
rowCount + leftRowCount, 0, 0).plus(rescanCost);
}

@Override public double estimateRowCount(RelMetadataQuery mq) {
return unwrapDouble(RelMdUtil.getJoinRowCount(mq, this, condition,

Check failure on line 165 in core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableBatchNestedLoopJoin.java

View workflow job for this annotation

GitHub Actions / CheckerFramework (JDK 11), oldest Guava

[Task :core:compileJava] [argument.type.incompatible] incompatible argument for parameter value of unwrapDouble. return unwrapDouble(RelMdUtil.getJoinRowCount(mq, this, condition, ^ found : @initialized @nullable Double

Check failure on line 165 in core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableBatchNestedLoopJoin.java

View workflow job for this annotation

GitHub Actions / CheckerFramework (JDK 11)

[Task :core:compileJava] [argument.type.incompatible] incompatible argument for parameter value of unwrapDouble. return unwrapDouble(RelMdUtil.getJoinRowCount(mq, this, condition, ^ found : @initialized @nullable Double
unwrapDouble(mq.getRowCount(getRight())) / rightSideFilterSelectivity));
}

/**
 * Converts a possibly-null boxed {@code Double} into a primitive
 * {@code double}.
 *
 * @param value boxed value, or null
 * @return {@code value} unboxed, or {@link Double#POSITIVE_INFINITY} if
 *     {@code value} is null
 */
static double unwrapDouble(@Nullable Double value) {
  // The parameter is declared @Nullable because this method exists to handle
  // null; without the annotation the nullness checker rejects calls that pass
  // a @Nullable Double (argument.type.incompatible).
  return value == null ? Double.POSITIVE_INFINITY : value.doubleValue();
}

@Override public RelWriter explainTerms(RelWriter pw) {
super.explainTerms(pw);
return pw.item("batchSize", variablesSet.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import java.util.List;
import java.util.Set;

import static org.apache.calcite.adapter.enumerable.EnumerableBatchNestedLoopJoin.unwrapDouble;

/** Rule to convert a {@link LogicalJoin} to an {@link EnumerableBatchNestedLoopJoin}.
* You may provide a custom config to convert other nodes that extend {@link Join}.
*
Expand Down Expand Up @@ -134,9 +136,14 @@ public EnumerableBatchNestedLoopJoinRule(RelBuilderFactory relBuilderFactory,
conditionList.add(condition2);
}

RexNode filterCondition = relBuilder.or(conditionList);

// Push a filter with batchSize disjunctions
relBuilder.push(join.getRight()).filter(relBuilder.or(conditionList));
relBuilder.push(join.getRight()).filter(filterCondition);
final RelNode right = relBuilder.build();
final double filterSelectivity = right.getInputs().size() == 1
? unwrapDouble(call.getMetadataQuery().getSelectivity(right.getInput(0), filterCondition))

Check failure on line 145 in core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableBatchNestedLoopJoinRule.java

View workflow job for this annotation

GitHub Actions / CheckerFramework (JDK 11), oldest Guava

[Task :core:compileJava] [argument.type.incompatible] incompatible argument for parameter value of unwrapDouble. ? unwrapDouble(call.getMetadataQuery().getSelectivity(right.getInput(0), filterCondition)) ^ found : @initialized @nullable Double

Check failure on line 145 in core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableBatchNestedLoopJoinRule.java

View workflow job for this annotation

GitHub Actions / CheckerFramework (JDK 11)

[Task :core:compileJava] [argument.type.incompatible] incompatible argument for parameter value of unwrapDouble. ? unwrapDouble(call.getMetadataQuery().getSelectivity(right.getInput(0), filterCondition)) ^ found : @initialized @nullable Double
: 1.0;

call.transformTo(
EnumerableBatchNestedLoopJoin.create(
Expand All @@ -147,7 +154,8 @@ public EnumerableBatchNestedLoopJoinRule(RelBuilderFactory relBuilderFactory,
join.getCondition(),
requiredColumns.build(),
correlationIds,
join.getJoinType()));
join.getJoinType(),
filterSelectivity));
}

/** Rule configuration. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,12 @@ public static double getMinusRowCount(RelMetadataQuery mq, Minus minus) {
/**
 * Returns an estimate of the number of rows returned by a {@link Join}.
 *
 * <p>Looks up the row count of the join's right input via {@code mq} and
 * delegates to the four-argument overload with that value.
 */
public static @Nullable Double getJoinRowCount(RelMetadataQuery mq, Join join,
    RexNode condition) {
  final Double rightRowCount = mq.getRowCount(join.getRight());
  return getJoinRowCount(mq, join, condition, rightRowCount);
}

/** Returns an estimate of the number of rows returned by a {@link Join},
 * given a caller-supplied row-count estimate {@code rightRowCount} for the
 * join's right input. */
public static @Nullable Double getJoinRowCount(RelMetadataQuery mq, Join join,
RexNode condition, Double rightRowCount) {
if (!join.getJoinType().projectsRight()) {
// Create a RexNode representing the selectivity of the
// semijoin filter and pass it to getSelectivity
Expand All @@ -813,7 +819,7 @@ public static double getMinusRowCount(RelMetadataQuery mq, Minus minus) {
// Row count estimates of 0 will be rounded up to 1.
// So, use maxRowCount where the product is very small.
final Double left = mq.getRowCount(join.getLeft());
final Double right = mq.getRowCount(join.getRight());
final Double right = rightRowCount;
if (left == null || right == null) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ class EnumerableBatchNestedLoopJoinTest {
+ "join locations l on e.empid <> l.empid and d.deptno = l.empid")
.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> {
planner.removeRule(EnumerableRules.ENUMERABLE_CORRELATE_RULE);
planner.removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
// Use a small batch size, otherwise we will run into Janino's
// "InternalCompilerException: Code of method grows beyond 64 KB".
planner.addRule(
Expand Down

0 comments on commit fe44f65

Please sign in to comment.