-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Draft][Please review] Make Paginator use simpler queries when there are no sql joins used (#8278) #11595
base: 3.3.x
Are you sure you want to change the base?
[Draft][Please review] Make Paginator use simpler queries when there are no sql joins used (#8278) #11595
Changes from 9 commits
3822686
9dc93b9
c9896b5
d698406
7155bfe
1e066c7
f88bad9
585db83
b2f087d
c06e100
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,21 +7,32 @@ | |
use ArrayIterator; | ||
use Countable; | ||
use Doctrine\Common\Collections\Collection; | ||
use Doctrine\ORM\EntityManagerInterface; | ||
use Doctrine\ORM\Internal\SQLResultCasing; | ||
use Doctrine\ORM\Mapping\ClassMetadata; | ||
use Doctrine\ORM\NoResultException; | ||
use Doctrine\ORM\Query; | ||
use Doctrine\ORM\Query\AST\Join; | ||
use Doctrine\ORM\Query\AST\JoinAssociationDeclaration; | ||
use Doctrine\ORM\Query\AST\Node; | ||
use Doctrine\ORM\Query\AST\SelectExpression; | ||
use Doctrine\ORM\Query\Parameter; | ||
use Doctrine\ORM\Query\Parser; | ||
use Doctrine\ORM\Query\ResultSetMapping; | ||
use Doctrine\ORM\QueryBuilder; | ||
use Doctrine\Persistence\Mapping\MappingException; | ||
use IteratorAggregate; | ||
use Traversable; | ||
|
||
use function array_key_exists; | ||
use function array_map; | ||
use function array_sum; | ||
use function assert; | ||
use function count; | ||
use function in_array; | ||
use function is_string; | ||
use function reset; | ||
use function str_starts_with; | ||
|
||
/** | ||
* The paginator can handle various complex scenarios with DQL. | ||
|
@@ -36,13 +47,26 @@ | |
public const HINT_ENABLE_DISTINCT = 'paginator.distinct.enable'; | ||
|
||
private readonly Query $query; | ||
private bool|null $useOutputWalkers = null; | ||
private int|null $count = null; | ||
private bool|null $useResultQueryOutputWalker = null; | ||
private bool|null $useCountQueryOutputWalker = null; | ||
private int|null $count = null; | ||
/** | ||
* The auto-detection of queries style was added a lot later to this class, and this | ||
* class historically was by default using the more complex queries style, which means that | ||
* the simple queries style is potentially very under-tested in production systems. The purpose | ||
* of this variable is to not introduce breaking changes until an impression is developed that | ||
* the simple queries style has been battle-tested enough. | ||
*/ | ||
private bool $queryStyleAutoDetectionEnabled = false; | ||
private bool $queryCouldHaveToManyJoins = true; | ||
|
||
/** @param bool $fetchJoinCollection Whether the query joins a collection (true by default). */ | ||
/** | ||
* @param bool $queryCouldProduceDuplicates Whether the query could produce partially duplicated records. One case | ||
* when it does is when it joins a collection. | ||
*/ | ||
public function __construct( | ||
Query|QueryBuilder $query, | ||
private readonly bool $fetchJoinCollection = true, | ||
private readonly bool $queryCouldProduceDuplicates = true, | ||
) { | ||
if ($query instanceof QueryBuilder) { | ||
$query = $query->getQuery(); | ||
|
@@ -51,6 +75,190 @@ | |
$this->query = $query; | ||
} | ||
|
||
/** | ||
* Create an instance of Paginator with auto-detection of whether the provided | ||
* query is suitable for simple (and fast) pagination queries, or whether a complex | ||
* set of pagination queries has to be used. | ||
*/ | ||
public static function newWithAutoDetection(Query|QueryBuilder $query): self | ||
{ | ||
if ($query instanceof QueryBuilder) { | ||
$query = $query->getQuery(); | ||
} | ||
|
||
$queryAST = $query->getAST(); | ||
[ | ||
'queryHasGroupByClause' => $queryHasGroupByClause, | ||
'queryHasHavingClause' => $queryHasHavingClause, | ||
'rootEntityHasSingleIdentifierFieldName' => $rootEntityHasSingleIdentifierFieldName, | ||
'queryCouldProduceDuplicates' => $queryCouldProduceDuplicates, | ||
'queryCouldHaveToManyJoins' => $queryCouldHaveToManyJoins, | ||
] = self::autoDetectQueryFeatures($query->getEntityManager(), $queryAST); | ||
|
||
$paginator = new self($query, $queryCouldProduceDuplicates); | ||
|
||
$paginator->queryStyleAutoDetectionEnabled = true; | ||
$paginator->queryCouldHaveToManyJoins = $queryCouldHaveToManyJoins; | ||
// The following is ensuring the conditions for when the CountWalker cannot be used. | ||
$paginator->useCountQueryOutputWalker = $queryHasHavingClause !== false | ||
|| $rootEntityHasSingleIdentifierFieldName !== true | ||
|| ($queryCouldHaveToManyJoins && $queryHasGroupByClause !== false); | ||
|
||
return $paginator; | ||
} | ||
|
||
/** | ||
* @return array{ | ||
* queryHasGroupByClause: bool|null, | ||
* queryHasHavingClause: bool|null, | ||
* rootEntityHasSingleIdentifierFieldName: bool|null, | ||
* queryCouldProduceDuplicates: bool, | ||
* queryCouldHaveToManyJoins: bool, | ||
d-ph marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* } | ||
*/ | ||
private static function autoDetectQueryFeatures(EntityManagerInterface $entityManager, Node $queryAST): array | ||
{ | ||
$queryFeatures = [ | ||
// Null means undetermined | ||
'queryHasGroupByClause' => null, | ||
'queryHasHavingClause' => null, | ||
'rootEntityHasSingleIdentifierFieldName' => null, | ||
'queryCouldProduceDuplicates' => true, | ||
'queryCouldHaveToManyJoins' => true, | ||
]; | ||
|
||
if (! $queryAST instanceof Query\AST\SelectStatement) { | ||
return $queryFeatures; | ||
} | ||
|
||
$queryFeatures['queryHasGroupByClause'] = $queryAST->groupByClause !== null; | ||
$queryFeatures['queryHasHavingClause'] = $queryAST->havingClause !== null; | ||
|
||
$from = $queryAST->fromClause->identificationVariableDeclarations; | ||
if (count($from) > 1) { | ||
return $queryFeatures; | ||
} | ||
|
||
$fromRoot = reset($from); | ||
if (! $fromRoot instanceof Query\AST\IdentificationVariableDeclaration) { | ||
return $queryFeatures; | ||
} | ||
|
||
if (! $fromRoot->rangeVariableDeclaration) { | ||
return $queryFeatures; | ||
} | ||
|
||
$rootAlias = $fromRoot->rangeVariableDeclaration->aliasIdentificationVariable; | ||
try { | ||
$rootClassMetadata = $entityManager->getClassMetadata($fromRoot->rangeVariableDeclaration->abstractSchemaName); | ||
$queryFeatures['rootEntityHasSingleIdentifierFieldName'] = (bool) $rootClassMetadata->getSingleIdentifierFieldName(); | ||
} catch (MappingException) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this necessary? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mean the catching of the It's primarily needed because this is how Also, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We usually avoid using exception for flow control, and I think that's because they are costly. I think I would add a dedicated method, so as to remove the need of casting the result to a boolean as well, even if it means a little duplication.
In what case will |
||
return $queryFeatures; | ||
} | ||
|
||
$rootClassName = $fromRoot->rangeVariableDeclaration->abstractSchemaName; | ||
|
||
$aliasesToClassNameOrClassMetadataMap = []; | ||
$aliasesToClassNameOrClassMetadataMap[$rootAlias] = $rootClassMetadata; | ||
$toManyJoinsAliases = []; | ||
|
||
// Check the Joins list. | ||
foreach ($fromRoot->joins as $join) { | ||
if (! $join instanceof Join || ! $join->joinAssociationDeclaration instanceof JoinAssociationDeclaration) { | ||
return $queryFeatures; | ||
} | ||
|
||
$joinParentAlias = $join->joinAssociationDeclaration->joinAssociationPathExpression->identificationVariable; | ||
$joinParentFieldName = $join->joinAssociationDeclaration->joinAssociationPathExpression->associationField; | ||
$joinAlias = $join->joinAssociationDeclaration->aliasIdentificationVariable; | ||
|
||
// Every Join descending from a ToMany Join is "in principle" also a ToMany Join | ||
if (in_array($joinParentAlias, $toManyJoinsAliases, true)) { | ||
$toManyJoinsAliases[] = $joinAlias; | ||
|
||
continue; | ||
} | ||
|
||
$parentClassMetadata = $aliasesToClassNameOrClassMetadataMap[$joinParentAlias] ?? null; | ||
if (! $parentClassMetadata) { | ||
return $queryFeatures; | ||
} | ||
|
||
// Load entity class metadata. | ||
if (is_string($parentClassMetadata)) { | ||
try { | ||
$parentClassMetadata = $entityManager->getClassMetadata($parentClassMetadata); | ||
$aliasesToClassNameOrClassMetadataMap[$joinParentAlias] = $parentClassMetadata; | ||
} catch (MappingException) { | ||
return $queryFeatures; | ||
} | ||
} | ||
|
||
$parentJoinAssociationMapping = $parentClassMetadata->associationMappings[$joinParentFieldName] ?? null; | ||
if (! $parentJoinAssociationMapping) { | ||
return $queryFeatures; | ||
} | ||
|
||
$aliasesToClassNameOrClassMetadataMap[$joinAlias] = $parentJoinAssociationMapping['targetEntity']; | ||
|
||
if (! ($parentJoinAssociationMapping['type'] & ClassMetadata::TO_MANY)) { | ||
continue; | ||
} | ||
|
||
// The Join is a ToMany Join. | ||
$toManyJoinsAliases[] = $joinAlias; | ||
} | ||
|
||
$queryFeatures['queryCouldHaveToManyJoins'] = count($toManyJoinsAliases) > 0; | ||
|
||
// Check the Select list. | ||
foreach ($queryAST->selectClause->selectExpressions as $selectExpression) { | ||
if (! $selectExpression instanceof SelectExpression) { | ||
return $queryFeatures; | ||
} | ||
|
||
// Must not use any of the ToMany aliases | ||
if (is_string($selectExpression->expression)) { | ||
foreach ($toManyJoinsAliases as $toManyJoinAlias) { | ||
if ( | ||
$selectExpression->expression === $toManyJoinAlias | ||
|| str_starts_with($selectExpression->expression, $toManyJoinAlias . '.') | ||
) { | ||
return $queryFeatures; | ||
} | ||
} | ||
} | ||
|
||
// If it's a function, then it has to be one from the following list. Reason: in some databases, | ||
// there are functions that "generate rows". | ||
if ( | ||
$selectExpression->expression instanceof Query\AST\Functions\FunctionNode | ||
&& ! in_array($selectExpression->expression::class, [ | ||
Query\AST\Functions\CountFunction::class, | ||
Query\AST\Functions\AvgFunction::class, | ||
Query\AST\Functions\SumFunction::class, | ||
Query\AST\Functions\MinFunction::class, | ||
Query\AST\Functions\MaxFunction::class, | ||
], true) | ||
) { | ||
return $queryFeatures; | ||
} | ||
} | ||
|
||
// If there are ToMany Joins, then the Select clause has to use the DISTINCT keyword. Note: the foreach | ||
// above also ensures that the ToMany Joins are not in the Select list, which is relevant. | ||
if ( | ||
count($toManyJoinsAliases) > 0 | ||
&& ! $queryAST->selectClause->isDistinct | ||
) { | ||
return $queryFeatures; | ||
} | ||
|
||
$queryFeatures['queryCouldProduceDuplicates'] = false; | ||
|
||
return $queryFeatures; | ||
} | ||
|
||
/** | ||
* Returns the query. | ||
*/ | ||
|
@@ -60,31 +268,80 @@ | |
} | ||
|
||
/** | ||
* Returns whether the query joins a collection. | ||
* @deprecated Use ::getQueryCouldProduceDuplicates() instead. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
* | ||
* @return bool Whether the query joins a collection. | ||
* Returns whether the query joins a collection. | ||
*/ | ||
public function getFetchJoinCollection(): bool | ||
{ | ||
return $this->fetchJoinCollection; | ||
return $this->queryCouldProduceDuplicates; | ||
} | ||
|
||
/** | ||
* Returns whether the query could produce partially duplicated records. | ||
*/ | ||
public function getQueryCouldProduceDuplicates(): bool | ||
{ | ||
return $this->queryCouldProduceDuplicates; | ||
} | ||
|
||
/** | ||
* @deprecated Use the individual ::get*OutputWalker() | ||
* | ||
* Returns whether the paginator will use an output walker. | ||
*/ | ||
public function getUseOutputWalkers(): bool|null | ||
{ | ||
return $this->useOutputWalkers; | ||
return $this->getUseResultQueryOutputWalker() && $this->getUseCountQueryOutputWalker(); | ||
} | ||
|
||
/** | ||
* @deprecated Use the individual ::set*OutputWalker() | ||
* | ||
* Sets whether the paginator will use an output walker. | ||
* | ||
* @return $this | ||
*/ | ||
public function setUseOutputWalkers(bool|null $useOutputWalkers): static | ||
{ | ||
$this->useOutputWalkers = $useOutputWalkers; | ||
$this->setUseResultQueryOutputWalker($useOutputWalkers); | ||
$this->setUseCountQueryOutputWalker($useOutputWalkers); | ||
|
||
return $this; | ||
} | ||
|
||
/** | ||
* Returns whether the paginator will use an output walker for the result query. | ||
*/ | ||
public function getUseResultQueryOutputWalker(): bool|null | ||
{ | ||
return $this->useResultQueryOutputWalker; | ||
} | ||
|
||
/** | ||
* Sets whether the paginator will use an output walker for the result query. | ||
*/ | ||
public function setUseResultQueryOutputWalker(bool|null $useResultQueryOutputWalker): static | ||
{ | ||
$this->useResultQueryOutputWalker = $useResultQueryOutputWalker; | ||
|
||
return $this; | ||
} | ||
|
||
/** | ||
* Returns whether the paginator will use an output walker for the count query. | ||
*/ | ||
public function getUseCountQueryOutputWalker(): bool|null | ||
{ | ||
return $this->useCountQueryOutputWalker; | ||
} | ||
|
||
/** | ||
* Sets whether the paginator will use an output walker for the count query. | ||
*/ | ||
public function setUseCountQueryOutputWalker(bool|null $useCountQueryOutputWalker): static | ||
{ | ||
$this->useCountQueryOutputWalker = $useCountQueryOutputWalker; | ||
|
||
return $this; | ||
} | ||
|
@@ -112,7 +369,7 @@ | |
$offset = $this->query->getFirstResult(); | ||
$length = $this->query->getMaxResults(); | ||
|
||
if ($this->fetchJoinCollection && $length !== null) { | ||
if ($this->queryCouldProduceDuplicates && $length !== null) { | ||
$subQuery = $this->cloneQuery($this->query); | ||
|
||
if ($this->useOutputWalker($subQuery)) { | ||
|
@@ -171,13 +428,18 @@ | |
/** | ||
* Determines whether to use an output walker for the query. | ||
*/ | ||
private function useOutputWalker(Query $query): bool | ||
private function useOutputWalker(Query $query, bool $forCountQuery = false): bool | ||
{ | ||
if ($this->useOutputWalkers === null) { | ||
return (bool) $query->getHint(Query::HINT_CUSTOM_OUTPUT_WALKER) === false; | ||
if ($forCountQuery === false && $this->useResultQueryOutputWalker !== null) { | ||
d-ph marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return $this->useResultQueryOutputWalker; | ||
} | ||
|
||
if ($forCountQuery === true && $this->useCountQueryOutputWalker !== null) { | ||
d-ph marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return $this->useCountQueryOutputWalker; | ||
} | ||
|
||
return $this->useOutputWalkers; | ||
// When a custom output walker already present, then do not use the Paginator's. | ||
return $query->getHint(Query::HINT_CUSTOM_OUTPUT_WALKER) === false; | ||
} | ||
|
||
/** | ||
|
@@ -205,10 +467,20 @@ | |
$countQuery = $this->cloneQuery($this->query); | ||
|
||
if (! $countQuery->hasHint(CountWalker::HINT_DISTINCT)) { | ||
$countQuery->setHint(CountWalker::HINT_DISTINCT, true); | ||
$hintDistinctDefaultTrue = true; | ||
|
||
// When not joining onto *ToMany relations, then use a simpler COUNT query in the CountWalker. | ||
d-ph marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if ( | ||
$this->queryStyleAutoDetectionEnabled | ||
&& ! $this->queryCouldHaveToManyJoins | ||
) { | ||
$hintDistinctDefaultTrue = false; | ||
} | ||
|
||
$countQuery->setHint(CountWalker::HINT_DISTINCT, $hintDistinctDefaultTrue); | ||
} | ||
|
||
if ($this->useOutputWalker($countQuery)) { | ||
if ($this->useOutputWalker($countQuery, forCountQuery: true)) { | ||
$platform = $countQuery->getEntityManager()->getConnection()->getDatabasePlatform(); // law of demeter win | ||
|
||
$rsm = new ResultSetMapping(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The docs need to be updated at the very least because of this. Can you also read it and see if anything else needs to be modified, and whether any new behavior you are adding is worth documenting?