From 75e1641338dff9b7545d9b6829f720426c244ba4 Mon Sep 17 00:00:00 2001 From: Marcel Klehr Date: Thu, 9 Feb 2023 13:01:35 +0100 Subject: [PATCH] Clustering: Cap clustering batch size at 10k Signed-off-by: Marcel Klehr --- lib/BackgroundJobs/ClusterFacesJob.php | 3 ++- lib/Service/FaceClusterAnalyzer.php | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/BackgroundJobs/ClusterFacesJob.php b/lib/BackgroundJobs/ClusterFacesJob.php index 6fb89cf1..a624121d 100644 --- a/lib/BackgroundJobs/ClusterFacesJob.php +++ b/lib/BackgroundJobs/ClusterFacesJob.php @@ -19,6 +19,7 @@ class ClusterFacesJob extends QueuedJob { private IJobList $jobList; private LoggerInterface $logger; + public const BATCH_SIZE = 10000; public function __construct(ITimeFactory $time, Logger $logger, IJobList $jobList, FaceClusterAnalyzer $clusterAnalyzer) { parent::__construct($time); $this->logger = $logger; @@ -35,7 +36,7 @@ protected function run($argument) { /** @var string $userId */ $userId = $argument['userId']; try { - $this->clusterAnalyzer->calculateClusters($userId); + $this->clusterAnalyzer->calculateClusters($userId, self::BATCH_SIZE); } catch (\JsonException|Exception $e) { $this->logger->error('Failed to calculate face clusters', ['exception' => $e]); } diff --git a/lib/Service/FaceClusterAnalyzer.php b/lib/Service/FaceClusterAnalyzer.php index c4683583..afe33c51 100644 --- a/lib/Service/FaceClusterAnalyzer.php +++ b/lib/Service/FaceClusterAnalyzer.php @@ -36,7 +36,7 @@ public function __construct(FaceDetectionMapper $faceDetections, FaceClusterMapp * @throws \OCP\DB\Exception * @throws \JsonException */ - public function calculateClusters(string $userId): void { + public function calculateClusters(string $userId, int $batchSize = -1): void { $this->logger->debug('ClusterDebug: Retrieving face detections for user ' . $userId); $unclusteredDetections = $this->faceDetections->findUnclusteredByUserId($userId); @@ -50,6 +50,10 @@ public function calculateClusters(string $userId): void { return; } + if ($batchSize > 0 && count($unclusteredDetections) > $batchSize) { + $unclusteredDetections = array_slice($unclusteredDetections, 0, $batchSize); + } + $this->logger->debug('ClusterDebug: Found ' . count($unclusteredDetections) . " unclustered detections. Calculating clusters."); $sampledDetections = [];