From 3a5152831004840f5a2328f5577e5b4bbba9dd97 Mon Sep 17 00:00:00 2001 From: ilya Date: Sun, 6 Oct 2024 14:18:07 +0300 Subject: [PATCH] query only total not in years images fixes --- .../src/main/resources/application.conf | 5 +++++ .../src/main/scala/org/scalawiki/MwBot.scala | 4 ++-- .../org/scalawiki/query/QueryLibrary.scala | 2 +- .../org/scalawiki/wlx/query/ImageQuery.scala | 20 ++++++++++++------- .../org/scalawiki/wlx/stat/Statistics.scala | 13 ++++++------ 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/scalawiki-core/src/main/resources/application.conf b/scalawiki-core/src/main/resources/application.conf index 11f6f807..eb1aedc0 100644 --- a/scalawiki-core/src/main/resources/application.conf +++ b/scalawiki-core/src/main/resources/application.conf @@ -1,4 +1,9 @@ akka.http.parsing { max-content-length = 16m max-to-strict-bytes = 16m +} + +akka.default-dispatcher { + type = Dispatcher + executor = "thread-pool-executor" } \ No newline at end of file diff --git a/scalawiki-core/src/main/scala/org/scalawiki/MwBot.scala b/scalawiki-core/src/main/scala/org/scalawiki/MwBot.scala index 042918a2..fb46fc5c 100644 --- a/scalawiki-core/src/main/scala/org/scalawiki/MwBot.scala +++ b/scalawiki-core/src/main/scala/org/scalawiki/MwBot.scala @@ -25,6 +25,8 @@ trait ActionBot { limit: Option[Long] = None ): Future[Iterable[Page]] + def log: LoggingAdapter + } trait MwBot extends ActionBot { @@ -70,8 +72,6 @@ trait MwBot extends ActionBot { def system: ActorSystem - def log: LoggingAdapter - def mediaWikiVersion: MediaWikiVersion } diff --git a/scalawiki-core/src/main/scala/org/scalawiki/query/QueryLibrary.scala b/scalawiki-core/src/main/scala/org/scalawiki/query/QueryLibrary.scala index fb46a79d..08ba310a 100644 --- a/scalawiki-core/src/main/scala/org/scalawiki/query/QueryLibrary.scala +++ b/scalawiki-core/src/main/scala/org/scalawiki/query/QueryLibrary.scala @@ -24,7 +24,7 @@ trait QueryLibrary { ): Action = imagesQuery(generator, withUrl, withMetadata, rvSlots) def imagesByIds( - pageIds: Set[Long], + pageIds: Seq[Long], withUrl: Boolean = false, withMetadata: Boolean = false, rvSlots: Option[String] = None diff --git a/scalawiki-wlx/src/main/scala/org/scalawiki/wlx/query/ImageQuery.scala b/scalawiki-wlx/src/main/scala/org/scalawiki/wlx/query/ImageQuery.scala index 03991940..08479413 100644 --- a/scalawiki-wlx/src/main/scala/org/scalawiki/wlx/query/ImageQuery.scala +++ b/scalawiki-wlx/src/main/scala/org/scalawiki/wlx/query/ImageQuery.scala @@ -1,14 +1,14 @@ package org.scalawiki.wlx.query import org.scalawiki.dto.cmd.Action -import org.scalawiki.dto.cmd.query.{Generator, Query} import org.scalawiki.dto.cmd.query.list._ +import org.scalawiki.dto.cmd.query.{Generator, Query} import org.scalawiki.dto.{Image, Namespace} import org.scalawiki.query.QueryLibrary import org.scalawiki.wlx.dto.Contest import org.scalawiki.{ActionBot, MwBot} -import scala.collection.IterableOnce.iterableOnceExtensionMethods +import java.util.concurrent.atomic.AtomicInteger import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future @@ -31,7 +31,7 @@ class ImageQueryApi(bot: ActionBot) extends ImageQuery with QueryLibrary { CategoryMembers( CmTitle(contest.imagesCategory), CmNamespace(Seq(Namespace.FILE)), - CmLimit("400") + CmLimit("max") ) ) @@ -56,14 +56,20 @@ class ImageQueryApi(bot: ActionBot) extends ImageQuery with QueryLibrary { contest: Contest, pageIds: Set[Long] ): Future[Iterable[Image]] = { + bot.log.info(s"imagesWithTemplateByIds pageIds size: ${pageIds.size}") + val blockSize = 50 + val fetched = new AtomicInteger(0) val specialNominationTemplates = contest.specialNominations.flatMap(_.fileTemplate).toSet Future - .sequence(pageIds.sliding(50).map { idsSlice => + .sequence(pageIds.toSeq.sorted.grouped(blockSize).map { idsSlice => imagesByIds(idsSlice, withMetadata = true) for (pages <- bot.run(imagesByIds(idsSlice, withMetadata = true))) - yield pages.flatMap( - Image.fromPage(contest.fileTemplate, specialNominationTemplates) - ) + yield { + bot.log.info(s"Fetched ${fetched.addAndGet(pages.size)} of ${pageIds.size}") + pages.flatMap( + Image.fromPage(contest.fileTemplate, specialNominationTemplates) + ) + } }) .map(_.flatten.toIndexedSeq) } diff --git a/scalawiki-wlx/src/main/scala/org/scalawiki/wlx/stat/Statistics.scala b/scalawiki-wlx/src/main/scala/org/scalawiki/wlx/stat/Statistics.scala index 351cad6b..a43bca1e 100644 --- a/scalawiki-wlx/src/main/scala/org/scalawiki/wlx/stat/Statistics.scala +++ b/scalawiki-wlx/src/main/scala/org/scalawiki/wlx/stat/Statistics.scala @@ -101,6 +101,8 @@ class Statistics( private val contests = (startYear.getOrElse(currentYear) to currentYear).map(y => contest.copy(year = y)) + private lazy val totalImageQuery: ImageQuery = imageQuery.getOrElse(getImageQuery()) + def getImageQuery(year: Option[Int] = None): ImageQuery = { val cacheName = s"${contest.campaign}-${year.getOrElse("all")}" ImageQuery.create(new CachedBot(Site.commons, cacheName, true)) @@ -152,17 +154,16 @@ class Statistics( dbsByYear: Seq[ImageDB], totalPageIds: Iterable[Long] ): Future[ImageDB] = { - val missingPageIds = totalPageIds.toSet -- dbsByYear.flatMap(_.images.flatMap(_.pageId)).toSet + val idsByYear = dbsByYear.flatMap(_.images.flatMap(_.pageId)).toSet + val missingPageIds = totalPageIds.toSet -- idsByYear for { - commons <- imageQuery - .getOrElse(getImageQuery()) - .imagesWithTemplateByIds(contest, missingPageIds) + commons <- totalImageQuery.imagesWithTemplateByIds(contest, missingPageIds) wiki <- imageQueryWiki.map(_.imagesWithTemplate(contest)).getOrElse(Future.successful(Nil)) - } yield new ImageDB(contest, commons ++ wiki, monumentDb) + } yield new ImageDB(contest, dbsByYear.flatMap(_.images) ++ commons ++ wiki, monumentDb) } private def imageIdsByTemplate(): Future[Iterable[Long]] = - imageQuery.getOrElse(getImageQuery()).imageIdsWithTemplate(contest) + totalImageQuery.imageIdsWithTemplate(contest) def init(total: Boolean): Unit = { gatherData(total = total)