Skip to content

Commit

Permalink
Query only the total images that are not already in the per-year results; fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
intracer committed Oct 6, 2024
1 parent 418aa45 commit 3a51528
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 16 deletions.
5 changes: 5 additions & 0 deletions scalawiki-core/src/main/resources/application.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# Akka HTTP parser limits; both are set to 16m.
akka.http.parsing {
# Upper bound on the content length of an HTTP message entity accepted by the parser
# (per the Akka HTTP reference configuration).
max-content-length = 16m
# Upper bound on the number of bytes buffered when an entity is read fully into memory
# via toStrict (per the Akka HTTP reference configuration).
max-to-strict-bytes = 16m
}

# Run the default dispatcher on a thread-pool executor.
# Akka reads the default dispatcher settings from `akka.actor.default-dispatcher`;
# a top-level `akka.default-dispatcher` section is not a key Akka looks up, so the
# settings have to live under `akka.actor` to take effect.
akka.actor.default-dispatcher {
  type = Dispatcher
  executor = "thread-pool-executor"
}
4 changes: 2 additions & 2 deletions scalawiki-core/src/main/scala/org/scalawiki/MwBot.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ trait ActionBot {
limit: Option[Long] = None
): Future[Iterable[Page]]

def log: LoggingAdapter

}

trait MwBot extends ActionBot {
Expand Down Expand Up @@ -70,8 +72,6 @@ trait MwBot extends ActionBot {

def system: ActorSystem

def log: LoggingAdapter

def mediaWikiVersion: MediaWikiVersion
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ trait QueryLibrary {
): Action = imagesQuery(generator, withUrl, withMetadata, rvSlots)

def imagesByIds(
pageIds: Set[Long],
pageIds: Seq[Long],
withUrl: Boolean = false,
withMetadata: Boolean = false,
rvSlots: Option[String] = None
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package org.scalawiki.wlx.query

import org.scalawiki.dto.cmd.Action
import org.scalawiki.dto.cmd.query.{Generator, Query}
import org.scalawiki.dto.cmd.query.list._
import org.scalawiki.dto.cmd.query.{Generator, Query}
import org.scalawiki.dto.{Image, Namespace}
import org.scalawiki.query.QueryLibrary
import org.scalawiki.wlx.dto.Contest
import org.scalawiki.{ActionBot, MwBot}

import scala.collection.IterableOnce.iterableOnceExtensionMethods
import java.util.concurrent.atomic.AtomicInteger
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

Expand All @@ -31,7 +31,7 @@ class ImageQueryApi(bot: ActionBot) extends ImageQuery with QueryLibrary {
CategoryMembers(
CmTitle(contest.imagesCategory),
CmNamespace(Seq(Namespace.FILE)),
CmLimit("400")
CmLimit("max")
)
)

Expand All @@ -56,14 +56,20 @@ class ImageQueryApi(bot: ActionBot) extends ImageQuery with QueryLibrary {
contest: Contest,
pageIds: Set[Long]
): Future[Iterable[Image]] = {
bot.log.info(s"imagesWithTemplateByIds pageIds size: ${pageIds.size}")
val blockSize = 50
val fetched = new AtomicInteger(0)
val specialNominationTemplates = contest.specialNominations.flatMap(_.fileTemplate).toSet
Future
.sequence(pageIds.sliding(50).map { idsSlice =>
.sequence(pageIds.toSeq.sorted.grouped(blockSize).map { idsSlice =>
imagesByIds(idsSlice, withMetadata = true)
for (pages <- bot.run(imagesByIds(idsSlice, withMetadata = true)))
yield pages.flatMap(
Image.fromPage(contest.fileTemplate, specialNominationTemplates)
)
yield {
bot.log.info(s"Fetched ${fetched.addAndGet(pages.size)} of ${pageIds.size}")
pages.flatMap(
Image.fromPage(contest.fileTemplate, specialNominationTemplates)
)
}
})
.map(_.flatten.toIndexedSeq)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ class Statistics(
// One copy of `contest` per year, from `startYear` (or `currentYear` when it is not set)
// up to and including `currentYear`.
private val contests = {
  val firstYear = startYear.getOrElse(currentYear)
  for (year <- firstYear to currentYear) yield contest.copy(year = year)
}

// Image query for the contest as a whole: the supplied `imageQuery` when there is one,
// otherwise a query built by `getImageQuery()` with no year. Being a lazy val, it is
// evaluated on first use and the same instance is reused by later callers.
private lazy val totalImageQuery: ImageQuery = imageQuery.getOrElse(getImageQuery())

def getImageQuery(year: Option[Int] = None): ImageQuery = {
val cacheName = s"${contest.campaign}-${year.getOrElse("all")}"
ImageQuery.create(new CachedBot(Site.commons, cacheName, true))
Expand Down Expand Up @@ -152,17 +154,16 @@ class Statistics(
dbsByYear: Seq[ImageDB],
totalPageIds: Iterable[Long]
): Future[ImageDB] = {
val missingPageIds = totalPageIds.toSet -- dbsByYear.flatMap(_.images.flatMap(_.pageId)).toSet
val idsByYear = dbsByYear.flatMap(_.images.flatMap(_.pageId)).toSet
val missingPageIds = totalPageIds.toSet -- idsByYear
for {
commons <- imageQuery
.getOrElse(getImageQuery())
.imagesWithTemplateByIds(contest, missingPageIds)
commons <- totalImageQuery.imagesWithTemplateByIds(contest, missingPageIds)
wiki <- imageQueryWiki.map(_.imagesWithTemplate(contest)).getOrElse(Future.successful(Nil))
} yield new ImageDB(contest, commons ++ wiki, monumentDb)
} yield new ImageDB(contest, dbsByYear.flatMap(_.images) ++ commons ++ wiki, monumentDb)
}

private def imageIdsByTemplate(): Future[Iterable[Long]] =
imageQuery.getOrElse(getImageQuery()).imageIdsWithTemplate(contest)
totalImageQuery.imageIdsWithTemplate(contest)

def init(total: Boolean): Unit = {
gatherData(total = total)
Expand Down

0 comments on commit 3a51528

Please sign in to comment.