Skip to content

Commit

Permalink
Added catalog etag check
Browse files Browse the repository at this point in the history
  • Loading branch information
rgaudin committed Jan 18, 2025
1 parent 576e681 commit dc24e50
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 23 deletions.
14 changes: 14 additions & 0 deletions bittorrent-seeder/src/kiwixseeder/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import platform
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Self
from urllib.parse import ParseResult, urlparse

Expand Down Expand Up @@ -177,3 +178,16 @@ def get(cls) -> "Context":
if not cls._instance:
raise OSError("Uninitialized context") # pragma: no cover
return cls._instance

@staticmethod
def get_cache_path(fname: str) -> Path:
    """Return the platform-specific path of the cache entry `fname`.

    Resolution order:
      1. ``$XDG_CACHE_HOME/<fname>`` when that variable is set, on any
         platform (no ``NAME`` sub-folder is inserted in this case).
      2. macOS: ``~/Library/Caches/<NAME>/<fname>``
      3. Windows: ``%APPDATA%/<NAME>/<fname>``
      4. anything else: ``~/.config/<NAME>/<fname>``

    The path is only computed: nothing is created on disk.
    """
    xdg_cache_home = os.getenv("XDG_CACHE_HOME")
    # favor this env on any platform
    if xdg_cache_home:
        return Path(xdg_cache_home) / fname
    if Context.is_mac:
        return Path.home() / "Library" / "Caches" / NAME / fname
    if Context.is_win:
        appdata = os.getenv("APPDATA")
        # A bare "C:" fallback is drive-relative on Windows ("C:" / NAME is
        # "C:NAME", resolved against the current directory of drive C), and
        # an empty APPDATA would yield a relative path as well. Fall back to
        # the conventional roaming profile folder instead.
        base = Path(appdata) if appdata else Path.home() / "AppData" / "Roaming"
        return base / NAME / fname
    return Path.home() / ".config" / NAME / fname
55 changes: 34 additions & 21 deletions bittorrent-seeder/src/kiwixseeder/library.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import collections
import datetime
import os
import re
import urllib.parse
from collections.abc import Generator
Expand All @@ -13,10 +12,12 @@
import xmltodict
from iso639.exceptions import DeprecatedLanguageValue, InvalidLanguageValue

from kiwixseeder.context import NAME, Context
from kiwixseeder.context import Context
from kiwixseeder.download import get_btih_from_url, session
from kiwixseeder.utils import format_size

ETAG_CACHE_FILE = "OPDS.etag"
BTIH_CACHE_FOLDER = "zim-btih-maps"
context = Context.get()
logger = context.logger

Expand All @@ -28,19 +29,6 @@ def to_human_id(name: str, publisher: str | None = "", flavour: str | None = "")
return f"{publisher}:{name}:{flavour}"


def get_cache_path(fname: str) -> Path:
    """Return the platform-specific path of the cache entry `fname`.

    ``$XDG_CACHE_HOME`` wins on every platform when it is set (the entry is
    then placed directly under it, without a ``NAME`` sub-folder). Otherwise
    the entry lives in a ``NAME`` folder under ``~/Library/Caches`` (macOS),
    ``%APPDATA%`` (Windows) or ``~/.config`` (everything else).

    The path is only computed: nothing is created on disk.
    """
    xdg_cache_home = os.getenv("XDG_CACHE_HOME")
    # favor this env on any platform
    if xdg_cache_home:
        return Path(xdg_cache_home) / fname
    if Context.is_mac:
        return Path.home() / "Library" / "Caches" / NAME / fname
    if Context.is_win:
        appdata = os.getenv("APPDATA")
        # "C:" alone is a drive-relative path on Windows ("C:" / NAME gives
        # "C:NAME"), so use the conventional roaming profile folder when
        # APPDATA is missing or empty.
        base = Path(appdata) if appdata else Path.home() / "AppData" / "Roaming"
        return base / NAME / fname
    return Path.home() / ".config" / NAME / fname


class BookBtihMapper:
""" Disk-cached mapping of Book UUID to BT Info Hash
Expand All @@ -58,7 +46,7 @@ def read(cls, *, force: bool = False):
60
) >= now:
return
folder = get_cache_path("zim-btih-maps")
folder = context.get_cache_path("zim-btih-maps")
folder.mkdir(parents=True, exist_ok=True)
data = {
fpath.name.split(":", 1)[0]: fpath.name.split(":", 1)[1]
Expand All @@ -70,7 +58,7 @@ def read(cls, *, force: bool = False):

@classmethod
def write(cls):
    """Persist every known UUID/info-hash pair to the on-disk cache.

    Each pair is stored as an empty marker file named ``<uuid>:<btih>``
    inside the BTIH cache folder, which is created when missing.
    """
    folder = context.get_cache_path(BTIH_CACHE_FOLDER)
    folder.mkdir(parents=True, exist_ok=True)
    # cls.data is a mapping: iterating it directly yields only the keys and
    # the two-name unpacking then fails, hence .items().
    for uuid, btih in cls.data.items():
        folder.joinpath(f"{uuid}:{btih}").touch()
Expand All @@ -86,7 +74,7 @@ def add(cls, uuid: UUID, btih: str):
if uuids in cls.data:
return
cls.data[uuids] = btih
folder = get_cache_path("zim-btih-maps")
folder = context.get_cache_path(BTIH_CACHE_FOLDER)
folder.mkdir(parents=True, exist_ok=True)
folder.joinpath(f"{uuids}:{btih}").touch()

Expand Down Expand Up @@ -192,16 +180,38 @@ def __str__(self) -> str:
)


def read_etag_from_cache() -> str:
    """Return the catalog ETag saved by a previous run.

    Best-effort: an empty string is returned when the cache file is missing,
    unreadable or not decodable. Reading never creates anything on disk.
    """
    fpath = context.get_cache_path(ETAG_CACHE_FILE)
    try:
        return fpath.read_text(encoding="utf-8").strip()
    except (OSError, ValueError):
        # OSError: missing or unreadable file
        # ValueError: covers UnicodeDecodeError on corrupted content
        return ""

def write_etag_to_cache(value: str) -> None:
    """Store `value` as the catalog ETag in the cache file.

    The parent folder is created when missing and any previously stored
    value is overwritten.

    Raises:
        OSError: if the folder or the file cannot be written.
    """
    fpath = context.get_cache_path(ETAG_CACHE_FILE)
    fpath.parent.mkdir(parents=True, exist_ok=True)
    # explicit encoding so the file does not depend on the locale
    fpath.write_text(value, encoding="utf-8")

def query_etag() -> str:
    """Return the ETag currently advertised for the online catalog.

    Issues a HEAD request on the catalog entries URL, with the same
    parameters as the full catalog download. Best-effort: any failure
    (network error, HTTP error status, missing header) results in an empty
    string so that callers fall back to a regular refresh.
    """
    try:
        resp = session.head(
            f"{context.catalog_url}/entries", params={"count": "-1"}, timeout=30
        )
        # an ETag attached to an error response says nothing about the catalog
        resp.raise_for_status()
        return resp.headers.get("etag") or ""
    except Exception as exc:
        # deliberately broad (best-effort), but leave a trace of the failure
        logger.debug(f"Unable to query catalog ETag: {exc}")
    return ""


class Catalog:
def __init__(self):
# mapping of Book by ident (starts empty)
self._books: dict[str, Book] = {}
# mapping of language code (ISO-639-1) to the list of book-idents in it
self._by_langs: dict[str, list[str]] = {}
# initialised to the Unix epoch (UTC)
self.updated_on: datetime.datetime = datetime.datetime(
1970, 1, 1, tzinfo=datetime.UTC
)
# load the Book UUID to BT Info Hash mapping
# NOTE(review): force=True presumably bypasses the mapper's recency check — confirm
BookBtihMapper.read(force=True)
# ETag read from the cache file; read_etag_from_cache() gives "" on failure
self.etag: str = read_etag_from_cache()

def __contains__(self, ident: str) -> bool:
return ident in self.get_all_ids()
Expand Down Expand Up @@ -263,6 +273,8 @@ def ensure_fresh(self):
if not self._books:
self.do_refresh()



def do_refresh(self):
logger.debug(f"refreshing catalog via {context.catalog_url}")
books: dict[str, Book] = {}
Expand All @@ -272,6 +284,7 @@ def do_refresh(self):
f"{context.catalog_url}/entries", params={"count": "-1"}, timeout=30
)
resp.raise_for_status()
self.etag = resp.headers.get("etag") or ""
fetched_on = datetime.datetime.now(datetime.UTC)
catalog = xmltodict.parse(resp.content)
if "feed" not in catalog:
Expand Down
12 changes: 10 additions & 2 deletions bittorrent-seeder/src/kiwixseeder/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
RC_NOFILTER,
Context,
)
from kiwixseeder.library import Book, Catalog
from kiwixseeder.library import Book, Catalog, query_etag, write_etag_to_cache
from kiwixseeder.qbittorrent import TorrentManager
from kiwixseeder.utils import format_size

Expand Down Expand Up @@ -44,7 +44,9 @@ def run(self) -> int:
else:
raise exc

self.fetch_catalog()
if self.fetch_catalog():
logger.info("Catalog has not changed since last run, exiting.")
return 0
catalog_size = self.catalog.nb_books
self.reduce_catalog()

Expand Down Expand Up @@ -112,7 +114,13 @@ def connect_to_backend(self):

def fetch_catalog(self) -> bool:
    """Refresh the catalog unless the online one is unchanged.

    The ETag advertised online is compared with the one held by
    ``self.catalog``. When both are known and equal, nothing is fetched.
    Otherwise the catalog is refreshed and, unless in dry-run mode, its
    ETag is written to the cache.

    Returns:
        True when the online catalog is unchanged (nothing was fetched),
        False when the catalog was refreshed.
    """
    logger.info("Fetching catalog…")
    etag = query_etag()
    # online resource is the same as last time: nothing to fetch
    # (an empty ETag on either side never matches)
    if etag and etag == self.catalog.etag:
        return True
    self.catalog.ensure_fresh()
    if not context.dry_run:
        write_etag_to_cache(self.catalog.etag)
    logger.info(f"Catalog contains {self.catalog.nb_books} ZIMs")
    return False

def reduce_catalog(self):
Expand Down

0 comments on commit dc24e50

Please sign in to comment.