-
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
use mirrors data to update db instead of countries
- Loading branch information
Showing
13 changed files
with
307 additions
and
267 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,140 +1,110 @@ | ||
from typing import Any | ||
from urllib.parse import urlsplit | ||
from dataclasses import dataclass | ||
|
||
import requests | ||
from bs4 import BeautifulSoup, NavigableString | ||
from bs4.element import Tag | ||
from sqlalchemy import select | ||
from sqlalchemy.orm import Session as OrmSession | ||
from sqlalchemy.orm import selectinload | ||
|
||
from mirrors_qa_backend import logger, schemas | ||
from mirrors_qa_backend.db import models | ||
from mirrors_qa_backend.settings import Settings | ||
from mirrors_qa_backend.exceptions import EmptyMirrorsError | ||
|
||
|
||
def create_mirrors(session: OrmSession, countries: list[schemas.Country]) -> None:
    """
    Add every country in `countries`, together with its mirrors, to the session.

    A models.Country is built from each entry's code and name, and one
    models.Mirror is built from the dumped fields of each of its mirrors.
    """
    for entry in countries:
        db_country = models.Country(code=entry.code, name=entry.name)
        db_country.mirrors = [
            models.Mirror(**mirror.model_dump()) for mirror in entry.mirrors
        ]
        session.add(db_country)
@dataclass
class UpdateMirrorsResult:
    """Tally of the changes made by one update of the mirrors in the database.

    Both counters default to zero.
    """

    # Number of mirrors newly created in the database.
    nb_mirrors_added: int = 0
    # Number of database mirrors that were marked as disabled.
    nb_mirrors_disabled: int = 0
|
||
def update_mirrors(session: OrmSession, countries: list[schemas.Country]) -> None: | ||
|
||
def create_mirrors(session: OrmSession, mirrors: list[schemas.Mirror]) -> int:
    """
    Given a list of schemas.Mirror, saves all the mirrors
    to the database.

    Each mirror is attached to the country whose code matches
    `mirror.country.code`; that country is created first if the lookup
    finds no such row.

    Assumes that each mirror does not exist on the database.

    Returns the total number of mirrors created.
    """
    total = 0
    for mirror in mirrors:
        db_mirror = models.Mirror(
            id=mirror.id,
            base_url=mirror.base_url,
            enabled=mirror.enabled,
            region=mirror.region,
            asn=mirror.asn,
            score=mirror.score,
            latitude=mirror.latitude,
            longitude=mirror.longitude,
            country_only=mirror.country_only,
            # Previously copied from `country_only`, which stored the wrong
            # flag. NOTE(review): assumes schemas.Mirror exposes
            # `region_only` like the model does -- confirm.
            region_only=mirror.region_only,
            as_only=mirror.as_only,
            other_countries=mirror.other_countries,
        )
        # Ensure the country exists for the mirror
        country = session.scalars(
            select(models.Country).where(models.Country.code == mirror.country.code)
        ).one_or_none()

        if country is None:
            country = models.Country(code=mirror.country.code, name=mirror.country.name)
            session.add(country)

        db_mirror.country = country
        session.add(db_mirror)
        logger.debug(
            f"Registered new mirror: {db_mirror.id!r} for country: {country.name!r}"
        )
        total += 1
    return total
|
||
|
||
def update_mirrors( | ||
session: OrmSession, mirrors: list[schemas.Mirror] | ||
) -> UpdateMirrorsResult: | ||
""" | ||
Given a list of current_mirrors, compares the list with the existing mirrors | ||
in the database and disables mirrors in the database that are not in the list. | ||
New mirrors from the list that are not in the database are created in the | ||
database. | ||
Returns UpdateMirrorsResult showing the total mirrors added and disabled. | ||
""" | ||
result = UpdateMirrorsResult() | ||
# If there are no countries, disable all mirrors | ||
if not countries: | ||
for mirror in session.scalars(select(models.Mirror)).all(): | ||
mirror.enabled = False | ||
session.add(mirror) | ||
return | ||
|
||
query = select(models.Country).options(selectinload(models.Country.mirrors)) | ||
# Map the country codes to each country from the database. To be used | ||
# to compare against the list of current countries | ||
db_countries: dict[str, models.Country] = { | ||
country.code: country for country in session.scalars(query).all() | ||
if not mirrors: | ||
raise EmptyMirrorsError("mirrors list must not be empty") | ||
|
||
# Map the id (hostname) of each mirror from the mirrors list for comparison | ||
# against the id of mirrors from the database. To be used in determining | ||
# if this mirror is a new mirror, in which case it should be added | ||
current_mirrors: dict[str, schemas.Mirror] = { | ||
mirror.id: mirror for mirror in mirrors | ||
} | ||
# Map the country codes to each country from the current list of countries. | ||
# To be used in determining if a country is to be newly registered | ||
current_countries: dict[str, schemas.Country] = { | ||
country.code: country for country in countries | ||
|
||
# Map the id (hostname) of each mirror from the database for comparison | ||
# against the id of mirrors in current_mirrors. To be used in determining | ||
# if this mirror should be disabled | ||
query = select(models.Mirror).options(selectinload(models.Mirror.country)) | ||
db_mirrors: dict[str, models.Mirror] = { | ||
mirror.id: mirror for mirror in session.scalars(query).all() | ||
} | ||
|
||
for country_code, country in current_countries.items(): | ||
if country_code not in db_countries: | ||
# Register all of the country's mirrors as the country is | ||
# a new country | ||
logger.debug("Registering new mirrors for {country_code!r}") | ||
c = models.Country(code=country.code, name=country.name) | ||
c.mirrors = [models.Mirror(**m.model_dump()) for m in country.mirrors] | ||
session.add(c) | ||
|
||
for code, db_country in db_countries.items(): | ||
if code in current_countries: | ||
# Even though the db_country is "current", ensure its mirrors | ||
# are in sync with the current mirrors | ||
current_mirrors: dict[str, schemas.Mirror] = { | ||
m.id: m for m in current_countries[code].mirrors | ||
} | ||
db_mirrors: dict[str, models.Mirror] = {m.id: m for m in db_country.mirrors} | ||
|
||
for db_mirror in db_mirrors.values(): | ||
if db_mirror.id not in current_mirrors: | ||
logger.debug(f"Disabling mirror {db_mirror.id!r}") | ||
db_mirror.enabled = False | ||
session.add(db_mirror) | ||
|
||
for mirror_id, mirror in current_mirrors.items(): | ||
if mirror_id not in db_mirrors: | ||
logger.debug( | ||
f"Registering new mirror {mirror.id!r} for " | ||
"country: {db_country.name!r}" | ||
) | ||
db_country.mirrors.append(models.Mirror(**mirror.model_dump())) | ||
session.add(db_country) | ||
else: | ||
# disable all of the country's mirrors as they have been removed | ||
for db_mirror in db_country.mirrors: | ||
logger.debug(f"Disabling mirror {db_mirror.id!r}") | ||
db_mirror.enabled = False | ||
session.add(db_mirror) | ||
|
||
|
||
def get_current_mirror_countries() -> list[schemas.Country]: | ||
def find_country_rows(tag: Tag) -> bool: | ||
""" | ||
Filters out table rows that do not contain mirror | ||
data from the table body. | ||
""" | ||
return tag.name == "tr" and tag.findChild("td", class_="newregion") is None | ||
|
||
r = requests.get(Settings.mirrors_url, timeout=Settings.requests_timeout) | ||
r.raise_for_status() | ||
|
||
soup = BeautifulSoup(r.text, features="html.parser") | ||
body = soup.find("tbody") | ||
|
||
if body is None or isinstance(body, NavigableString): | ||
raise ValueError | ||
# Given a country might have more than one mirror, set up a dictionary | ||
# of country_code to the country's data. If it is the first time we | ||
# are seeing the country, we save it along with its mirror, else, | ||
# we simply update its mirrors list. | ||
countries: dict[str, schemas.Country] = {} | ||
rows = body.find_all(find_country_rows) | ||
for row in rows: | ||
country_name = row.find("img").next_sibling.text.strip() | ||
if country_name in Settings.mirrors_exclusion_list: | ||
continue | ||
country_code = row.find("img")["alt"] | ||
base_url = row.find("a", string="HTTP")["href"] | ||
hostname: Any = urlsplit( | ||
base_url | ||
).netloc # pyright: ignore [reportUnknownMemberType] | ||
|
||
if country_code not in countries: | ||
countries[country_code] = schemas.Country( | ||
code=country_code, | ||
name=country_name, | ||
mirrors=[ | ||
schemas.Mirror( | ||
id=hostname, | ||
base_url=base_url, | ||
enabled=True, | ||
) | ||
], | ||
) | ||
else: | ||
countries[country_code].mirrors.append( | ||
schemas.Mirror( | ||
id=hostname, | ||
base_url=base_url, | ||
enabled=True, | ||
) | ||
# Create any mirror that doesn't exist on the database | ||
for mirror_id, mirror in current_mirrors.items(): | ||
if mirror_id not in db_mirrors: | ||
# Create the mirror as it doesn't exist on the database. | ||
result.nb_mirrors_added += create_mirrors(session, [mirror]) | ||
|
||
# Disable any mirror in the database that doesn't exist on the current | ||
# list of mirrors | ||
for db_mirror_id, db_mirror in db_mirrors.items(): | ||
if db_mirror_id not in current_mirrors: | ||
logger.debug( | ||
f"Disabling mirror: {db_mirror.id!r} for " | ||
f"country: {db_mirror.country.name!r}" | ||
) | ||
return list(countries.values()) | ||
db_mirror.enabled = False | ||
session.add(db_mirror) | ||
result.nb_mirrors_disabled += 1 | ||
return result |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import argparse | ||
import logging | ||
|
||
from mirrors_qa_backend import Settings, db, logger | ||
from mirrors_qa_backend.db import mirrors | ||
from mirrors_qa_backend.extract import get_current_mirrors | ||
|
||
|
||
def main():
    """
    Command-line entry point.

    Parses `--update-mirrors` and `--verbose`/`-v`. With `--verbose` the
    package logger is switched to DEBUG. With `--update-mirrors` the list
    returned by `get_current_mirrors()` is passed to
    `mirrors.update_mirrors` inside a `db.Session.begin()` block.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--update-mirrors",
        action="store_true",
        help=f"Update the list of mirrors from {Settings.mirrors_url}",
    )
    cli.add_argument(
        "--verbose", "-v", help="Show verbose output", action="store_true"
    )
    options = cli.parse_args()

    if options.verbose:
        logger.setLevel(logging.DEBUG)

    # Nothing else to do unless an update was requested.
    if not options.update_mirrors:
        return

    with db.Session.begin() as session:
        mirrors.update_mirrors(session, get_current_mirrors())
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
class EmptyMirrorsError(Exception):
    """Error signalling that a list of mirrors was unexpectedly empty."""
Oops, something went wrong.