From 68b4d31f38c1f1020d747cfeaef98c754f7a6533 Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff
Date: Tue, 19 Mar 2024 13:51:48 +0100
Subject: [PATCH] Fix is_current for new TeX Live webpage

---
Review notes (text in this area is ignored by git am):
is_current() now feeds the response body through GetText, an HTMLParser
subclass that keeps only text nodes, and runs the version regex on that
text instead of on the raw HTML.
GetText.handle_data also receives the content of script/style elements;
that extra text does not affect the regex used here.
NOTE(review): requests.get() is called without a timeout (unchanged
context, not touched by this patch) - consider a follow-up.

 install_texlive/__init__.py | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/install_texlive/__init__.py b/install_texlive/__init__.py
index dcb77fd..5100c31 100644
--- a/install_texlive/__init__.py
+++ b/install_texlive/__init__.py
@@ -6,8 +6,11 @@
 from io import BytesIO
 import tarfile
 from functools import lru_cache
+from io import StringIO
+from html.parser import HTMLParser
 
-__version__ = '0.3.3'
+
+__version__ = '0.3.4'
 
 log = logging.getLogger(__name__)
 
@@ -15,12 +18,30 @@
 OLDURL = 'https://ftp.tu-chemnitz.de/pub/tug/historic/systems/texlive/{v}/tlnet-final/'
 
 
+class GetText(HTMLParser):
+    """Only extract text of html page"""
+
+    def __init__(self):
+        super().__init__()
+        self._text = StringIO()
+
+    def handle_data(self, d):
+        self._text.write(d)
+
+    @property
+    def text(self):
+        return self._text.getvalue()
+
+
 @lru_cache
 def is_current(version):
     r = requests.get('https://tug.org/texlive/')
     r.raise_for_status()
 
-    m = re.search(r'Current release: TeX Live ([0-9]{4})', r.text)
+    parser = GetText()
+    parser.feed(r.text)
+
+    m = re.search(r'Current release: TeX Live ([0-9]{4})', parser.text)
     if not m:
         raise ValueError('Could not determine current TeX Live version')
 