Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement EN Kantine Parsing #238

Merged
merged 3 commits into from
Mar 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 93 additions & 6 deletions canteens/personalkantine.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,104 @@
import datetime
import re
from typing import Dict, List

from backend.backend import app, cache, cache_date_format, cache_ttl
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
from celery.utils.log import get_task_logger

from canteens.canteen import get_current_week, get_next_week
from backend.backend import app, cache, cache_date_format, cache_ttl
from canteens.canteen import get_current_week, get_next_week, VEGGIE, MEAT, FISH

logger = get_task_logger(__name__)
URL = 'https://personalkantine.personalabteilung.tu-berlin.de'

def get_date_range():

def get_date_range() -> List[datetime.date]:
    """Return the days of the week whose menu should be cached.

    From Monday to Friday this is whatever ``get_current_week()`` returns;
    on Saturday and Sunday (``weekday()`` 5 and 6) it is the result of
    ``get_next_week()`` instead.
    """
    is_weekend = datetime.date.today().weekday() >= 5
    if is_weekend:
        return get_next_week()
    return get_current_week()


def get_menu() -> Dict[str, str]:
    """Download the EN Kantine page and parse its weekly menu.

    Returns:
        The mapping produced by ``parse_menu`` for the first
        ``<ul class="Menu__accordion">`` element found on the page.

    Raises:
        RuntimeError: if the page contains no menu accordion at all.
        Exception: anything raised by ``download_menu`` is propagated.
    """
    soup = BeautifulSoup(download_menu(), 'html.parser')

    menus = soup.find_all("ul", class_="Menu__accordion")
    if not menus:
        logger.error('Could not find any menu items for EN Kantine')
        # Carry a message so the retry/traceback of the calling task says
        # what actually went wrong instead of an empty ``Exception``.
        raise RuntimeError('Could not find any menu items for EN Kantine')
    if len(menus) > 1:
        logger.warning('Found more than one menu item for EN Kantine, using the first one')

    return parse_menu(menus[0])


def download_menu(timeout: float = 30) -> str:
    """Fetch the EN Kantine page and return its HTML as text.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would block the
            worker running the update task.

    Returns:
        The body of the response decoded as text.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a 4xx/5xx response (via ``raise_for_status``).
        RuntimeError: if the response status is anything other than 200.
    """
    response = requests.get(URL, timeout=timeout)
    response.raise_for_status()

    # raise_for_status() only rejects 4xx/5xx; still refuse other non-200
    # answers, since only a regular page is expected to contain the menu.
    if response.status_code != requests.codes.ok:
        logger.error('Could not update EN Kantine with status code %s', response.status_code)
        raise RuntimeError(
            'Could not update EN Kantine with status code %s' % response.status_code
        )

    return response.text


def parse_menu(menu: Tag) -> Dict[str, str]:
    """Turn the menu accordion element into one formatted message per day.

    Each element child of ``menu`` is expected to hold an ``<h2>`` heading
    and a ``<ul>`` of dishes. The second space-separated token of the heading
    is used as the key (presumably a ``dd.mm.YYYY`` date following the
    weekday name -- verify against the live page).

    Args:
        menu: The accordion element containing one child per day.

    Returns:
        Mapping from the heading's date token to the formatted menu text.
        Children without a usable heading or dish list are skipped.
    """
    parsed_menu = {}

    for day in menu.children:
        # Children also include bare text nodes (whitespace between the
        # elements); those are strings, not Tags, and carry no menu.
        if not isinstance(day, Tag):
            continue

        # Tag.find() returns None when nothing matches.
        date_tag = day.find("h2")
        if date_tag is None:
            continue

        heading_parts = date_tag.text.split(" ")
        if len(heading_parts) < 2:
            logger.warning('Unexpected date heading in EN Kantine menu: %s', date_tag.text)
            continue
        date_str = heading_parts[1]

        dishlist = day.find("ul")
        if dishlist is None:
            logger.warning('Could not find any dishes in EN Kantine for %s', date_str)
            continue

        menu_str = "\n".join(parse_menu_items(dishlist.find_all("li")))
        parsed_menu[date_str] = '[EN Kantine](%s) (%s)\n%s\n\n*Öffnungszeiten*\nMo - Fr: 11 - 15 Uhr' % (URL, date_str, menu_str)

    return parsed_menu


def parse_menu_items(dishes: List[Tag]) -> List[str]:
    """Format the ``<li>`` entries of one day as annotated dish lines.

    Every dish is prefixed with ``VEGGIE``, ``FISH`` or ``MEAT``, chosen from
    markers in its text, followed by the dish text cleaned by ``format_dish``.

    Args:
        dishes: The list-item elements of a single day's dish list.

    Returns:
        One formatted string per dish, or a single German placeholder line
        when ``dishes`` is empty.
    """
    parsed_dishes = []

    for dish in dishes:
        # Markers are matched case-insensitively, so every needle below must
        # be lower-case (an upper-case '(F)' could never match this text).
        dish_text = dish.text.lower()
        if '(v)' in dish_text or 'gemüseplatte' in dish_text:
            annotation = VEGGIE
        elif '(f)' in dish_text:
            annotation = FISH
        else:
            annotation = MEAT

        parsed_dish = format_dish(" ".join(dish.stripped_strings))
        parsed_dishes.append('%s %s' % (annotation, parsed_dish))

    return parsed_dishes or ['Leider kenne ich (noch) keinen Speiseplan für diesen Tag.']


def format_dish(dish: str) -> str:
    """Clean up the raw text of a single dish.

    Parenthesised ingredient hints made of word characters, whitespace and
    ``+`` (e.g. ``(v)`` or ``(1 2+3)``) are removed, and a trailing price
    such as ``4,50 €`` is rewritten to the common ``: *4,50€*`` form.

    Args:
        dish: The dish text as extracted from the page.

    Returns:
        The cleaned dish text without leading or trailing whitespace.
    """
    dish = dish.strip()

    # remove ingredient hints
    dish = re.sub(r'\([\w\s+]+\)', '', dish)

    # use common price tag design; accept multi-digit euro amounts
    # ("12,90 €") and a euro sign written directly after the number
    dish = re.sub(r'\s+(\d+,\d+)\s*€', r': *\g<1>€*', dish)

    # removing a hint at the end of the text can leave dangling whitespace
    return dish.strip()


@app.task(bind=True, default_retry_delay=30)
def update_personalkantine(self):
try:
Expand All @@ -31,9 +115,12 @@ def update_personalkantine(self):
def update_en_canteen(self):
    """Refresh the cached EN Kantine menu for every day of the date range.

    The parsed menu is looked up per day by its ``dd.mm.YYYY`` string; days
    without an entry are left untouched in the cache. Any failure triggers a
    retry through ``self.retry`` (presumably this function is registered as a
    bound Celery task -- the decorator is not visible here).
    """
    try:
        logger.info('[Update] TU EN Canteen')
        menu = get_menu()
        for day in get_date_range():
            # ``menu`` must stay the parsed mapping for the whole loop;
            # rebinding it to a placeholder string would break ``.get``.
            day_menu = menu.get(day.strftime('%d.%m.%Y'))
            if day_menu:
                cache_key = day.strftime(cache_date_format)
                cache.hset(cache_key, 'tu_en_kantine', day_menu)
                cache.expire(cache_key, cache_ttl)

    except Exception as ex:
        raise self.retry(exc=ex)