diff --git a/README.md b/README.md
index 867aaff..a8cc6d9 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,8 @@
 # voctosched
-Scripts for converting various formats to [frab](https://github.com/frab/frab)-style fahrplan XML.
-This also may or may not be useful for users of [pentabarf](https://github.com/nevs/pentabarf).
+Tool for converting various formats to [frab](https://github.com/frab/frab)-style schedule XML and JSON as described [in the c3voc wiki](https://c3voc.de/wiki/schedule). The format was initially introduced by [pentabarf](https://github.com/nevs/pentabarf), gained popularity with [frab](https://frab.github.io/frab/), and is also provided by [pretalx](https://pretalx.com/p/about/), as it has become the main interchange format [between various systems](https://c3voc.de/wiki/api) that record and publish talks at conferences and other events.
 
-This repository implements an object representation of the frab export data model.
-This can be used to build different im-/exporters or manipulators for the frab schedule export.
+
+This repository implements an object representation of the frab data model and can be used to build different im-/exporters or manipulators.
 
 There are also a number of applications which can display the schedule on mobile devices or infoscreens.
 Some examples are:
@@ -12,6 +11,8 @@ Some examples are:
 * https://github.com/EventFahrplan/EventFahrplan
 * https://github.com/Wilm0r/giggity
 
+More information about the format, as well as other tools with different approaches, can be found at https://c3voc.de/wiki/schedule#conversion_tools
+
 # Usage
 ```
 pip3 install --user -r requirements.txt
@@ -21,15 +22,17 @@
 For more information about the input parameters and the generated output
 have a look at the `demo` directory; this should be a good starting point.
# Currently supported inputs -* CSV -* JSON +* [CSV](demo/gpn11.csv) +* schedule JSON +* [Proyektor](demo/fusion22.json) # Currently supported outputs -* basic XML, frab format -* extended XML, with additional `video_download_url` in events +* [schedule XML](https://github.com/voc/schedule/tree/master/validator/xsd) + * extended schedule XML, with additional `video_download_url` in events +* [schedule JSON](https://github.com/voc/schedule/tree/master/validator/json) # Validator -The generated XML can be validated with the c3voc validator, which can be found here https://github.com/voc/schedule/tree/master/validator +The generated schedule XML can be validated with the c3voc validator, which can be found at https://github.com/voc/schedule/tree/master/validator and https://c3voc.de/schedulexml/ A quick validation can also be done with: diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..b7be57f --- /dev/null +++ b/__init__.py @@ -0,0 +1,2 @@ +import sys +sys.path.append('.') \ No newline at end of file diff --git a/fahrplan/model/day.py b/fahrplan/model/day.py index 99f01da..5067eda 100644 --- a/fahrplan/model/day.py +++ b/fahrplan/model/day.py @@ -1,6 +1,6 @@ import datetime as dt import logging -from fahrplan.datetime import format_datetime +from fahrplan.datetime import format_datetime, parse_date, parse_datetime from fahrplan.exception import FahrplanError from fahrplan.xml import XmlWriter, XmlSerializable from .event import Event @@ -8,6 +8,7 @@ log = logging.getLogger(__name__) + class Day(XmlSerializable): def __init__(self, index: int, date: dt.date, start: dt.datetime = None, end: dt.datetime = None): self.index = index @@ -17,6 +18,18 @@ def __init__(self, index: int, date: dt.date, start: dt.datetime = None, end: dt self.rooms = {} self.schedule = None + @classmethod + def from_dict(cls, data: dict): + assert isinstance(data, dict), 'Data must be a dictionary.' + + obj = Day( + index=data['index'], + date=parse_date(data['date']), + start=parse_datetime(data['start']), + end=parse_datetime(data['end']) + ) + return obj + def add_room(self, room: Room): if room.name not in self.rooms: self.rooms[room.name] = room diff --git a/fahrplan/model/event.py b/fahrplan/model/event.py index 45a7366..a2cf869 100644 --- a/fahrplan/model/event.py +++ b/fahrplan/model/event.py @@ -1,8 +1,12 @@ +from dataclasses import dataclass +from datetime import datetime, time, timedelta import datetime as dt -from typing import Dict, Union, Callable +from typing import Dict, List, Union, Callable -from fahrplan.datetime import format_datetime, format_time, format_duration -from fahrplan.exception import FahrplanError +from binascii import crc32 + +from fahrplan.datetime import format_datetime, format_time, format_duration, parse_datetime, parse_duration, parse_time +from fahrplan.model.person import Person from fahrplan.xml import XmlWriter, XmlSerializable from ..uuid import uuid @@ -41,6 +45,56 @@ def __init__(self, uid: int, date: dt.datetime, start: dt.time, duration: dt.tim else: self.slug = slug + @classmethod + def from_dict(cls, data: dict, pop_used_keys=False): + assert isinstance(data, dict), 'Data must be a dictionary.' 
+
+        persons = {}
+        for person_info in data.get('persons', []):
+            person = Person.from_dict(person_info)
+            # generate some hopefully unique ids if they are 0
+            uid = person_info['id'] or (crc32(person.name.encode()) & 0xffffffff)
+            persons[uid] = person
+
+        links = {}
+        for link_info in data.get('links', []):
+            title = link_info['title']
+            url = link_info['url']
+            links[url] = title
+
+        attachments = {}
+        # TODO extract as util method
+        for attachment_info in data.get('attachments', []):
+            title = attachment_info['title']
+            url = attachment_info['url']
+            attachments[url] = title
+
+        obj = Event(
+            uid=data['id'],
+            guid=data['guid'],
+            date=parse_datetime(data['date']),
+            start=parse_time(data['start']),
+            duration=parse_duration(data['duration']),
+            slug=data['slug'],
+            title=data['title'],
+            subtitle=data.get('subtitle', ''),
+            abstract=data.get('abstract', ''),
+            description=data.get('description', ''),
+            language=data.get('language'),
+            persons=persons,
+            download_url=data.get('download_url', ''),
+            recording_license=data.get('recording_license', ''),
+            recording_optout=data['do_not_record'],
+            track=data.get('track', ''),
+            event_type=data.get('type', ''),
+            logo=data.get('logo', ''),
+            links=links,
+            attachments=attachments
+        )
+        return obj
+
     def add_person(self, uid: int, name: str):
         self.persons[uid] = name
 
@@ -50,7 +104,7 @@ def add_link(self, href: str, title: str):
     def add_attachment(self, href: str, title: str):
         self.attachments[href] = title
 
-    def append_xml(self, xml: XmlWriter, extended: bool):
+    def append_xml(self, xml: XmlWriter, extended: bool = False):
         with xml.context("event", guid=self.guid, id=self.id):
             xml.tag("date", format_datetime(self.date))
             xml.tag("start", format_time(self.start))
diff --git a/fahrplan/model/person.py b/fahrplan/model/person.py
new file mode 100644
index 0000000..d440f02
--- /dev/null
+++ b/fahrplan/model/person.py
@@ -0,0 +1,102 @@
+from dataclasses import InitVar, dataclass, field, fields
+from typing import Optional, Union
+import uuid
+
+from fahrplan.uuid import NAMESPACE_VOCTOSCHED
+
+
+@dataclass
+class Person:
+    """A person, e.g. a speaker or contributor."""
+
+    name: str
+    "public name of the person"
+    guid: InitVar[Optional[str]] = None
+    "globally unique identifier, might be automatically computed from uri, email, id or code"
+    _guid: Optional[uuid.UUID] = field(init=False, repr=False, default=None)
+    "internal storage in case we got a precomputed person guid from the origin system"
+    _uri: Optional[str] = field(init=False, repr=False, default=None)
+    "URL, URN or other URI identifying this person"
+    email: Optional[str] = None
+    "public email address, used as base for URI/GUID when nothing else is set"
+    id: Optional[int] = None
+    "deprecated: integer id of this person in the origin system"
+    code: Optional[str] = None
+    "deprecated: pretalx-internal 5-char random id of this person in the origin system, e.g. `DEFAB`"
+
+    # avatar: Optional[str] = None
+    # biography: Optional[str] = None
+    # links: Optional[List[Any]] = None
+    # contacts: Optional[List[Any]] = None
+    # state: Optional[str] = None
+
+    origin_system: Optional[str] = None
+    "internet domain of the system this entity originates from, e.g. `frab.cccv.de`"
+
+    def __post_init__(self, guid):
+        if isinstance(guid, (str, uuid.UUID)):
+            self._guid = guid
+
+    @classmethod
+    def from_dict(cls, data: dict):
+        assert isinstance(data, dict), 'Data must be a dictionary.'
+
+        # older schedule versions used other keys for the public person name
+        name = data.get('name') or data.get('public_name') or data.get('full_public_name')
+
+        field_set = {f.name for f in fields(cls) if f.init and f.name != 'name'}
+        field_set.add('guid')
+        filtered_data = {k: v for k, v in data.items() if k in field_set}
+
+        # support `origin` as a shorthand for the previous `origin_system` key
+        if 'origin' in data:
+            filtered_data['origin_system'] = data['origin']
+
+        return Person(name, **filtered_data)
+
+    @property
+    def uri(self):
+        # if we got a precomputed person URI from the origin system, stay consistent
+        if self._uri:
+            return self._uri
+
+        if self.email:
+            return f"acct:{self.email}"
+
+        if (self.id or self.code) and self.origin_system:
+            # TODO person vs people
+            return f'urn:{self.origin_system}:person:{self.id or self.code}'
+
+        if self._guid:
+            return f'urn:uuid:{self._guid}'
+
+        return None
+
+    @uri.setter
+    def uri(self, value):
+        # TODO: raise an exception if value is not a URI
+        self._uri = value
+
+    @property
+    def guid(self) -> str:
+        # if we got a precomputed person guid from the origin system, stay consistent
+        if self._guid:
+            return str(self._guid)
+
+        uri = self.uri
+        if uri:
+            return str(uuid.uuid5(uuid.NAMESPACE_URL, uri))
+
+        # we tried everything else, so fall back to a hash of the person name
+        return str(uuid.uuid5(NAMESPACE_VOCTOSCHED, self.name))
+
+    @guid.setter
+    def guid(self, value: Union[str, uuid.UUID]):
+        if isinstance(value, uuid.UUID):
+            self._guid = value
+        else:
+            self._guid = uuid.UUID(value)
+
+# used resources:
+# https://stackoverflow.com/a/61480946
+# https://medium.com/swlh/python-dataclasses-with-properties-and-pandas-5c59b05e9131
\ No newline at end of file
diff --git a/fahrplan/model/room.py b/fahrplan/model/room.py
index 9fce00c..a693bad 100644
--- a/fahrplan/model/room.py
+++ b/fahrplan/model/room.py
@@ -5,12 +5,37 @@
 log = logging.getLogger(__name__)
 
+
 class Room(XmlSerializable):
-    def __init__(self, name: str):
+    def __init__(self, name: str, guid: str = None):
         self.name = name
+        self.guid = guid
         self.events = dict()
         self.day = None
 
+    @classmethod
+    def from_dict(cls, data: dict, pop_used_keys=False):
+        """
+        Loads a Room instance from the given dictionary.
+
+        :param data: a dictionary containing Room attribute names as keys (and their values)
+        :param pop_used_keys: remove 'used' keys from the provided data. This can be used to detect additional/erroneous fields.
+        """
+        assert isinstance(data, dict), 'Data must be a dictionary.'
+
+        obj = Room(name=data['name'])
+        direct_fields = ['guid']  # later maybe: 'description', 'blocked', 'capacity'
+        integer_fields = ['capacity']
+        for fld in direct_fields:
+            if fld in data:
+                value = data[fld] if not pop_used_keys else data.pop(fld)
+                if fld in integer_fields:
+                    value = int(value)
+                setattr(obj, fld, value)
+        return obj
+
     def add_event(self, event: Event):
         event.room = self
         self.events[event.id] = event
diff --git a/fahrplan/model/schedule.py b/fahrplan/model/schedule.py
index b5d676c..3890546 100644
--- a/fahrplan/model/schedule.py
+++ b/fahrplan/model/schedule.py
@@ -60,6 +60,8 @@ def add_room(self, name: str, day_filter: List[int] = None):
             day.add_room(Room(name))
 
     def add_event(self, day: int, room: str, event: Event):
+        # ensure that the room exists
+        self.add_room(room, day)
         self.days[day].add_event(room, event)
 
     def merge(self, other: 'Schedule'):
@@ -82,15 +84,15 @@ def has_collision(self, new_event: 'Event'):
         for day in self.days.values():
             for room in day.rooms.values():
                 for event in room.events.values():
+                    if event.guid == new_event.guid:
+                        log.error(f'Duplicate guid "{event.guid}"')
+                        return True
                     if event.slug == new_event.slug:
                         log.error(f'Duplicate slug "{event.slug}"')
                         return True
                     if event.id == new_event.id:
                         log.error(f'Duplicate event id "{event.id}"')
                         return True
-                    if event.guid == new_event.guid:
-                        log.error(f'Duplicate guid "{event.guid}"')
-                        return True
         else:
             return False
diff --git a/fahrplan/model/tests/test_person.py b/fahrplan/model/tests/test_person.py
new file mode 100644
index 0000000..a7486b8
--- /dev/null
+++ b/fahrplan/model/tests/test_person.py
@@ -0,0 +1,69 @@
+from fahrplan.model.person import Person
+
+
+def test_minimal_person():
+    person = Person(name="Alice")
+    assert person.guid == "71bc5e0c-582a-5442-aa86-ff0b3e7cc2bd"
+
+
+def test_person_guid():
+    person = Person(name="Alice", guid="f4c4c716-7ee6-11ed-aa36-6c400891b752")
+    assert person.guid == "f4c4c716-7ee6-11ed-aa36-6c400891b752"
+    # we have no other information about the origin system, so the URI falls back to the UUID
+    assert person.uri == "urn:uuid:f4c4c716-7ee6-11ed-aa36-6c400891b752"
+
+
+def test_person_from_dict_with_guid():
+    person = Person.from_dict(
+        {"name": "Alice", "guid": "f4c4c716-7ee6-11ed-aa36-6c400891b752", "id": 123, "origin": "domain.tld"}
+    )
+    assert person.guid == "f4c4c716-7ee6-11ed-aa36-6c400891b752"
+    assert person.uri == "urn:domain.tld:person:123"
+
+
+def test_classic_frab_person():
+    data = {"id": 7797, "public_name": "royce"}
+    person = Person.from_dict(data)
+    assert person.name == "royce"
+    assert person.uri is None
+    assert person.guid == "00d95612-6fe0-56f9-a9b5-2c9ed405abec"
+
+    person.origin_system = "frab.cccv.de"
+    assert person.uri == "urn:frab.cccv.de:person:7797"
+    assert person.guid == "d04f5971-648b-5aa6-af6d-843b38a6bb54"
+
+
+def test_pretalx_person_from_dict():
+    data = {
+        "name": "Jane",
+        "code": "DEFAB",
+        "biography": "A speaker",
+        "avatar": "avatar.png",
+    }
+    person = Person.from_dict(data)
+    person.origin_system = "pretalx.com"
+    assert person.name == "Jane"
+    assert person.uri == "urn:pretalx.com:person:DEFAB"
+    assert person.guid == "2d818073-0a8a-5240-952a-e81e0cd19766"
+
+
+def test_full_frab_person_from_dict():
+    data = {
+        "id": 100,
+        "name": "Royce",
+        "full_name": "Royce Jakubowski",
+        "email": "royce_jakubowski@example.net",
+        "avatar": None,
+        "biography": "Royce Jakubowski is a well-respected tech conference speaker with over a decade of experience in the industry.
He has a deep understanding of a wide range of technologies, from software development to data analytics, and is known for his engaging and informative presentations.", + "links": [{"url": "https://domain.tld", "title": "title"}], + "contacts": [], + "state": "unclear", + "availabilities": [ + {"start": "2021-10-20T11:00:00+02:00", "end": "2021-10-20T17:00:00+02:00"} + ], + "url": "http://localhost:3000/en/democon/people/100", + } + person = Person.from_dict(data) + assert person.name == "Royce" + assert person.uri == "acct:royce_jakubowski@example.net" + assert person.guid == "6b4d383e-70f5-5a7a-8b38-3d168d02210f" diff --git a/fahrplan/uuid.py b/fahrplan/uuid.py index 9e0da76..e50e16e 100644 --- a/fahrplan/uuid.py +++ b/fahrplan/uuid.py @@ -1,11 +1,8 @@ import uuid as _uuid -# TODO (AK) rename to UUID_NAMESPACE as this is a global constant? -uuid_namespace = _uuid.UUID('54dc9c85-9b6a-40bd-9a36-41c004a5829b') +NAMESPACE_VOCTOSCHED = _uuid.UUID('54dc9c85-9b6a-40bd-9a36-41c004a5829b') def uuid(uid, name): # TODO (AK) please document this method in detail and what the uuid namespace is for - # TODO (AK) use format string for concatenation of name and uid - # f"{name}{uid}" - return str(_uuid.uuid5(uuid_namespace, str(name) + str(uid))) + return str(_uuid.uuid5(NAMESPACE_VOCTOSCHED, f"{name}{uid}")) diff --git a/fahrplan/xml/writer.py b/fahrplan/xml/writer.py index 87ce69c..c21e3ac 100644 --- a/fahrplan/xml/writer.py +++ b/fahrplan/xml/writer.py @@ -60,7 +60,7 @@ def exit(self, tag: str): def append_dict(self, tag: str, content: Dict, prop: str): """ - Serializes a dictionary. + Serializes a dictionary. :param tag: Tag name to be used for dict items. The container tag will have a pluralized version. :param content: Dictionary to be serialized. Keys turn into diff --git a/hacks.py b/hacks.py index 2e75741..e27d40c 100644 --- a/hacks.py +++ b/hacks.py @@ -14,7 +14,7 @@ def noexcept_inner(*args, **kwargs): # noinspection PyBroadException try: return f(*args, **kwargs) - except: + except Exception: log.exception("An unexpected error occurred.") return False return noexcept_inner diff --git a/handlers/directory.py b/handlers/directory.py index 1d2c854..32af3fa 100644 --- a/handlers/directory.py +++ b/handlers/directory.py @@ -2,7 +2,7 @@ from .base import ImportHandler, ExportHandler from .import_handlers import FakeImportHandler, CSVImportHandler, JSONImportHandler, ProyektorImportHandler -from .export_handlers import BasicXMLExportHandler, ExtendedXMLExportHandler, FrabJsonExportHandler +from .export_handlers import ScheduleXMLExportHandler, ExtendedXMLExportHandler, FrabJsonExportHandler import_handlers = { "csv": CSVImportHandler, @@ -12,7 +12,8 @@ } export_handlers = { - "xml-basic": BasicXMLExportHandler, + "xml": ScheduleXMLExportHandler, + "xml-basic": ScheduleXMLExportHandler, "xml-extended": ExtendedXMLExportHandler, "json-frab": FrabJsonExportHandler, } diff --git a/handlers/export_handlers/__init__.py b/handlers/export_handlers/__init__.py index 9cb14b7..1d956d7 100644 --- a/handlers/export_handlers/__init__.py +++ b/handlers/export_handlers/__init__.py @@ -1,3 +1,3 @@ -from .basic_xml import BasicXMLExportHandler +from .frab_xml import ScheduleXMLExportHandler from .extended_xml import ExtendedXMLExportHandler from .frab_json import FrabJsonExportHandler diff --git a/handlers/export_handlers/frab_json.py b/handlers/export_handlers/frab_json.py index fb54ef2..b768dd6 100644 --- a/handlers/export_handlers/frab_json.py +++ b/handlers/export_handlers/frab_json.py @@ -25,7 +25,7 @@ def 
get_data(self, schedule): """ return { "version": schedule.version, - "base_url": None, # we don't have this information + "base_url": None, # we don't have this information "conference": { "acronym": schedule.conference.acronym, "title": schedule.conference.title, diff --git a/handlers/export_handlers/basic_xml.py b/handlers/export_handlers/frab_xml.py similarity index 77% rename from handlers/export_handlers/basic_xml.py rename to handlers/export_handlers/frab_xml.py index 32cf5ba..aa4000f 100644 --- a/handlers/export_handlers/basic_xml.py +++ b/handlers/export_handlers/frab_xml.py @@ -9,9 +9,9 @@ log = logging.getLogger(__name__) -class BasicXMLExportHandler(ExportHandler): +class ScheduleXMLExportHandler(ExportHandler): @noexcept(log) def run(self, schedule: Schedule) -> bool: path = self.config["path"] - content = schedule.to_xml(extended=False) + content = schedule.to_xml() return write_output(path, content) diff --git a/handlers/import_handlers/json.py b/handlers/import_handlers/json.py index 6e0dea2..d5b2da7 100644 --- a/handlers/import_handlers/json.py +++ b/handlers/import_handlers/json.py @@ -1,10 +1,8 @@ import json import logging -from binascii import crc32 - from ..base import ImportHandler -from fahrplan.datetime import parse_date, parse_time, parse_datetime, parse_duration +from fahrplan.datetime import parse_date, parse_duration from fahrplan.model.conference import Conference from fahrplan.model.day import Day from fahrplan.model.event import Event @@ -24,75 +22,34 @@ def run(self): tree = json.loads(read_input(self.config['path'])) # handy references to subtrees - conf_tree = tree['schedule']['conference'] + sched = tree['schedule'] + con = tree['schedule']['conference'] # create the conference object conference = Conference( - title=conf_tree['title'], - acronym=conf_tree['acronym'], + title=con['title'], + acronym=con['acronym'], day_count=0, # do not automatically generate days - start=parse_date(conf_tree['start']), - end=parse_date(conf_tree['end']), - time_slot_duration=parse_duration(conf_tree['timeslot_duration']) + start=parse_date(con['start']), + end=parse_date(con['end']), + time_slot_duration=parse_duration(con['timeslot_duration']) ) - schedule = Schedule(conference=conference, version=tree['schedule']['version']) - for day_tree in conf_tree['days']: - day = Day( - index=day_tree['index'], - date=parse_date(day_tree['date']), - start=parse_datetime(day_tree['start']), - end=parse_datetime(day_tree['end']) - ) + schedule = Schedule(conference=conference, version=sched['version']) + + for room in con['rooms']: + conference.add_room(Room.from_dict(room)) + + for day_tree in con['days']: + day = Day.from_dict(day_tree) schedule.add_day(day) for room_name, room_talks in day_tree['rooms'].items(): day.add_room(Room(room_name)) for talk in room_talks: - persons = {} - for person_info in talk.get('persons', []): - name = person_info['full_public_name'] - # generate some hopefully unique ids if they are 0 - uid = person_info['id'] or (crc32(name.encode()) & 0xffffffff) - persons[uid] = name - - links = {} - for link_info in talk.get('links', []): - title = link_info['title'] - # generate some hopefully unique ids if they are 0 - url = link_info['url'] - links[url] = title - - attachments = {} - for attachment_info in talk.get('attachments', []): - title = attachment_info['title'] - # generate some hopefully unique ids if they are 0 - url = attachment_info['url'] - attachments[url] = title - - day.add_event(room_name, Event( - uid=talk['id'], - 
date=parse_datetime(talk['date']), - start=parse_time(talk['start']), - duration=parse_duration(talk['duration']), - slug=talk['slug'], - title=talk['title'], - description=talk.get('description', ''), - abstract=talk.get('abstract', ''), - language=talk['language'], - persons=persons, - download_url=talk.get('download_url', ''), - recording_license=talk.get('recording_license', ''), - recording_optout=talk['do_not_record'], - subtitle=talk.get('subtitle', ''), - track=talk.get('track', ''), - event_type=talk.get('type', ''), - logo=talk.get('logo', ''), - links=links, - attachments=attachments - )) + day.add_event(room_name, Event.from_dict(talk)) - assert conference.day_count == conf_tree['daysCount'] + assert conference.day_count == con['daysCount'] return schedule diff --git a/handlers/import_handlers/proyektor.py b/handlers/import_handlers/proyektor.py index 692b3d9..47508ee 100644 --- a/handlers/import_handlers/proyektor.py +++ b/handlers/import_handlers/proyektor.py @@ -1,6 +1,5 @@ import json import logging -import datetime from ..base import ImportHandler from fahrplan.datetime import parse_date, parse_datetime, parse_duration @@ -73,7 +72,7 @@ def run(self): else: rec_optout = True - if b.get('artist_name'): + if b.get('artist_name'): title = b.get('artist_name') else: title = b['program_name'] @@ -81,9 +80,9 @@ def run(self): if not title: continue - if b.get('program_name'): + if b.get('program_name'): persons_names = [x.strip() for x in b['program_name'].split(',')] - persons = dict(zip(range(len(persons_names)),persons_names)) + persons = dict(zip(range(len(persons_names)), persons_names)) else: persons = {} @@ -99,11 +98,9 @@ def run(self): persons=persons, recording_license=rec_license, recording_optout=rec_optout, - event_type=b['genre'], - download_url='https://content.kulturkosmos.de/' + event_type=b['genre'] ) - schedule.add_room(show['stage']) schedule.add_event(day, show['stage'], event) return schedule diff --git a/helper_scripts/add_download_url_to_schedule.py b/helper_scripts/add_download_url_to_schedule.py index 2cf30dd..f48b25b 100644 --- a/helper_scripts/add_download_url_to_schedule.py +++ b/helper_scripts/add_download_url_to_schedule.py @@ -5,56 +5,57 @@ import xml.etree.ElementTree as ET import pprint -schedule = 'froscon2008.xml' -file_list = 'froscon2008.txt' -output = 'froscon2008_dl.xml' -delimitter = '_-_' +schedule = "froscon2008.xml" +file_list = "froscon2008.txt" +output = "froscon2008_dl.xml" +delimitter = "_-_" id_posittion = -1 -video_base_url = 'https://cdn.media.ccc.de/events/froscon/2008/' +video_base_url = "https://cdn.media.ccc.de/events/froscon/2008/" files = {} files_count = 0 files_format_error = 0 -print('reading file list') -with open(file_list,'r') as list: +print("reading file list") +with open(file_list, "r") as list: for line in list: try: files_count += 1 - files[line.split(delimitter)[id_posittion].split('.')[0].strip()] = video_base_url + line.rstrip() - print(str(files_count) + ': ' + line.rstrip()) + files[line.split(delimitter)[id_posittion].split(".")[0].strip()] = ( + video_base_url + line.rstrip() + ) + print(str(files_count) + ": " + line.rstrip()) except: - print('Warning: line ' + line + ' is not in expected format, skipping') + print("Warning: line " + line + " is not in expected format, skipping") files_format_error += 1 printer = pprint.PrettyPrinter() printer.pprint(files) print(len(files)) -print('reading schedule') +print("reading schedule") frab_data = None events_count = 0 matches_count = 0 -with open(schedule, 'r') 
as schedule:
     tree = ET.parse(schedule)
     root = tree.getroot()
     to_be_deleted = {}
-    for room in root.iter('room'):
-        for event in room.iter('event'):
-            print(event.attrib['id'])
+    for room in root.iter("room"):
+        for event in room.iter("event"):
+            print(event.attrib["id"])
             events_count += 1
-            if event.attrib['id'] in files.keys():
-                event.append(ET.Element('video_download_url'))
-                event.find('video_download_url').text = files[event.attrib['id']]
+            if event.attrib["id"] in files.keys():
+                event.append(ET.Element("video_download_url"))
+                event.find("video_download_url").text = files[event.attrib["id"]]
                 matches_count += 1
-                del files[event.attrib['id']]
+                del files[event.attrib["id"]]
             else:
                 to_be_deleted[event] = room
-    for event,room in to_be_deleted.items():
+    for event, room in to_be_deleted.items():
         room.remove(event)
 
     tree.write(output)
-print('schedule written to ' + output)
-print('Found ' + str(events_count) + ' events and ' + str(files_count) + ' video files. There where ' +
-      str(matches_count) + ' matches. ' + str(files_format_error) + ' files had a format errors in its '
-      'filnename and have been skipped')
-print('not matching files' + str(files))
\ No newline at end of file
+
+print(f"schedule written to {output}")
+print(f"Found {events_count} events and {files_count} video files. There were {matches_count} matches. {files_format_error} files had format errors in their filenames and have been skipped.")
+print(f"non-matching files: {files}")
diff --git a/helper_scripts/fix_frab_date_tag.py b/helper_scripts/fix_frab_date_tag.py
index ecaddd8..ed110d0 100644
--- a/helper_scripts/fix_frab_date_tag.py
+++ b/helper_scripts/fix_frab_date_tag.py
@@ -4,12 +4,12 @@
 from time import strftime, localtime
 
 schedule_url = "ADD URL HERE"
-print( "## getting meta data from " + schedule_url + " ##")
+print("## getting meta data from " + schedule_url + " ##")
 global frab_data
 try:
     frab_data = urllib.request.urlopen(schedule_url)
 except:
-    print( "Could not load schedule xml. Please check url")
+    print("Could not load schedule xml.
Please check url") sys.exit(1) tree = ET.parse(frab_data) @@ -18,16 +18,16 @@ download_time = localtime() formatted_download_time = strftime("%Y-%m-%d %H:%M", download_time) -root.append(ET.Element('version')) -root.find('version').text = formatted_download_time +root.append(ET.Element("version")) +root.find("version").text = formatted_download_time -for day in root.iter('day'): - date = day.attrib['date'] - day.set('end', date + "T05:00:00+01:00") - day.set('start', date + "T10:00:00+01:00") - for event in day.iter('event'): +for day in root.iter("day"): + date = day.attrib["date"] + day.set("end", date + "T05:00:00+01:00") + day.set("start", date + "T10:00:00+01:00") + for event in day.iter("event"): # Append ISO 8601 date; example: 2016-02-29T23:42:00+01:00 - event.append(ET.Element('date')) - event.find('date').text = date + "T" + event.find('start').text + ":00+01:00" + event.append(ET.Element("date")) + event.find("date").text = date + "T" + event.find("start").text + ":00+01:00" tree.write("test.xml") diff --git a/helper_scripts/fix_missing_uuid_and_slug.py b/helper_scripts/fix_missing_uuid_and_slug.py index 8aafa13..34bfb31 100644 --- a/helper_scripts/fix_missing_uuid_and_slug.py +++ b/helper_scripts/fix_missing_uuid_and_slug.py @@ -1,27 +1,31 @@ #!/usr/bin/python3 """ -early versions of frab and pentabrf schedules are missing a slug and uuid which we need for the tracker +early versions of frab and pentabarf schedules are missing a slug and guid which we need for the tracker """ import xml.etree.ElementTree as ET import uuid -schedule = 'froscon2008_dl.xml' -output = 'froscon2008_foo.xml' +schedule = "froscon2008_dl.xml" +output = "froscon2008_foo.xml" -uuid_namespace = uuid.UUID('54dc9c85-9b6a-40bd-9a36-41c004a5829b') +uuid_namespace = uuid.UUID("54dc9c85-9b6a-40bd-9a36-41c004a5829b") -with open(schedule, 'r') as schedule: +with open(schedule, "r") as schedule: tree = ET.parse(schedule) root = tree.getroot() - for event in root.iter('event'): - print(event.attrib['id']) - event.append(ET.Element('slug')) - print(event.find('room').text) - slug_elements = [event.find('room').text, event.find('title').text, event.attrib['id']] + for event in root.iter("event"): + print(event.attrib["id"]) + event.append(ET.Element("slug")) + print(event.find("room").text) + slug_elements = [ + event.find("room").text, + event.find("title").text, + event.attrib["id"], + ] print(slug_elements) - slug = '_-_'.join(slug_elements).replace(' ', '_') - event.find('slug').text = slug - event.attrib['guid'] = str(uuid.uuid5(uuid_namespace, slug)) + slug = "_-_".join(slug_elements).replace(" ", "_") + event.find("slug").text = slug + event.attrib["guid"] = str(uuid.uuid5(uuid_namespace, slug)) tree.write(output) -print('schedule written to ' + output) +print("schedule written to " + output) diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..f94dc9a --- /dev/null +++ b/poetry.lock @@ -0,0 +1,191 @@ +[[package]] +name = "attrs" +version = "22.1.0" +description = "Classes Without Boilerplate" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"] +docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] +tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", 
"pytest-mypy-plugins", "zope.interface"] +tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" + +[[package]] +name = "exceptiongroup" +version = "1.0.4" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "flake8" +version = "6.0.0" +description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" +optional = false +python-versions = ">=3.8.1" + +[package.dependencies] +mccabe = ">=0.7.0,<0.8.0" +pycodestyle = ">=2.10.0,<2.11.0" +pyflakes = ">=3.0.0,<3.1.0" + +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "packaging" +version = "22.0" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pycodestyle" +version = "2.10.0" +description = "Python style guide checker" +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "pyflakes" +version = "3.0.1" +description = "passive checker of Python programs" +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "pytest" +version = "7.2.0" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "translitcodec" +version = "0.7.0" +description = "Unicode to 8-bit charset transliteration codec" +category = "main" +optional = false +python-versions = ">=3" + +[metadata] +lock-version = "1.1" +python-versions = "^3.8.1" +content-hash = "16cad2aec6c68828bb18c0a8dba2c99da46b5727b1fe92f1f4a78be7db9a319c" + +[metadata.files] +attrs = [ + {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, + {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, +] +colorama = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = 
"sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.0.4-py3-none-any.whl", hash = "sha256:542adf9dea4055530d6e1279602fa5cb11dab2395fa650b8674eaec35fc4a828"}, + {file = "exceptiongroup-1.0.4.tar.gz", hash = "sha256:bd14967b79cd9bdb54d97323216f8fdf533e278df937aa2a90089e7d6e06e5ec"}, +] +flake8 = [ + {file = "flake8-6.0.0-py2.py3-none-any.whl", hash = "sha256:3833794e27ff64ea4e9cf5d410082a8b97ff1a06c16aa3d2027339cd0f1195c7"}, + {file = "flake8-6.0.0.tar.gz", hash = "sha256:c61007e76655af75e6785a931f452915b371dc48f56efd765247c8fe68f2b181"}, +] +iniconfig = [ + {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, + {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, +] +mccabe = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] +packaging = [ + {file = "packaging-22.0-py3-none-any.whl", hash = "sha256:957e2148ba0e1a3b282772e791ef1d8083648bc131c8ab0c1feba110ce1146c3"}, + {file = "packaging-22.0.tar.gz", hash = "sha256:2198ec20bd4c017b8f9717e00f0c8714076fc2fd93816750ab48e2c41de2cfd3"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +pycodestyle = [ + {file = "pycodestyle-2.10.0-py2.py3-none-any.whl", hash = "sha256:8a4eaf0d0495c7395bdab3589ac2db602797d76207242c17d470186815706610"}, + {file = "pycodestyle-2.10.0.tar.gz", hash = "sha256:347187bdb476329d98f695c213d7295a846d1152ff4fe9bacb8a9590b8ee7053"}, +] +pyflakes = [ + {file = "pyflakes-3.0.1-py2.py3-none-any.whl", hash = "sha256:ec55bf7fe21fff7f1ad2f7da62363d749e2a470500eab1b555334b67aa1ef8cf"}, + {file = "pyflakes-3.0.1.tar.gz", hash = "sha256:ec8b276a6b60bd80defed25add7e439881c19e64850afd9b346283d4165fd0fd"}, +] +pytest = [ + {file = "pytest-7.2.0-py3-none-any.whl", hash = "sha256:892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71"}, + {file = "pytest-7.2.0.tar.gz", hash = "sha256:c4014eb40e10f11f355ad4e3c2fb2c6c6d1919c73f3b5a433de4708202cade59"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +translitcodec = [ + {file = "translitcodec-0.7.0.tar.gz", hash = "sha256:3be7975c630ec0f1dd5b3712160c991a9776132985aed2588cba083ba00fa3c8"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..334e2c7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[tool.poetry] +name = "voctosched" +version = "0.9.0" +description = "" +authors = ["c3voc <voc@c3voc.de>"] +license = "GPL-3.0" + +[tool.poetry.dependencies] +python = "^3.8.1" +translitcodec = "^0.7.0" + +[tool.poetry.dev-dependencies] +flake8 = "^6.0.0" +pytest = "^7.2.0" + +[tool.poetry.scripts] +voctosched = "schedule:main" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = 
"poetry.core.masonry.api" diff --git a/schedule.py b/schedule.py index df4bdda..9327a7d 100755 --- a/schedule.py +++ b/schedule.py @@ -13,8 +13,8 @@ from handlers.directory import resolve_import_handler, resolve_export_handler -if sys.version_info < (3, 6): - print("At least python version 3.6 is required to run.") +if sys.version_info < (3, 8): + print("At least python version 3.8 is required to run.") sys.exit(1) diff --git a/util.py b/util.py index bb04675..6b507ac 100644 --- a/util.py +++ b/util.py @@ -15,7 +15,7 @@ def looks_like_url(path: str): def write_output(path: str, content: str) -> bool: if path == 'STDOUT': - log.info(f'Writing to stdout.') + log.info('Writing to stdout.') sys.stdout.write(content) sys.stdout.flush() return True