diff --git a/knv_cli/__init__.py b/knv_cli/__init__.py index d594479..cb6822c 100644 --- a/knv_cli/__init__.py +++ b/knv_cli/__init__.py @@ -1,8 +1,8 @@ from .algorithms.contacts import get_contacts from .algorithms.matching import Matching from .algorithms.ranking import get_ranking -from .processors.paypal import process_payments -from .processors.shopkonfigurator import process_orders, process_infos +from .processors.paypal import Paypal +from .processors.shopkonfigurator import Orders, Infos from .utils import dedupe, group_data __all__ = [ @@ -12,9 +12,9 @@ 'get_contacts', # Processors - 'process_payments', - 'process_orders', - 'process_infos', + 'Paypal', + 'Orders', + 'Infos', # Utilities 'dedupe', diff --git a/knv_cli/database.py b/knv_cli/database.py index 2f0f9cb..9b00a75 100644 --- a/knv_cli/database.py +++ b/knv_cli/database.py @@ -7,10 +7,10 @@ from shutil import move from zipfile import ZipFile -from .processors.paypal import process_payments -from .processors.shopkonfigurator import process_orders, process_infos -from .utils import load_csv, load_json, dump_json -from .utils import build_path, dedupe, group_data +from .processors.paypal import Paypal +from .processors.shopkonfigurator import Orders, Infos +from .utils import load_json, dump_json +from .utils import build_path, dedupe, group_data, invoice2number class Database: @@ -35,25 +35,15 @@ def import_payments(self) -> None: import_files = build_path(self.config.import_dir, self.config.payment_regex) # Generate payment data by .. - # (1) .. fetching their content - import_data = load_csv(import_files, 'utf-8', ',') + # (1) .. extracting information from import files + handler = Paypal() + handler.load_csv(import_files) - # (2) .. removing duplicates - # (3) .. 
extracting information - import_data, _ = process_payments(dedupe(import_data)) - - # Load database files - db_files = build_path(self.config.payment_dir) - payments = load_json(db_files) - - # Compare existing & imported data if database was built before .. - payments = self.merge_data(payments, import_data, 'Transaktion') - - # Sort payments by date - payments.sort(key=itemgetter('Datum')) + # (2) .. merging with existing data + handler.load_json(build_path(self.config.payment_dir)) # Split payments per-month & export them - for code, data in group_data(payments).items(): + for code, data in group_data(handler.payments()).items(): dump_json(data, join(self.config.payment_dir, code + '.json')) @@ -62,25 +52,16 @@ def import_orders(self) -> None: import_files = build_path(self.config.import_dir, self.config.order_regex) # Generate order data by .. - # (1) .. fetching their content - import_data = load_csv(import_files) - - # (2) .. removing duplicates - # (3) .. extracting information - import_data = process_orders(dedupe(import_data)) - - # Load database files - db_files = build_path(self.config.order_dir) - orders = load_json(db_files) + # (1) .. extracting information from import files + handler = Orders() + handler.load_csv(import_files) - # Compare existing & imported data if database was built before .. - orders = self.merge_data(orders, import_data, 'ID') + # (2) .. merging with existing data - # Sort orders by date - orders.sort(key=itemgetter('Datum')) + handler.load_json(build_path(self.config.order_dir)) # Split orders per-month & export them - for code, data in group_data(orders).items(): + for code, data in group_data(handler.orders()).items(): dump_json(data, join(self.config.order_dir, code + '.json')) @@ -88,26 +69,16 @@ def import_infos(self) -> None: # Select info files to be imported import_files = build_path(self.config.import_dir, self.config.info_regex) - # Generate info data by .. - # (1) .. 
# --- knv_cli/processors/base.py ---

class BaseClass(metaclass=ABCMeta):
    '''
    Abstract base for processors that load records from CSV and JSON files
    and keep them, free of duplicates, in `self.data`.

    Subclasses implement `process_data` and may override `identifier` (key
    used to recognise duplicates) as well as the CSV options below.
    '''

    # Props
    # `data` holds the list of loaded records (None until the first load)
    data = None
    # Key whose value identifies a record; None means 'hash the whole record'
    identifier = None

    # CSV options (handed to `pandas.read_csv`)
    encoding = 'iso-8859-1'
    delimiter = ';'
    skiprows = None


    def load_csv(self, csv_files) -> None:
        '''
        Reads all given CSV files, runs their rows through `process_data`
        and merges the result into `self.data`.

        If pandas raises `ValueError` (as `concat` does when there are no
        files to concatenate), nothing is loaded and `self.data` is left
        untouched.
        '''
        try:
            df = concat([
                read_csv(
                    csv_file,
                    sep=self.delimiter,
                    encoding=self.encoding,
                    low_memory=False,
                    skiprows=self.skiprows,
                )
                for csv_file in csv_files
            ])

        except ValueError:
            # Annotated as returning nothing, so do not hand back a list
            return

        self.load_data(self.process_data(df.to_dict('records')))


    def load_json(self, json_files) -> None:
        '''
        Reads all given JSON files (each containing a list of records) and
        merges their content into `self.data`.

        Missing files are skipped; a file with invalid JSON raises
        `Exception`, naming the file and chained to the decoding error.
        '''
        data = []

        for json_file in json_files:
            try:
                with open(json_file, 'r') as file:
                    data.extend(json.load(file))

            except json.decoder.JSONDecodeError as error:
                # Same exception type as before, but keep file name & cause
                raise Exception('Invalid JSON file: {}'.format(json_file)) from error

            except FileNotFoundError:
                pass

        self.load_data(data)


    def load_data(self, data: list) -> None:
        '''
        Merges `data` into `self.data`, permitting only unique entries.

        Entries are compared by `identifier` if set, otherwise by a hash of
        the whole entry. Entries already present win over new ones, and the
        passed list itself is never stored or modified.
        '''
        if self.data is None:
            self.data = []

        # Seed with what is already loaded, so that neither mode re-adds
        # known entries (the hashing mode used to start from an empty set)
        codes = {self._fingerprint(item) for item in self.data}

        for item in data:
            code = self._fingerprint(item)

            if code not in codes:
                codes.add(code)
                self.data.append(item)


    def _fingerprint(self, item):
        '''
        Returns the value by which `item` is recognised as a duplicate
        '''
        # (1) .. using a unique identifier
        if self.identifier is not None:
            return item[self.identifier]

        # (2) .. hashing the whole item
        return md5(str(item).encode('utf-8')).hexdigest()


    @abstractmethod
    def process_data(self, data: list) -> list:
        '''
        Converts raw CSV rows (list of dicts) into the records to be stored
        '''
        pass


# --- knv_cli/processors/payments.py ---

class Payments(BaseClass):
    '''
    Base for payment processors: regular payments end up in `self.data`,
    while subclasses may collect others in `self._blocked_payments`.
    '''

    def __init__(self) -> None:
        # One list per instance - a class-level list would be shared by
        # every processor object
        self._blocked_payments = []


    def process_data(self, data: list) -> list:
        return self.process_payments(data)


    @abstractmethod
    def process_payments(self, data: list) -> list:
        '''
        Converts raw rows into payment records; the returned list is what
        gets merged into `self.data`
        '''
        pass


    def payments(self):
        # Sort payments by date (empty list if nothing was loaded yet)
        return sorted(self.data or [], key=itemgetter('Datum'))


    def blocked_payments(self):
        # Sort the collected list - not this method itself - by date
        return sorted(self._blocked_payments, key=itemgetter('Datum'))
convert_date(item['Datum']) - payment['Vorgang'] = 'nicht zugeordnet' - payment['Name'] = item['Name'] - payment['Email'] = item['Absender E-Mail-Adresse'] - payment['Brutto'] = convert_number(item['Brutto']) - payment['Gebühr'] = convert_number(item['Gebühr']) - payment['Netto'] = convert_number(item['Netto']) - payment['Währung'] = item['Währung'] + for item in data: + # Skip withdrawals + if item['Brutto'][:1] == '-': + continue - if code not in codes: - codes.add(code) + # Assign identifier + code = item['Transaktionscode'] - # Sort out regular payments - if item['Typ'] == 'Allgemeine Zahlung': - blocked_payments.append(payment) - continue + payment = {} + + payment['ID'] = 'nicht zugeordnet' + payment['Transaktion'] = code + payment['Datum'] = convert_date(item['Datum']) + payment['Vorgang'] = 'nicht zugeordnet' + payment['Name'] = item['Name'] + payment['Email'] = item['Absender E-Mail-Adresse'] + payment['Brutto'] = convert_number(item['Brutto']) + payment['Gebühr'] = convert_number(item['Gebühr']) + payment['Netto'] = convert_number(item['Netto']) + payment['Währung'] = item['Währung'] + + if code not in codes: + codes.add(code) + + # Sort out regular payments + if item['Typ'] == 'Allgemeine Zahlung': + self._blocked_payments.append(payment) + continue - payments.append(payment) + payments.append(payment) - return (payments, blocked_payments) + return payments diff --git a/knv_cli/processors/shopkonfigurator.py b/knv_cli/processors/shopkonfigurator.py index 60c178b..80d8cde 100644 --- a/knv_cli/processors/shopkonfigurator.py +++ b/knv_cli/processors/shopkonfigurator.py @@ -1,104 +1,131 @@ -# ~*~ coding=utf-8 ~*~ - # SHOPKONFIGURATOR -# This module contains functions for processing 'Auftragsdaten' +# This module contains classes for processing & working with +# 'Auftragsdaten' & 'Ausführungen, as exported from Shopkonfigurator # See http://www.knv-info.de/wp-content/uploads/2020/04/Auftragsdatenexport2.pdf -from .helpers import convert_number, convert_date 
class Orders(BaseClass):
    '''
    Processor for 'Auftragsdaten' (order exports); one record per order,
    recognised by its 'ID'.
    '''

    # Props
    identifier = 'ID'


    def process_data(self, order_data: list) -> list:
        '''
        Processes 'Orders_*.csv' files

        Each row describes one purchased article; rows sharing the same
        'ormorderid' are folded into a single order whose 'Bestellung'
        maps article numbers to quantity, price & tax information.
        '''
        orders = {}

        for item in order_data:
            # Create reliable article number ..
            clean_isbn = item['isbn']

            # .. since ISBNs are not always ISBNs
            if str(clean_isbn) == 'nan' or str(clean_isbn)[:3] != '978':
                clean_isbn = item['knvnumber']

            # .. and - more often than not - formatted as floats with a
            # trailing zero (strip that suffix only, not every '.0')
            clean_isbn = str(clean_isbn)

            if clean_isbn.endswith('.0'):
                clean_isbn = clean_isbn[:-2]

            # Assign identifier
            code = item['ormorderid']

            # First row of an order creates its record - the dictionary
            # itself tells which orders are already known
            if code not in orders:
                orders[code] = {
                    'ID': code,
                    'Datum': item['timeplaced'][:10],
                    'Anrede': item['rechnungaddresstitle'],
                    'Vorname': item['rechnungaddressfirstname'],
                    'Nachname': item['rechnungaddresslastname'],
                    'Name': ' '.join([item['rechnungaddressfirstname'], item['rechnungaddresslastname']]),
                    'Email': item['rechnungaddressemail'],
                    'Bestellung': {'Summe': convert_number(item['totalproductcost'])},
                    'Versand': convert_number(item['totalshipping']),
                    'Betrag': convert_number(item['totalordercost']),
                    'Währung': item['currency'],
                    'Abwicklung': {'Zahlungsart': 'keine Angabe', 'Transaktionscode': 'keine Angabe'},
                }

            order = orders[code]

            # Add information about each purchased article
            order['Bestellung'][clean_isbn] = {
                'Anzahl': int(item['quantity']),
                'Preis': convert_number(item['orderitemunitprice']),
                'Steuersatz': convert_number(item['vatpercent']),
                'Steueranteil': convert_number(item['vatprice']),
            }

            # Add information about ..
            # (1) .. method of payment
            if str(item['paymenttype']) != 'nan':
                order['Abwicklung']['Zahlungsart'] = item['paymenttype']

            # (2) .. transaction number (Paypal™ only)
            if str(item['transactionid']) != 'nan':
                order['Abwicklung']['Transaktionscode'] = str(item['transactionid'])

        return list(orders.values())


    def orders(self):
        # Sort orders by date (empty list if nothing was loaded yet)
        return sorted(self.data or [], key=itemgetter('Datum'))
# INVOICE functions

def _split_invoice_name(string: str) -> list:
    '''
    Splits the file name (not the directories) of an invoice path into its
    delimited segments
    '''
    name = basename(string)

    # Distinguish between delimiters ..
    # (1) .. hyphen ('Shopkonfigurator')
    # (2) .. underscore ('Barsortiment')
    # Only the file name decides - a hyphen in a directory name must not
    # hide underscore-delimited file names
    delimiter = '-' if '-' in name else '_'

    return name.split(delimiter)


def invoice2date(string: str) -> str:
    '''
    Extracts the date from an invoice file name and returns it formatted
    as 'YYYY-MM-DD'.

    The date is read from the second segment of the file name after
    dropping its last four characters (presumably the '.pdf' extension -
    verify against the actual file names), and must be given as 'YYYYMMDD'.

    Raises `IndexError` if the name has no second segment & `ValueError` if
    that segment is not a valid date.
    '''
    date_string = _split_invoice_name(string)[1][:-4]

    return datetime.strptime(date_string, '%Y%m%d').strftime('%Y-%m-%d')


def invoice2number(string: str) -> str:
    '''
    Extracts the invoice number from an invoice file name: its last
    segment, minus the last four characters (presumably the '.pdf'
    extension - verify against the actual file names).
    '''
    return _split_invoice_name(string)[-1][:-4]