diff --git a/README.md b/README.md index 0b928b6..4c5e079 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # mwpdfify -Batch download pages from MediaWiki sites (All pages or pages of a category) as printable PDFs. +Batch download multiple pages from MediaWiki sites (All pages or pages of a category) to printable PDFs. ## Install / Run `pip install mwpdfify` @@ -8,7 +8,7 @@ Batch download pages from MediaWiki sites (All pages or pages of a category) as ...or directly download and run `src/mwpdfify.py` -There are two PDF rendering backends to choose from: `pdfkit` (default) or `weasyprint`. Use `pip install -r requirements.txt` to install both or choose one yourself. If using the former remember to also install `wkhtmltopdf` on your system. +There are two PDF rendering backends to choose from: `pdfkit` (installed as a dependency by default) or `weasyprint`. Use `pip install -r requirements.txt` to install both or choose one yourself. **If using the former remember to also install `wkhtmltopdf` on your system.** ## Usage 1. Get the address of the **root** of your wiki, where its `api.php` and `index.php` resides. Typically it's identical to the site's root (`/`). 
For Wikipedia it's at `/w/`; tell me if there are other exceptions ;) @@ -22,7 +22,7 @@ The downloaded PDFs should be avaliable in a folder marked with the site's domai See below for other parameters: ``` -usage: mwpdfify [-h] [-c CATEGORY] [-p] [-t THREADS] [-l LIMIT] [-b BACKEND] url +usage: mwpdfify [-h] [-c CATEGORY] [-p] [-t THREADS] [-l LIMIT] [-w] url positional arguments: url site root of destination site @@ -37,14 +37,15 @@ options: -l LIMIT, --limit LIMIT Limit of JSON info returned at once, defaults to maximum (0) - -b BACKEND, --backend BACKEND - PDF rendering backend to use, defaults to 'pdfkit' + -w, --use-weasyprint Use weasyprint as PDF rendering backend ``` ## Known issues - `&printable=yes` is deprecated in recent versions of MediaWiki (while no substitute API solutions are provided) so there might be layout issues when used with certain wikis; *especially* Fandom wikis as they also contain ads. - Recursively download pages from subcategories of a category is currently not supported. ## Changelog +- v1.1.2 (2022/09/30): + - Set `pdfkit` as required dependency - v1.1 (2022/09/04): - Changed address handling logic - Bug fixes @@ -52,4 +53,4 @@ options: - Initial release ## License -LGPLv3 +LGPLv3 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 6581443..2336b73 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,9 +22,11 @@ classifiers = [options] packages = src +install_requires = + pdfkit + [options.extras_require] PDF = - pdfkit weasyprint [options.entry_points] diff --git a/src/__init__.py b/src/__init__.py index 778de05..5da0b9a 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1 +1 @@ -__version__ = 1.1 \ No newline at end of file +__version__ = "1.1.2" \ No newline at end of file diff --git a/src/mwpdfify.py b/src/mwpdfify.py index 1b9ff51..57cb7ae 100644 --- a/src/mwpdfify.py +++ b/src/mwpdfify.py @@ -102,14 +102,14 @@ def output(futures, cnt): print(f"Done. 
{curpos-1} pages processed, {errcnt} errors") def main(): - parser = argparse.ArgumentParser(description="Batch download printable PDF from MediaWiki sites") + parser = argparse.ArgumentParser(description="Utility for batch downloading (certain) pages from MediaWiki sites as printable PDFs") parser.add_argument('url', help='site root of destination site') parser.add_argument('-c', '--category', help='Download only a specified category', type=str) # parser.add_argument('-r', '--recursive', help='Download through subcategories recursively, only to be used with -c', action='store_true') parser.add_argument('-p', '--no-printable', help='Force normal instead of printable version of pages', action='store_true') parser.add_argument('-t', '--threads', help='Number of download threads, defaults to %(default)s', type=int, default=8) parser.add_argument('-l', '--limit', help='Limit of JSON info returned at once, defaults to maximum (%(default)s)', type=int, default=0) - parser.add_argument('-b', '--backend', help='PDF rendering backend to use, defaults to \'%(default)s\'', type=str, default="pdfkit") + parser.add_argument('-w', '--use-weasyprint', help='Use weasyprint as PDF rendering backend', action='store_true') args = parser.parse_args() folder_name = site_url(args.url, True) @@ -118,8 +118,8 @@ def main(): args.recursive = None # if args.recursive and not args.category: # print("ERROR: -r/--recursive option is meant to only be used with -c/--category") -# exit(1) - if (args.backend == "pdfkit" and not pdfkit) or (args.backend == "weasyprint" and not weasyprint): +# exit(1) + if ((not args.use_weasyprint) and not pdfkit) or (args.use_weasyprint and not weasyprint): - print(f"ERROR: Backend '{args.backend}' unavailable; please install it first or switch to another") + print(f"ERROR: Backend '{'weasyprint' if args.use_weasyprint else 'pdfkit'}' unavailable; please install it first or switch to another") exit(1) if urllib.request.urlopen(root_address).code != 200: @@ -128,6 +128,6 @@ if args.category: folder_name = folder_name + f" ({re.sub(r':', '=', args.category)})" init_dir(folder_name) - 
download(root_address, args.backend, args.threads, args.limit, args.category, args.no_printable, args.recursive) + download(root_address, ('weasyprint' if args.use_weasyprint else 'pdfkit'), args.threads, args.limit, args.category, args.no_printable, args.recursive) if __name__ == '__main__': main()