From 10afd3e7367aaf7c7efa717de139e42398ad45f9 Mon Sep 17 00:00:00 2001 From: Titusz Pan Date: Tue, 18 Feb 2020 11:21:38 +0100 Subject: [PATCH 1/5] Update dependencies. --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index f419382..3a93ba9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -396,11 +396,11 @@ marker = "python_version < \"3.8\"" name = "zipp" optional = false python-versions = ">=3.6" -version = "2.1.0" +version = "3.0.0" [package.extras] docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] -testing = ["jaraco.itertools"] +testing = ["jaraco.itertools", "func-timeout"] [metadata] content-hash = "90ea6e187f8d88079f50424575275f6ba0979305d7d4afc73518da3e9c09a05b" @@ -685,6 +685,6 @@ xxhash = [ {file = "xxhash-1.4.3.tar.gz", hash = "sha256:8b6b1afe7731d7d9cbb0398b4a811ebb5e6be5c174f72c68abf81f919a435de9"}, ] zipp = [ - {file = "zipp-2.1.0-py3-none-any.whl", hash = "sha256:ccc94ed0909b58ffe34430ea5451f07bc0c76467d7081619a454bf5c98b89e28"}, - {file = "zipp-2.1.0.tar.gz", hash = "sha256:feae2f18633c32fc71f2de629bfb3bd3c9325cd4419642b1f1da42ee488d9b98"}, + {file = "zipp-3.0.0-py3-none-any.whl", hash = "sha256:12248a63bbdf7548f89cb4c7cda4681e537031eda29c02ea29674bc6854460c2"}, + {file = "zipp-3.0.0.tar.gz", hash = "sha256:7c0f8e91abc0dc07a5068f315c52cb30c66bfbc581e5b50704c8a2f6ebae794a"}, ] From c0d24deb31d3c27ea36a886b5d5f56cb4943d042 Mon Sep 17 00:00:00 2001 From: Titusz Pan Date: Tue, 18 Feb 2020 11:29:04 +0100 Subject: [PATCH 2/5] Add failing test for xhtml document. --- tests/test_formats.py | 13 +++++++++++++ tests/text/demo.xthml | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 tests/test_formats.py create mode 100644 tests/text/demo.xthml diff --git a/tests/test_formats.py b/tests/test_formats.py new file mode 100644 index 0000000..432a358 --- /dev/null +++ b/tests/test_formats.py @@ -0,0 +1,13 @@ +import os +from tests import ROOT_DIR +from iscc_cli.cli import cli +from click.testing import CliRunner + + +os.chdir(ROOT_DIR) +r = CliRunner() + + +def test_xhtml(): + result = r.invoke(cli, ["gen", "tests/text/demo.xhtml"]) + assert result.exit_code == 0 diff --git a/tests/text/demo.xthml b/tests/text/demo.xthml new file mode 100644 index 0000000..4f4d263 --- /dev/null +++ b/tests/text/demo.xthml @@ -0,0 +1,34 @@ + + + + + + + demo + + + + +

ISCC Test Document

+

Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.

+

Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.

+

Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.

+

Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat.

+

Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis.

+

At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, At accusam aliquyam diam diam dolore dolores duo eirmod eos erat, et nonumy sed tempor et et invidunt justo labore Stet clita ea et gubergren, kasd magna no rebum. sanctus sea sed takimata ut vero voluptua. est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat.

+

Consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus.

+

Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.

+

Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.

+

Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.

+

Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat.

+ + From 4d0aa5b29c28b62f79156f945d2c4c5fd13333e8 Mon Sep 17 00:00:00 2001 From: Titusz Pan Date: Tue, 18 Feb 2020 11:46:38 +0100 Subject: [PATCH 3/5] Fix UnboundLocalError, add XHTML support. --- iscc_cli/batch.py | 3 +++ iscc_cli/const.py | 1 + iscc_cli/gen.py | 3 +++ tests/test_formats.py | 1 + tests/text/{demo.xthml => demo.xhtml} | 0 5 files changed, 8 insertions(+) rename tests/text/{demo.xthml => demo.xhtml} (100%) diff --git a/iscc_cli/batch.py b/iscc_cli/batch.py index 8176798..009c0e4 100644 --- a/iscc_cli/batch.py +++ b/iscc_cli/batch.py @@ -62,6 +62,9 @@ def batch(path, recursive, guess): elif gmt == GMT.VIDEO: features = video_id.get_frame_vectors(abspath(f)) cid = video_id.content_id_video(features) + else: + click.echo("Could not generate ISCC") + continue did = iscc.data_id(f) iid, tophash = iscc.instance_id(f) diff --git a/iscc_cli/const.py b/iscc_cli/const.py index 2dc1ba9..560f238 100644 --- a/iscc_cli/const.py +++ b/iscc_cli/const.py @@ -18,6 +18,7 @@ class GMT: "application/pdf": {"gmt": GMT.TEXT, "ext": "pdf"}, "application/epub+zip": {"gmt": GMT.TEXT, "ext": "epub"}, "application/xml": {"gmt": GMT.TEXT, "ext": "xml"}, + "application/xhtml+xml": {"gmt": GMT.TEXT, "ext": "xhtml"}, "application/vnd.oasis.opendocument.text": {"gmt": GMT.TEXT, "ext": "odt"}, "text/html": {"gmt": GMT.TEXT, "ext": "html"}, "text/plain": {"gmt": GMT.TEXT, "ext": "txt"}, diff --git a/iscc_cli/gen.py b/iscc_cli/gen.py index 4b2ed1f..2f40f24 100644 --- a/iscc_cli/gen.py +++ b/iscc_cli/gen.py @@ -56,6 +56,9 @@ def gen(file, guess, title, extra, verbose): elif gmt == GMT.VIDEO: features = video_id.get_frame_vectors(abspath(file.name)) cid = video_id.content_id_video(features) + else: + click.echo("Could not generate ISCC") + return did = iscc.data_id(file.name) iid, tophash = iscc.instance_id(file.name) diff --git a/tests/test_formats.py b/tests/test_formats.py index 432a358..8bc5660 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -11,3 +11,4 @@ def test_xhtml(): result = r.invoke(cli, ["gen", "tests/text/demo.xhtml"]) assert result.exit_code == 0 + assert "CTMjk4o5H96BV" in result.output diff --git a/tests/text/demo.xthml b/tests/text/demo.xhtml similarity index 100% rename from tests/text/demo.xthml rename to tests/text/demo.xhtml From b788dff9f596a69b68ff9fa365707ea5dfd05594 Mon Sep 17 00:00:00 2001 From: Titusz Pan Date: Tue, 18 Feb 2020 11:54:51 +0100 Subject: [PATCH 4/5] Add test for unsupported formats. --- tests/test_formats.py | 6 ++++++ tests/text/demo.json | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 tests/text/demo.json diff --git a/tests/test_formats.py b/tests/test_formats.py index 8bc5660..e20a548 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -8,6 +8,12 @@ r = CliRunner() +def test_unsupported(): + result = r.invoke(cli, ["gen", "tests/text/demo.json"]) + assert result.exit_code == 0 + assert "Unsupported media type" in result.output + + def test_xhtml(): result = r.invoke(cli, ["gen", "tests/text/demo.xhtml"]) assert result.exit_code == 0 diff --git a/tests/text/demo.json b/tests/text/demo.json new file mode 100644 index 0000000..5a4b624 --- /dev/null +++ b/tests/text/demo.json @@ -0,0 +1,18 @@ +{ + "title": "The Neverending Story", + "meta": + [{ + "schema": "schema.org", + "mediatype": "application/ld+json", + "data": + { + "@context": "http://schema.org", + "@type": "Movie", + "name": "The Neverending Story", + "dateCreated": "6 April, 1984", + "director": "Wolfgang Petersen", + "actors": ["Noah Hathaway", "Barret Oliver", "Tami Stronach"], + "duration": "1:42:00" + } + }] +} From 0b12d68c80cdb83eb43a470d1aa3aeb5b4024842 Mon Sep 17 00:00:00 2001 From: Titusz Pan Date: Tue, 18 Feb 2020 11:58:19 +0100 Subject: [PATCH 5/5] Update changelog and bump version. --- README.md | 4 ++++ iscc_cli/__init__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b732c73..857f6d9 100644 --- a/README.md +++ b/README.md @@ -199,6 +199,10 @@ You may also want join our developer chat on Telegram at ## Change Log +### [0.9.3] - 2020-02-18 +- Add support for XHTML +- Fix error on unsupported media types + ### [0.9.2] - 2020-01-30 - Add support for bmp, psd, xls, xlsx - Add tika server live testing diff --git a/iscc_cli/__init__.py b/iscc_cli/__init__.py index b759411..76bee58 100644 --- a/iscc_cli/__init__.py +++ b/iscc_cli/__init__.py @@ -3,7 +3,7 @@ import click from tika import tika -__version__ = "0.9.2" +__version__ = "0.9.3" APP_NAME = "iscc-cli" APP_DIR = click.get_app_dir(APP_NAME, roaming=False) diff --git a/pyproject.toml b/pyproject.toml index b0ce5a1..6d247e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "iscc-cli" -version = "0.9.2" +version = "0.9.3" description = "ISCC CLI - Creates ISCC Codes from Media Files" authors = ["Titusz Pan "] license = "MIT"