From 14b22769b97888979c8d8d5c02a918a8bc685c15 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 15 Nov 2024 06:44:12 -0500 Subject: [PATCH] Add isle-leptonica to stack (#52) --- Dockerfile | 2 +- examples/imagemagick/README.md | 8 ++++++++ examples/mergepdf/Dockerfile | 35 ++++++++++++++++++---------------- examples/mergepdf/README.md | 7 +++++++ examples/mergepdf/cmd.sh | 4 ++-- examples/tesseract/Dockerfile | 7 +++++-- examples/tesseract/README.md | 8 ++++++++ 7 files changed, 50 insertions(+), 21 deletions(-) create mode 100644 examples/imagemagick/README.md create mode 100644 examples/tesseract/README.md diff --git a/Dockerfile b/Dockerfile index dd598d5..75d40af 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,7 @@ RUN adduser -S -G nobody scyllaridae RUN apk update && \ apk add --no-cache \ - curl==8.10.1-r0 \ + curl==8.11.0-r2 \ bash==5.2.26-r0 \ ca-certificates==20240705-r0 \ openssl==3.3.2-r1 diff --git a/examples/imagemagick/README.md b/examples/imagemagick/README.md new file mode 100644 index 0000000..979be7a --- /dev/null +++ b/examples/imagemagick/README.md @@ -0,0 +1,8 @@ +# imagemagick + +imagemagick as a service + +## Attribution + +- [imagemagick](https://github.com/ImageMagick/ImageMagick) for image file manipulation +- [islandora/imagemagick](https://github.com/Islandora-Devops/isle-imagemagick/) for a custom Alpine APK imagemagick package. diff --git a/examples/mergepdf/Dockerfile b/examples/mergepdf/Dockerfile index edd437b..c71b84b 100644 --- a/examples/mergepdf/Dockerfile +++ b/examples/mergepdf/Dockerfile @@ -1,23 +1,26 @@ ARG TAG=main ARG DOCKER_REPOSITORY=local +FROM islandora/leptonica:alpine-3.20.2-leptonica-1.84.1-r0 AS leptonica FROM ${DOCKER_REPOSITORY}/scyllaridae-imagemagick:${TAG} AS scyllaridae -RUN apk update && \ - apk add --no-cache \ - ghostscript==10.04.0-r0 \ - jq==1.7.1-r0 \ - leptonica-dev==1.84.1-r0 \ - tesseract-ocr==5.3.4-r0 \ - tesseract-ocr-data-eng==5.3.4-r0 \ - tesseract-ocr-data-fra==5.3.4-r0 \ - tesseract-ocr-data-spa==5.3.4-r0 \ - tesseract-ocr-data-ita==5.3.4-r0 \ - tesseract-ocr-data-por==5.3.4-r0 \ - tesseract-ocr-data-hin==5.3.4-r0 \ - tesseract-ocr-data-deu==5.3.4-r0 \ - tesseract-ocr-data-jpn==5.3.4-r0 \ - tesseract-ocr-data-rus==5.3.4-r0 \ - poppler-utils==24.02.0-r1 +RUN --mount=type=bind,from=leptonica,source=/packages,target=/packages \ + --mount=type=bind,from=leptonica,source=/etc/apk/keys,target=/etc/apk/keys \ + apk update && \ + apk add --no-cache \ + /packages/leptonica-*.apk \ + ghostscript==10.04.0-r0 \ + jq==1.7.1-r0 \ + tesseract-ocr==5.3.4-r0 \ + tesseract-ocr-data-eng==5.3.4-r0 \ + tesseract-ocr-data-fra==5.3.4-r0 \ + tesseract-ocr-data-spa==5.3.4-r0 \ + tesseract-ocr-data-ita==5.3.4-r0 \ + tesseract-ocr-data-por==5.3.4-r0 \ + tesseract-ocr-data-hin==5.3.4-r0 \ + tesseract-ocr-data-deu==5.3.4-r0 \ + tesseract-ocr-data-jpn==5.3.4-r0 \ + tesseract-ocr-data-rus==5.3.4-r0 \ + poppler-utils==24.02.0-r1 COPY . /app diff --git a/examples/mergepdf/README.md b/examples/mergepdf/README.md index fad2c1f..9eee2a1 100644 --- a/examples/mergepdf/README.md +++ b/examples/mergepdf/README.md @@ -1,3 +1,10 @@ # mergepdf Merge children of a paged content item into a single PDF + +## Attribution + +- [imagemagick](https://github.com/ImageMagick/ImageMagick) for image file manipulation +- [tesseract](https://github.com/tesseract-ocr/tesseract) OCR engine +- [islandora/imagemagick](https://github.com/Islandora-Devops/isle-imagemagick/) for a custom Alpine APK imagemagick package. +- [islandora/leptonica](https://github.com/Islandora-Devops/isle-leptonica/) for a custom Alpine APK leptonica package. diff --git a/examples/mergepdf/cmd.sh b/examples/mergepdf/cmd.sh index dc84797..4ed3564 100755 --- a/examples/mergepdf/cmd.sh +++ b/examples/mergepdf/cmd.sh @@ -4,11 +4,11 @@ set -eou pipefail TMP_DIR=$(mktemp -d) I=0 -MAX_THREADS=7 +MAX_THREADS=${MAX_THREADS:-5} PIDS=() # iterate over all images in the IIIF manifest -URLS=$(curl -s "$1/book-manifest" | jq -r '.sequences[0].canvases[].images[0].resource."@id"') +URLS=$(curl -s "$1/book-manifest" | jq -r '.sequences[0].canvases[].images[0].resource."@id"' | awk -F '/' '{print $7}'|sed -e 's/%2F/\//g' -e 's/%3A/:/g') while read -r URL; do # If we have reached the max thread limit, wait for any one job to finish if [ "${#PIDS[@]}" -ge "$MAX_THREADS" ]; then diff --git a/examples/tesseract/Dockerfile b/examples/tesseract/Dockerfile index 434dde1..3811632 100644 --- a/examples/tesseract/Dockerfile +++ b/examples/tesseract/Dockerfile @@ -1,10 +1,13 @@ ARG TAG=main ARG DOCKER_REPOSITORY=local +FROM islandora/leptonica:alpine-3.20.2-leptonica-1.84.1-r0 AS leptonica FROM ${DOCKER_REPOSITORY}/scyllaridae:${TAG} -RUN apk update && \ +RUN --mount=type=bind,from=leptonica,source=/packages,target=/packages \ + --mount=type=bind,from=leptonica,source=/etc/apk/keys,target=/etc/apk/keys \ + apk update && \ apk add --no-cache \ - leptonica-dev==1.84.1-r0 \ + /packages/leptonica-*.apk \ tesseract-ocr==5.3.4-r0 \ tesseract-ocr-data-eng==5.3.4-r0 \ tesseract-ocr-data-fra==5.3.4-r0 \ diff --git a/examples/tesseract/README.md b/examples/tesseract/README.md new file mode 100644 index 0000000..4ba308c --- /dev/null +++ b/examples/tesseract/README.md @@ -0,0 +1,8 @@ +# tesseract + +Tesseract as a service + +## Attribution + +- [tesseract](https://github.com/tesseract-ocr/tesseract) OCR engine +- [islandora/leptonica](https://github.com/Islandora-Devops/isle-leptonica/) for a custom Alpine APK leptonica package.