Skip to content

Commit

Permalink
Merge pull request #261 from metanorma/image_extract
Browse files Browse the repository at this point in the history
Image extract
  • Loading branch information
Intelligent2013 authored Aug 23, 2024
2 parents 339a5a5 + b1ecd72 commit b1615ff
Show file tree
Hide file tree
Showing 9 changed files with 35,111 additions and 18,018 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ SHELL ?= /bin/bash
endif

#JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout)
JAR_VERSION := 1.96
JAR_VERSION := 1.97
JAR_FILE := mn2pdf-$(JAR_VERSION).jar

all: target/$(JAR_FILE)
Expand Down
10 changes: 5 additions & 5 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi

[source,sh]
----
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.96.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.97.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
----

e.g.

[source,sh]
----
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.96.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
java -Xss5m -Xmx2048m -jar target/mn2pdf-1.97.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
----

=== PDF encryption features
Expand Down Expand Up @@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.:
----
<groupId>org.metanorma.fop</groupId>
<artifactId>mn2pdf</artifactId>
<version>1.96</version>
<version>1.97</version>
<name>Metanorma XML to PDF converter</name>
----

Expand All @@ -111,8 +111,8 @@ Tag the same version in Git:

[source,xml]
----
git tag v1.96
git push origin v1.96
git tag v1.97
git push origin v1.97
----

Then the corresponding GitHub release will be automatically created at:
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.metanorma.fop</groupId>
<artifactId>mn2pdf</artifactId>
<version>1.96</version>
<version>1.97</version>
<name>Metanorma XML to PDF converter</name>
<packaging>jar</packaging>
<url>https://www.metanorma.org</url>
Expand Down
6 changes: 5 additions & 1 deletion src/main/java/org/metanorma/fop/PDFGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,10 @@ public boolean process() {
logger.info(String.format(OUTPUT_LOG, PDF_OUTPUT, fPDF));
logger.info("");

File fPresentationPartXML = getPresentationPartXML(fXML, fPDF.getParent());
PDFResult pdfResult = PDFResult.PDFResult(fPDF);

//File fPresentationPartXML = getPresentationPartXML(fXML, fPDF.getParent());
File fPresentationPartXML = getPresentationPartXML(fXML, pdfResult.getOutFolder());

sourceXMLDocument = new SourceXMLDocument(fPresentationPartXML);

Expand Down Expand Up @@ -341,6 +344,7 @@ public boolean process() {
}
xsltConverter.deleteTmpXSL();
fontcfg.deleteConfigFile();
pdfResult.flushOutTmpImagesFolder();
}

logger.info("Success!");
Expand Down
64 changes: 64 additions & 0 deletions src/main/java/org/metanorma/fop/PDFResult.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package org.metanorma.fop;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.UUID;

/**
 * Process-wide holder for the output location of the PDF being generated.
 *
 * <p>Keeps the absolute path of the folder that contains the output PDF file and the path
 * of a uniquely named temporary images folder ({@code _tmp_images_<uuid>}) inside that folder.
 * The single instance is created by the first call to {@link #PDFResult(File)}; every later
 * call returns that same instance, whatever argument is passed.
 */
public class PDFResult {

    private static PDFResult PDFResultSingleInstance = null;

    /** Absolute path of the folder that contains the output PDF file. */
    private String outFolder;

    /** Uniquely named temporary images folder located inside {@link #outFolder}. */
    private Path outTmpImagesPath;

    private PDFResult() {
    }

    private PDFResult(File pdfFile) {
        String parentFolder = pdfFile.getParent();
        if (parentFolder == null) {
            // A bare file name has no parent component: resolve it against the working directory.
            parentFolder = pdfFile.getAbsoluteFile().getParent();
        } else {
            parentFolder = new File(parentFolder).getAbsolutePath();
        }
        outTmpImagesPath = Paths.get(parentFolder, "_tmp_images_" + UUID.randomUUID().toString());
        outFolder = parentFolder;
    }

    /**
     * Returns the single instance, creating it from {@code pdfFile} on the first call.
     *
     * @param pdfFile the output PDF file; used only when no instance exists yet, and may be
     *                {@code null} once the instance has been created
     * @return the shared instance
     * @throws NullPointerException if no instance exists yet and {@code pdfFile} is {@code null}
     */
    public static PDFResult PDFResult(File pdfFile)
    {
        if (PDFResultSingleInstance == null) {
            if (pdfFile == null) {
                // Fail with an explicit message instead of an anonymous NPE inside the constructor.
                throw new NullPointerException(
                        "PDFResult is not initialized yet: a non-null PDF file is required on the first call");
            }
            PDFResultSingleInstance = new PDFResult(pdfFile);
        }
        return PDFResultSingleInstance;
    }

    /**
     * @return the absolute path of the folder that contains the output PDF file
     */
    public String getOutFolder() {
        return outFolder;
    }

    /**
     * @return the path of the temporary images folder (the folder itself is not created here)
     */
    public Path getOutTmpImagesPath() {
        return outTmpImagesPath;
    }

    /**
     * Deletes the temporary images folder together with everything inside it, if it exists.
     *
     * <p>This is a best-effort cleanup: any failure is printed to the standard error stream
     * and is not propagated to the caller.
     */
    public void flushOutTmpImagesFolder () {
        if (Files.exists(outTmpImagesPath)) {
            // Files.walk keeps directory handles open until the stream is closed,
            // so the stream must be closed before the folder can be reliably removed.
            try (java.util.stream.Stream<Path> entries = Files.walk(outTmpImagesPath)) {
                // Reverse order puts children before their parent folders.
                entries.sorted(Comparator.reverseOrder())
                        .map(Path::toFile)
                        .forEach(File::delete);
                Files.deleteIfExists(outTmpImagesPath);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.metanorma.fop.ifhandler;

import org.apache.commons.lang3.StringEscapeUtils;
import org.metanorma.fop.PDFResult;
import org.metanorma.fop.Util;
import org.metanorma.utils.LoggerHelper;
import org.w3c.dom.Document;
Expand All @@ -18,15 +19,20 @@
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.logging.Logger;

/*
* This class is intended for removing the semantic part from Metanorma XML
* This class is intended for:
* - removing the semantic part from Metanorma XML
* - extracting embedded base64 images to binary files in a temporary folder on disk
*/

public class FOPXMLPresentationHandler extends DefaultHandler {
Expand All @@ -39,6 +45,8 @@ public class FOPXMLPresentationHandler extends DefaultHandler {

private StringBuilder sbResult = new StringBuilder();

private String currentElement;

Stack<Character> stackChar = new Stack<>();

Stack<Boolean> skipElements = new Stack<>();
Expand All @@ -51,6 +59,8 @@ public void startDocument() {
@Override
public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException {

currentElement = qName;

if (qName.startsWith("semantic__") || qName.equals("emf")) {
// skip
skipElements.push(true);
Expand Down Expand Up @@ -82,11 +92,53 @@ private String copyAttributes(Attributes attr) {
StringBuilder sbTmp = new StringBuilder();
for (int i = 0; i < attr.getLength(); i++) {
sbTmp.append(" ");
sbTmp.append(attr.getLocalName(i));
String attrName = attr.getLocalName(i);
String attrValue = attr.getValue(i);
sbTmp.append(attrName);
sbTmp.append("=\"");
String value = StringEscapeUtils.escapeXml(attr.getValue(i));

String value = StringEscapeUtils.escapeXml(attrValue);;

boolean isExtractedImage = false;

if (currentElement.equals("image") && attrName.equals("src") &&
(attrValue.startsWith("data:image/") || attrValue.startsWith("data:application/")) &&
!(attrValue.startsWith("data:image/svg+xml;"))) {
String dataPrefix = "data:image/";
if (attrValue.startsWith("data:application/")) {
dataPrefix = "data:application/";
}
// extract embedded images in base64 to binary format into temporary folder on disk
int startPos = attrValue.indexOf(";base64,") + 8;
String base64data = attrValue.substring(startPos);
byte[] decodedBytes = Base64.getDecoder().decode(base64data);

String imageFormat = attrValue.substring(attrValue.indexOf(dataPrefix) + dataPrefix.length(), attrValue.indexOf(";base64,"));
PDFResult pdfResult = PDFResult.PDFResult(null);
String imageTmpName = UUID.randomUUID().toString() + "." + imageFormat;
Path imagePath = Paths.get(pdfResult.getOutTmpImagesPath().toString(), imageTmpName);
try {
Files.createDirectories(pdfResult.getOutTmpImagesPath());
Files.write(imagePath, decodedBytes);
// relative path to PDF out file
//File imageFile = new File(imagePath.toString());
//String imageFileParentFolder = imageFile.getParentFile().getName();
//value = Paths.get(imageFileParentFolder, imageTmpName).toString();
// absolute path
value = imagePath.toAbsolutePath().toString();
isExtractedImage = true;
} catch (IOException ex) {
logger.severe("Can't save the image on disk '" + imagePath.toString() + "':");
logger.severe(ex.getMessage());
ex.printStackTrace();
}
}
sbTmp.append(value);
sbTmp.append("\"");

if (isExtractedImage) {
sbTmp.append(" extracted=\"true\"");
}
}
return sbTmp.toString();
}
Expand Down
Loading

0 comments on commit b1615ff

Please sign in to comment.