diff --git a/Makefile b/Makefile index 9ec320d..8902093 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL ?= /bin/bash endif #JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout) -JAR_VERSION := 1.86 +JAR_VERSION := 1.87 JAR_FILE := mn2pdf-$(JAR_VERSION).jar all: target/$(JAR_FILE) diff --git a/README.adoc b/README.adoc index b7ce43c..3233998 100644 --- a/README.adoc +++ b/README.adoc @@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi [source,sh] ---- -java -Xss5m -Xmx2048m -jar target/mn2pdf-1.86.jar --xml-file --xsl-file --pdf-file [--syntax-highlight] +java -Xss5m -Xmx2048m -jar target/mn2pdf-1.87.jar --xml-file --xsl-file --pdf-file [--syntax-highlight] ---- e.g. [source,sh] ---- -java -Xss5m -Xmx2048m -jar target/mn2pdf-1.86.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf +java -Xss5m -Xmx2048m -jar target/mn2pdf-1.87.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf ---- === PDF encryption features @@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.: ---- org.metanorma.fop mn2pdf -1.86 +1.87 Metanorma XML to PDF converter ---- @@ -111,8 +111,8 @@ Tag the same version in Git: [source,xml] ---- -git tag v1.86 -git push origin v1.86 +git tag v1.87 +git push origin v1.87 ---- Then the corresponding GitHub release will be automatically created at: diff --git a/pom.xml b/pom.xml index 851f7e6..2c84f13 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.metanorma.fop mn2pdf - 1.86 + 1.87 Metanorma XML to PDF converter jar https://www.metanorma.org diff --git a/src/main/java/org/metanorma/fop/PDFGenerator.java b/src/main/java/org/metanorma/fop/PDFGenerator.java index afe419b..ba9e7ed 100644 --- a/src/main/java/org/metanorma/fop/PDFGenerator.java +++ b/src/main/java/org/metanorma/fop/PDFGenerator.java @@ -7,11 +7,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.*; @@ -1193,24 +1189,70 @@ private void setTablesWidths(fontConfig fontcfg, XSLTconverter xsltConverter, Fi SourceXMLDocument sourceXMLDocumentTablesOnly = new SourceXMLDocument(xmlTablesOnly); - // transform XML to XSL-FO (XML .fo file) - xsltConverter.transform(sourceXMLDocumentTablesOnly, false); + int countTableCells = sourceXMLDocumentTablesOnly.getCountTableCells(); + if (countTableCells < 30000) { + // transform XML to XSL-FO (XML .fo file) + xsltConverter.transform(sourceXMLDocumentTablesOnly, false); - String xmlFO = sourceXMLDocumentTablesOnly.getXMLFO(); + String xmlFO = sourceXMLDocumentTablesOnly.getXMLFO(); - //debug - debugSaveXML(xmlFO, pdf.getAbsolutePath() + ".fo.tables.xml"); + //debug + debugSaveXML(xmlFO, pdf.getAbsolutePath() + ".fo.tables.xml"); - fontcfg.outputFontManifestLog(Paths.get(pdf.getAbsolutePath() + ".tables.fontmanifest.log.txt")); + fontcfg.outputFontManifestLog(Paths.get(pdf.getAbsolutePath() + ".tables.fontmanifest.log.txt")); - fontcfg.setSourceDocumentFontList(sourceXMLDocumentTablesOnly.getDocumentFonts()); + fontcfg.setSourceDocumentFontList(sourceXMLDocumentTablesOnly.getDocumentFonts()); - Source sourceFO = new StreamSource(new StringReader(xmlFO)); + Source sourceFO = new StreamSource(new StringReader(xmlFO)); - logger.info("[INFO] Generation of Intermediate Format with information about the table's widths ..."); - String xmlIF = generateFOPIntermediateFormat(sourceFO, fontcfg.getConfig(), pdf, true, ".tables"); + logger.info("[INFO] Generation of Intermediate Format with information about the table's widths ..."); + String xmlIF = generateFOPIntermediateFormat(sourceFO, fontcfg.getConfig(), pdf, true, ".tables"); - xmlTableIF = createTableIF(xmlIF); + xmlTableIF = createTableIF(xmlIF); + + } else { // for large tables, or large number of tables + + List tablesIds = sourceXMLDocumentTablesOnly.readElementsIds("//*[local-name() = 'table' or local-name() = 'dl']"); + + List xmlTablesIF = new ArrayList<>(); + // process each table separatery for memory consumption optimization + int tableCounter = 0; + int tableCount = tablesIds.size(); + for (String tableId : tablesIds) { + tableCounter++; + logger.info("[INFO] Generation of XSL-FO (" + tableCounter + "/" + tableCount + ") with information about the table widths with id='" + tableId + "'..."); + + // process table with id=tableId only + xsltConverter.setParam("table_only_with_id", tableId); + + // transform XML to XSL-FO (XML .fo file) + xsltConverter.transform(sourceXMLDocumentTablesOnly, false); + + String xmlFO = sourceXMLDocumentTablesOnly.getXMLFO(); + + //debug + debugSaveXML(xmlFO, pdf.getAbsolutePath() + "." + tableId + ".fo.tables.xml"); + + fontcfg.outputFontManifestLog(Paths.get(pdf.getAbsolutePath() + "." + tableId + ".tables.fontmanifest.log.txt")); + + fontcfg.setSourceDocumentFontList(sourceXMLDocumentTablesOnly.getDocumentFonts()); + + Source sourceFO = new StreamSource(new StringReader(xmlFO)); + + logger.info("[INFO] Generation of Intermediate Format (" + tableCounter + "/" + tableCount + ") with information about the table's widths with id='" + tableId + "'..."); + String xmlIF = generateFOPIntermediateFormat(sourceFO, fontcfg.getConfig(), pdf, true, "." + tableId + ".tables"); + + xmlTableIF = createTableIF(xmlIF); + + debugSaveXML(xmlTableIF, pdf.getAbsolutePath() + "." + tableId + ".tables.xml"); + + xmlTableIF = tableWidthsCleanup(xmlTableIF); + + xmlTablesIF.add(xmlTableIF); + } + xmlTableIF = tablesWidthsUnion(xmlTablesIF); + xsltConverter.setParam("table_only_with_id", ""); // further process all tables + } debugSaveXML(xmlTableIF, pdf.getAbsolutePath() + ".tables.xml"); @@ -1261,6 +1303,7 @@ private void debugSaveXML(String xmlString, String pathTo) { } } + private int getIFPageCount(String xmlIF) { int pagecount = 0; if (xmlIF != null) { @@ -1280,4 +1323,27 @@ private void saveDebugFO(String debugXSLFO) { } } + private String tableWidthsCleanup(String table) { + int startPos = table.indexOf(""); + table = table.substring(startPos, endPos); + int startPosTbody = table.indexOf(""); + table = table.substring(0,startPosTbody) + "
"; + return table; + } + + private String tablesWidthsUnion(List tables) { + StringBuilder sbTablesIF = new StringBuilder(); + if (!tables.isEmpty()) { + sbTablesIF.append(""); + } + for (String itemTableIF: tables) { + sbTablesIF.append(itemTableIF); + } + if (!tables.isEmpty()) { + sbTablesIF.append(""); + } + return sbTablesIF.toString(); + } + } diff --git a/src/main/java/org/metanorma/fop/SourceXMLDocument.java b/src/main/java/org/metanorma/fop/SourceXMLDocument.java index 3ffff16..dbcb96d 100644 --- a/src/main/java/org/metanorma/fop/SourceXMLDocument.java +++ b/src/main/java/org/metanorma/fop/SourceXMLDocument.java @@ -75,19 +75,13 @@ public SourceXMLDocument(File fXML) { DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); InputStream xmlstream = new FileInputStream(fXML); sourceXML = dBuilder.parse(xmlstream); - - String element_review = readValue("//*[local-name() = 'review'][1]"); - this.hasAnnotations = element_review.length() != 0; - String element_table = readValue("//*[local-name() = 'table' or local-name() = 'dl'][1]"); - this.hasTables = element_table.length() != 0; - String element_math = readValue("//*[local-name() = 'math'][1]"); - this.hasMath = element_math.length() != 0; + readMetaInformation(); } catch (Exception ex) { logger.severe("Can't read source XML."); ex.printStackTrace(); } } - + public SourceXMLDocument(String strXML) { try { this.sourceXMLstr = strXML; @@ -100,7 +94,17 @@ public SourceXMLDocument(String strXML) { ex.printStackTrace(); } } - + + private void readMetaInformation() { + String element_review = readValue("//*[local-name() = 'review'][1]"); + this.hasAnnotations = element_review.length() != 0; + String element_table = readValue("//*[local-name() = 'table' or local-name() = 'dl'][1]"); + this.hasTables = element_table.length() != 0; + String element_math = readValue("//*[local-name() = 'math'][1]"); + this.hasMath = element_math.length() != 0; + } + + public StreamSource getStreamSource() { if (sourceXMLstr.isEmpty()) { try { @@ -410,6 +414,38 @@ private String readValue(String xpath) { return value; } + private int readTableCellsCount(){ + int count = 0; + try { + XPath xPath = XPathFactory.newInstance().newXPath(); + XPathExpression query = xPath.compile("//*[local-name() = 'td' or local-name() = 'th' or local-name() = 'dt' or local-name() = 'dd']"); + NodeList nodes = (NodeList)query.evaluate(sourceXML, XPathConstants.NODESET); + count = nodes.getLength(); + } catch (Exception ex) { + logger.severe(ex.toString()); + } + return count; + } + + public List readElementsIds(String xpath) { + List values = new ArrayList<>(); + try { + XPath xPath = XPathFactory.newInstance().newXPath(); + XPathExpression query = xPath.compile(xpath); + NodeList nodes = (NodeList)query.evaluate(sourceXML, XPathConstants.NODESET); + for (int i = 0; i < nodes.getLength(); i++) { + Node node_id = nodes.item(i).getAttributes().getNamedItem("id"); + if (node_id != null) { + String id = node_id.getTextContent(); + values.add(id); + } + } + } catch (Exception ex) { + logger.severe(ex.toString()); + } + return values; + } + public boolean hasAnnotations() { return hasAnnotations; } @@ -423,6 +459,11 @@ public boolean hasMath() { return hasMath; } + public int getCountTableCells() { + int countTableCells = readTableCellsCount(); + return countTableCells; + } + public void flushResources() { sourceXML = null; sourceXMLstr = ""; diff --git a/src/main/java/org/metanorma/fop/Util.java b/src/main/java/org/metanorma/fop/Util.java index 6d9cabc..5d48d93 100644 --- a/src/main/java/org/metanorma/fop/Util.java +++ b/src/main/java/org/metanorma/fop/Util.java @@ -5,16 +5,7 @@ import java.awt.font.TextLayout; import java.awt.geom.Rectangle2D; import java.awt.image.BufferedImage; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringReader; +import java.io.*; import java.net.HttpURLConnection; import java.net.URI; import java.net.URISyntaxException; @@ -61,7 +52,12 @@ import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; @@ -794,4 +790,18 @@ public static Node parseCSS(String cssString) { } return node; } + + private static String nodeToString(Node node) { + StringWriter sw = new StringWriter(); + try { + Transformer t = TransformerFactory.newInstance().newTransformer(); + t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + t.setOutputProperty(OutputKeys.INDENT, "yes"); + t.transform(new DOMSource(node), new StreamResult(sw)); + } catch (TransformerException e) { + System.out.println("nodeToString Transformer Exception: " + e.toString()); + } + return sw.toString(); + } + } diff --git a/src/main/resources/table_if.xsl b/src/main/resources/table_if.xsl index bd22ff8..60b98cf 100644 --- a/src/main/resources/table_if.xsl +++ b/src/main/resources/table_if.xsl @@ -5,7 +5,7 @@ xmlns:xalan="http://xml.apache.org/xalan" xmlns:java="http://xml.apache.org/xalan/java" xmlns:str="http://exslt.org/strings" - exclude-result-prefixes="java str" + exclude-result-prefixes="fo if xalan java str" version="1.0"> @@ -54,7 +54,7 @@ - + @@ -136,6 +136,8 @@ + + @@ -164,6 +166,7 @@ + @@ -177,6 +180,8 @@ + + diff --git a/src/main/resources/tables_only.xsl b/src/main/resources/tables_only.xsl index 19e5ee3..0310c23 100644 --- a/src/main/resources/tables_only.xsl +++ b/src/main/resources/tables_only.xsl @@ -110,6 +110,8 @@ + +