diff --git a/Makefile b/Makefile index 93acdcf..e7c8a78 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL ?= /bin/bash endif #JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout) -JAR_VERSION := 1.88 +JAR_VERSION := 1.90 JAR_FILE := mn2pdf-$(JAR_VERSION).jar all: target/$(JAR_FILE) diff --git a/README.adoc b/README.adoc index a5b4db2..4e96605 100644 --- a/README.adoc +++ b/README.adoc @@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi [source,sh] ---- -java -Xss5m -Xmx2048m -jar target/mn2pdf-1.89.jar --xml-file --xsl-file --pdf-file [--syntax-highlight] +java -Xss5m -Xmx2048m -jar target/mn2pdf-1.90.jar --xml-file --xsl-file --pdf-file [--syntax-highlight] ---- e.g. [source,sh] ---- -java -Xss5m -Xmx2048m -jar target/mn2pdf-1.89.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf +java -Xss5m -Xmx2048m -jar target/mn2pdf-1.90.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf ---- === PDF encryption features @@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.: ---- org.metanorma.fop mn2pdf -1.89 +1.90 Metanorma XML to PDF converter ---- @@ -111,8 +111,8 @@ Tag the same version in Git: [source,xml] ---- -git tag v1.89 -git push origin v1.89 +git tag v1.90 +git push origin v1.90 ---- Then the corresponding GitHub release will be automatically created at: diff --git a/pom.xml b/pom.xml index bf6dc3d..6e7d5c4 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.metanorma.fop mn2pdf - 1.89 + 1.90 Metanorma XML to PDF converter jar https://www.metanorma.org diff --git a/src/main/java/org/metanorma/fop/PDFGenerator.java b/src/main/java/org/metanorma/fop/PDFGenerator.java index 7981c59..4776b69 100644 --- a/src/main/java/org/metanorma/fop/PDFGenerator.java +++ b/src/main/java/org/metanorma/fop/PDFGenerator.java @@ -10,6 +10,7 @@ import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; import javax.xml.parsers.*; import javax.xml.transform.OutputKeys; import javax.xml.transform.Result; @@ -277,7 +278,8 @@ public boolean process() { isAddMathAsText = xsltConverter.hasParamAddMathAsText() && isMathExists; isAddMathAsAttachment = xsltConverter.hasParamAddMathAsAttachment(); - isApplyAutolayoutAlgorithm = xsltConverter.isApplyAutolayoutAlgorithm(); + isApplyAutolayoutAlgorithm = xsltConverter.isApplyAutolayoutAlgorithm(); + if (isSyntaxHighlight) { xsltParams.put("syntax-highlight", "true"); @@ -1173,6 +1175,8 @@ private void readEncryptionParameters(File fEncryptionParameters) { private void setTablesWidths(fontConfig fontcfg, XSLTconverter xsltConverter, File pdf) { + int TABLE_CELLS_COUNT_MAX = 30000; + String methodName = getClass().getSimpleName() + "." + (new Object(){}.getClass().getEnclosingMethod().getName()); Profiler.addMethodCall(methodName); long startMethodTime = System.currentTimeMillis(); @@ -1196,7 +1200,7 @@ private void setTablesWidths(fontConfig fontcfg, XSLTconverter xsltConverter, Fi SourceXMLDocument sourceXMLDocumentTablesOnly = new SourceXMLDocument(xmlTablesOnly); int countTableCells = sourceXMLDocumentTablesOnly.getCountTableCells(); - if (countTableCells < 30000) { + if (countTableCells < TABLE_CELLS_COUNT_MAX) { // transform XML to XSL-FO (XML .fo file) xsltConverter.transform(sourceXMLDocumentTablesOnly, false); @@ -1218,18 +1222,37 @@ private void setTablesWidths(fontConfig fontcfg, XSLTconverter xsltConverter, Fi } else { // for large tables, or large number of tables - List tablesIds = sourceXMLDocumentTablesOnly.readElementsIds("//*[local-name() = 'table' or local-name() = 'dl']"); - List xmlTablesIF = new ArrayList<>(); - // process each table separatery for memory consumption optimization - int tableCounter = 0; - int tableCount = tablesIds.size(); - for (String tableId : tablesIds) { - tableCounter++; - logger.info("[INFO] Generation of XSL-FO (" + tableCounter + "/" + tableCount + ") with information about the table widths with id='" + tableId + "'..."); - // process table with id=tableId only - xsltConverter.setParam("table_only_with_id", tableId); + Map tablesCellsCountMap = sourceXMLDocumentTablesOnly.getTablesCellsCountMap(); + + int portion = 1; + while(!tablesCellsCountMap.isEmpty()) { + int totalCells = 0; + List tablesProcessed = new ArrayList<>(); + + Iterator> iterator = tablesCellsCountMap.entrySet().iterator(); + while (iterator.hasNext() && totalCells < TABLE_CELLS_COUNT_MAX) { + Map.Entry entry = iterator.next(); + if (totalCells == 0 || totalCells + entry.getValue() < TABLE_CELLS_COUNT_MAX) { + totalCells += entry.getValue(); + tablesProcessed.add(entry.getKey()); + } + } + + /*for (Map.Entry entry : tablesCellsCountMap.entrySet()) { + else { + break; + } + }*/ + logger.info("[INFO] Generation of XSL-FO (portion " + portion + ") with information about the table widths..."); + + // "table1 table2 table3 " (with space at the end) + String tableIds = tablesProcessed.stream().collect(Collectors.joining(" ")) + " "; + // call XSLT and pass the tables ids + + // process table with ids=tableIds only + xsltConverter.setParam("table_only_with_ids", tableIds); // transform XML to XSL-FO (XML .fo file) xsltConverter.transform(sourceXMLDocumentTablesOnly, false); @@ -1237,27 +1260,41 @@ private void setTablesWidths(fontConfig fontcfg, XSLTconverter xsltConverter, Fi String xmlFO = sourceXMLDocumentTablesOnly.getXMLFO(); //debug - debugSaveXML(xmlFO, pdf.getAbsolutePath() + "." + tableId + ".fo.tables.xml"); + debugSaveXML(xmlFO, pdf.getAbsolutePath() + ".portion_" + portion + ".fo.tables.xml"); - fontcfg.outputFontManifestLog(Paths.get(pdf.getAbsolutePath() + "." + tableId + ".tables.fontmanifest.log.txt")); + fontcfg.outputFontManifestLog(Paths.get(pdf.getAbsolutePath() + ".portion_" + portion + ".tables.fontmanifest.log.txt")); fontcfg.setSourceDocumentFontList(sourceXMLDocumentTablesOnly.getDocumentFonts()); Source sourceFO = new StreamSource(new StringReader(xmlFO)); - logger.info("[INFO] Generation of Intermediate Format (" + tableCounter + "/" + tableCount + ") with information about the table's widths with id='" + tableId + "'..."); - String xmlIF = generateFOPIntermediateFormat(sourceFO, fontcfg.getConfig(), pdf, true, "." + tableId + ".tables"); + logger.info("[INFO] Generation of Intermediate Format with information about the table's widths (portion " + portion + ") ..."); + String xmlIF = generateFOPIntermediateFormat(sourceFO, fontcfg.getConfig(), pdf, true, ".portion_" + portion + ".tables"); xmlTableIF = createTableIF(xmlIF); - debugSaveXML(xmlTableIF, pdf.getAbsolutePath() + "." + tableId + ".tables.xml"); + debugSaveXML(xmlTableIF, pdf.getAbsolutePath() + ".portion_" + portion + ".tables.xml"); xmlTableIF = tableWidthsCleanup(xmlTableIF); xmlTablesIF.add(xmlTableIF); + + // remove processed tables + tablesCellsCountMap.keySet().removeAll(tablesProcessed); + portion++; } + + /*List tablesIds = sourceXMLDocumentTablesOnly.readElementsIds("//*[local-name() = 'table' or local-name() = 'dl']"); + // process each table separatery for memory consumption optimization + int tableCounter = 0; + int tableCount = tablesIds.size(); + for (String tableId : tablesIds) { + tableCounter++; + logger.info("[INFO] Generation of XSL-FO (" + tableCounter + "/" + tableCount + ") with information about the table widths with id='" + tableId + "'..."); + }*/ xmlTableIF = tablesWidthsUnion(xmlTablesIF); xsltConverter.setParam("table_only_with_id", ""); // further process all tables + xsltConverter.setParam("table_only_with_ids", ""); // further process all tables } debugSaveXML(xmlTableIF, pdf.getAbsolutePath() + ".tables.xml"); @@ -1330,11 +1367,17 @@ private void saveDebugFO(String debugXSLFO) { } private String tableWidthsCleanup(String table) { - int startPos = table.indexOf(""); table = table.substring(startPos, endPos); int startPosTbody = table.indexOf(""); - table = table.substring(0,startPosTbody) + "
"; + table = table.substring(0,startPosTbody) + "";*/ return table; } @@ -1344,6 +1387,9 @@ private String tablesWidthsUnion(List tables) { sbTablesIF.append(""); } for (String itemTableIF: tables) { + int startPos = itemTableIF.indexOf(""); + itemTableIF = itemTableIF.substring(startPos, endPos); sbTablesIF.append(itemTableIF); } if (!tables.isEmpty()) { diff --git a/src/main/java/org/metanorma/fop/SourceXMLDocument.java b/src/main/java/org/metanorma/fop/SourceXMLDocument.java index aafc248..c7ce933 100644 --- a/src/main/java/org/metanorma/fop/SourceXMLDocument.java +++ b/src/main/java/org/metanorma/fop/SourceXMLDocument.java @@ -1,7 +1,6 @@ package org.metanorma.fop; import java.io.*; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -14,10 +13,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.*; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Source; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerFactory; +import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; @@ -26,7 +22,7 @@ import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; -import static org.metanorma.fop.PDFGenerator.logger; + import static org.metanorma.fop.Util.getStreamFromResources; import org.metanorma.utils.LoggerHelper; @@ -52,6 +48,7 @@ public class SourceXMLDocument { private boolean hasAnnotations = false; private boolean hasTables = false; + private Map tablesCellsCountMap = new HashMap<>(); private boolean hasMath = false; static final String TMPDIR = System.getProperty("java.io.tmpdir"); @@ -89,6 +86,7 @@ public SourceXMLDocument(String strXML) { DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); InputSource xmlIFIS = new InputSource(new StringReader(strXML)); sourceXML = dBuilder.parse(xmlIFIS); + readMetaInformation(); } catch (Exception ex) { logger.severe("Can't parse source XML."); ex.printStackTrace(); @@ -97,14 +95,41 @@ public SourceXMLDocument(String strXML) { private void readMetaInformation() { String element_review = readValue("//*[local-name() = 'review'][1]"); - this.hasAnnotations = element_review.length() != 0; - // check table without colgroup/col (width) or dl - String element_table = readValue("//*[(local-name() = 'table' and not(*[local-name() = 'colgroup']/*[local-name() = 'col'])) or local-name() = 'dl'][1]"); - this.hasTables = element_table.length() != 0; + hasAnnotations = element_review.length() != 0; String element_math = readValue("//*[local-name() = 'math'][1]"); - this.hasMath = element_math.length() != 0; + hasMath = element_math.length() != 0; + //tables without colgroup/col (width) or dl + //String element_table = readValue("//*[(local-name() = 'table' and not(*[local-name() = 'colgroup']/*[local-name() = 'col'])) or local-name() = 'dl'][1]"); + //hasTables = element_table.length() != 0; + obtainTablesCellsCount(); + hasTables = !tablesCellsCountMap.isEmpty(); } + private void obtainTablesCellsCount() { + try { + XPath xPathAllTable = XPathFactory.newInstance().newXPath(); + // select all tables (without colgroup) and definitions lists (dl) + XPathExpression queryAllTables = xPathAllTable.compile("//*[(local-name() = 'table' and not(*[local-name() = 'colgroup']/*[local-name() = 'col'])) or local-name() = 'dl']"); + NodeList nodesTables = (NodeList)queryAllTables.evaluate(sourceXML, XPathConstants.NODESET); + for (int i = 0; i < nodesTables.getLength(); i++) { + Node nodeTable = nodesTables.item(i); + String tableId = ""; + Node nodeId = nodeTable.getAttributes().getNamedItem("id"); + if (nodeId != null) { + tableId =nodeId.getTextContent(); + } + if (!tableId.isEmpty()) { + XPath xPathTableCountCells = XPathFactory.newInstance().newXPath(); + XPathExpression queryTableCountCells = xPathTableCountCells.compile(".//*[local-name() = 'td' or local-name() = 'th' or local-name() = 'dt' or local-name() = 'dd']"); + NodeList nodesCells = (NodeList) queryTableCountCells.evaluate(nodeTable, XPathConstants.NODESET); + int countCells = nodesCells.getLength(); + tablesCellsCountMap.put(tableId, countCells); + } + } + } catch (XPathExpressionException ex) { + logger.severe(ex.toString()); + } + } public StreamSource getStreamSource() { if (sourceXMLstr.isEmpty()) { @@ -411,7 +436,6 @@ public String getDocumentFilePath() { return documentFilePath; } - private String updatePreprocessXSLT(Document docXML) throws Exception { Source srcXSL = new StreamSource(getStreamFromResources(getClass().getClassLoader(), "update_preprocess_xslt.xsl")); @@ -441,19 +465,6 @@ private String readValue(String xpath) { return value; } - private int readTableCellsCount(){ - int count = 0; - try { - XPath xPath = XPathFactory.newInstance().newXPath(); - XPathExpression query = xPath.compile("//*[local-name() = 'td' or local-name() = 'th' or local-name() = 'dt' or local-name() = 'dd']"); - NodeList nodes = (NodeList)query.evaluate(sourceXML, XPathConstants.NODESET); - count = nodes.getLength(); - } catch (Exception ex) { - logger.severe(ex.toString()); - } - return count; - } - public List readElementsIds(String xpath) { List values = new ArrayList<>(); try { @@ -487,7 +498,12 @@ public boolean hasMath() { } public int getCountTableCells() { - int countTableCells = readTableCellsCount(); + int countTableCells = 0; + try { + countTableCells = tablesCellsCountMap.values().stream().mapToInt(Integer::intValue).sum(); + } catch (Exception ex) { + logger.severe(ex.toString()); + }; return countTableCells; } @@ -496,4 +512,8 @@ public void flushResources() { sourceXMLstr = ""; xmlFO = null; } + + public Map getTablesCellsCountMap() { + return tablesCellsCountMap; + } } diff --git a/src/test/java/org/metanorma/fop/SourceXMLDocumentTests.java b/src/test/java/org/metanorma/fop/SourceXMLDocumentTests.java index 327e960..55483cf 100644 --- a/src/test/java/org/metanorma/fop/SourceXMLDocumentTests.java +++ b/src/test/java/org/metanorma/fop/SourceXMLDocumentTests.java @@ -5,7 +5,9 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import javax.xml.transform.TransformerException; import org.apache.commons.cli.ParseException; @@ -92,4 +94,15 @@ public void testGetDocumentPreprocessXSLT() { assertTrue(strProcessXSLT.equals(strProcessXSLTEtalon)); } + + @Test + public void testTablesCellsCount() { + ClassLoader classLoader = getClass().getClassLoader(); + String xml = classLoader.getResource("G.191.xml").getFile(); + SourceXMLDocument sourceXMLDocument = new SourceXMLDocument(new File(xml)); + Map tablesCellsCount = sourceXMLDocument.getTablesCellsCountMap(); + int countCells = sourceXMLDocument.getCountTableCells(); + assertTrue(tablesCellsCount.size() == 27); + assertTrue(countCells == 725); + } }