Skip to content

Commit

Permalink
Merge pull request #307 from metanorma/fix/annotation
Browse files Browse the repository at this point in the history
Fix/annotation
  • Loading branch information
Intelligent2013 authored Nov 1, 2024
2 parents 6273917 + ee0b902 commit 5ad4b3c
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 32 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ SHELL ?= /bin/bash
endif

#JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout)
JAR_VERSION := 2.06
JAR_VERSION := 2.07
JAR_FILE := mn2pdf-$(JAR_VERSION).jar

all: target/$(JAR_FILE)
Expand Down
10 changes: 5 additions & 5 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi

[source,sh]
----
java -Xss5m -Xmx2048m -jar target/mn2pdf-2.06.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
java -Xss5m -Xmx2048m -jar target/mn2pdf-2.07.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
----

e.g.

[source,sh]
----
java -Xss5m -Xmx2048m -jar target/mn2pdf-2.06.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
java -Xss5m -Xmx2048m -jar target/mn2pdf-2.07.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
----

=== PDF encryption features
Expand Down Expand Up @@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.:
----
<groupId>org.metanorma.fop</groupId>
<artifactId>mn2pdf</artifactId>
<version>2.06</version>
<version>2.07</version>
<name>Metanorma XML to PDF converter</name>
----

Expand All @@ -111,8 +111,8 @@ Tag the same version in Git:

[source,xml]
----
git tag v2.06
git push origin v2.06
git tag v2.07
git push origin v2.07
----

Then the corresponding GitHub release will be automatically created at:
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.metanorma.fop</groupId>
<artifactId>mn2pdf</artifactId>
<version>2.06</version>
<version>2.07</version>
<name>Metanorma XML to PDF converter</name>
<packaging>jar</packaging>
<url>https://www.metanorma.org</url>
Expand Down
249 changes: 224 additions & 25 deletions src/main/java/org/metanorma/fop/annotations/Annotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
import java.util.ArrayList;
import java.util.*;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
Expand All @@ -31,7 +27,20 @@
import javax.xml.xpath.XPathException;
import javax.xml.xpath.XPathFactory;

import org.apache.fop.pdf.PDFObject;
import org.apache.pdfbox.cos.*;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDNumberTreeNode;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDObjectReference;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDParentTreeValue;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.xml.sax.InputSource;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.fdf.FDFAnnotation;
Expand All @@ -56,7 +65,12 @@ public class Annotation {
protected static final Logger logger = Logger.getLogger(LoggerHelper.LOGGER_NAME);

private boolean DEBUG = false;


// Prefix shared by the annotation 'subject' attribute (see xfdf_simple.xsl) and by the alt-text of the Annot structure tags that fixAnnotationTags() rewrites.
private final String ANNOT_PREFIX = "Annot___";
// Page annotations keyed by their 'Annot___...' subject; reassigned in process() from getAnnotationIDmap().
private HashMap<String,PDAnnotation> hashMapDocumentAnnotations = new HashMap<>();

// Structure tree root of the document being processed; assigned in process() before fixAnnotationTags() is called.
private PDStructureTreeRoot structureTreeRoot;

public void process(File pdf, String xmlReview) throws IOException {
PDDocument document = null;

Expand Down Expand Up @@ -114,9 +128,9 @@ public void process(File pdf, String xmlReview) throws IOException {


if (DEBUG) {
System.out.println("page=" + page);
/*System.out.println("page=" + page);
System.out.println("x=" + x);
System.out.println("y=" + y);
System.out.println("y=" + y);*/
}

AnnotationArea annotationArea = new AnnotationArea();
Expand Down Expand Up @@ -225,7 +239,7 @@ public void process(File pdf, String xmlReview) throws IOException {
}

if (DEBUG) {
System.out.println("postItPopup position=" + Arrays.toString(annotationArea.getPosition()));
//System.out.println("postItPopup position=" + Arrays.toString(annotationArea.getPosition()));
}

Node node_popup = ((Element)node_annotation).getElementsByTagName("popup").item(0);
Expand Down Expand Up @@ -253,55 +267,240 @@ public void process(File pdf, String xmlReview) throws IOException {
xmlwriter.write(updatedXMLReview);
}
}



// import XFDF annotation xml

FDFDocument fdfDoc = FDFDocument.loadXFDF(new ByteArrayInputStream(updatedXMLReview.getBytes(StandardCharsets.UTF_8)));
List<FDFAnnotation> fdfAnnots = fdfDoc.getCatalog().getFDF().getAnnotations();

// group annotations relate to one page and add them into page
HashMap<Integer,List<PDAnnotation>> map_pdfannots = new HashMap<>();

HashMap<Integer,List<PDAnnotation>> mapPDFannots = new HashMap<>();
for (int i=0; i<fdfDoc.getCatalog().getFDF().getAnnotations().size(); i++) {
FDFAnnotation fdfannot = fdfAnnots.get(i);
int page = fdfannot.getPage();

PDAnnotation pdfannot = PDAnnotation.createAnnotation(fdfannot.getCOSObject());

pdfannot.constructAppearances(); // requires for PDF/A

if (map_pdfannots.get(page) == null) {
map_pdfannots.put(page, new ArrayList<PDAnnotation>());
if (mapPDFannots.get(page) == null) {
mapPDFannots.put(page, new ArrayList<PDAnnotation>());
}
map_pdfannots.get(page).add(pdfannot);
mapPDFannots.get(page).add(pdfannot);
}
for (Map.Entry<Integer,List<PDAnnotation>> set: map_pdfannots.entrySet()) {

for (Map.Entry<Integer,List<PDAnnotation>> set: mapPDFannots.entrySet()) {
PDPage page = document.getPage(set.getKey());
List<PDAnnotation> pageAnotations = page.getAnnotations();
// merge existing annotations (including hyperlinks) and new annotations
pageAnotations.addAll(set.getValue());
//document.getPage(set.getKey()).setAnnotations(set.getValue());
document.getPage(set.getKey()).setAnnotations(pageAnotations);
}

fdfDoc.close();

document.save(pdf);

} catch (IOException | NumberFormatException | ParserConfigurationException | DOMException | TransformerException | SAXException | XPathException ex) {
logger.severe("Can't read annotation data from xml.");
ex.printStackTrace();
}



// add Annot tag for the text annotation
try {
document = PDDocument.load(pdf); // important
hashMapDocumentAnnotations = getAnnotationIDmap(document);

structureTreeRoot = document.getDocumentCatalog().getStructureTreeRoot();
COSArray aDocument = (COSArray) structureTreeRoot.getK();
fixAnnotationTags(aDocument, null, 0);

clearEmptyAnnotations(document);

document.save(pdf);
} catch (IOException ex) {
logger.severe("Can't enclose the annotation into the Annot tag.");
ex.printStackTrace();
}
// END Annot tag adding

} finally {
if( document != null ) {
document.close();
}
}

}


/**
 * Builds a lookup of the page annotations whose subject carries the {@code Annot___} marker.
 *
 * @param document the PDF whose pages are scanned
 * @return annotations keyed by their full subject string (marker prefix included);
 *         when several annotations share a subject the last one visited wins
 * @throws IOException if the annotations of a page cannot be read
 */
private HashMap<String,PDAnnotation> getAnnotationIDmap(PDDocument document) throws IOException {
    HashMap<String,PDAnnotation> annotationsBySubject = new HashMap<>();
    int pageCount = document.getNumberOfPages();
    for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        for (PDAnnotation candidate : document.getPage(pageIndex).getAnnotations()) {
            COSDictionary dictionary = candidate.getCOSObject();
            if (dictionary == null) {
                continue;
            }
            // the subject holds the id 'Annot___...', see xfdf_simple.xsl, attribute 'subject'
            String subject = dictionary.getString(COSName.SUBJ);
            boolean marked = subject != null && subject.startsWith(ANNOT_PREFIX);
            if (marked) {
                annotationsBySubject.put(subject, candidate);
            }
        }
    }
    return annotationsBySubject;
}

/**
 * Walks the structure tree depth-first and replaces every placeholder {@code Annot} element
 * (one whose alternate text starts with {@code Annot___}) with a new {@code Annot} element
 * that references the page annotation registered under the same id.
 *
 * <p>Kids that are not indirect objects, or whose entries do not have the expected type, are
 * skipped individually so that one unexpected entry does not hide its siblings.</p>
 *
 * @param oArray       kids array to scan; {@code null} is ignored
 * @param parentObject structure element that owns {@code oArray}, or {@code null} for the top level
 * @param level        nesting depth, used only to indent the debug output
 * @throws IOException declared for callers; I/O failures while updating the parent tree are logged
 */
private void fixAnnotationTags(COSArray oArray, COSObject parentObject, int level) throws IOException {
    if (oArray == null) {
        return;
    }
    for (int i = 0; i < oArray.size(); i++) {
        COSBase kid = oArray.get(i);
        if (!(kid instanceof COSObject)) {
            // a direct value rather than an indirect object: nothing to rewrite here,
            // and it must not stop the remaining siblings from being visited
            continue;
        }
        COSObject oArrayItem = (COSObject) kid;

        COSBase structureType = oArrayItem.getItem(COSName.S);
        if (structureType instanceof COSName) {
            String tagName = ((COSName) structureType).getName();
            String levelPrefix = String.join("", Collections.nCopies(level, " "));
            if (DEBUG) {
                System.out.println(levelPrefix + tagName);
            }

            if ("Annot".equals(tagName)) {
                COSBase cbAlt = oArrayItem.getItem(COSName.ALT);
                if (cbAlt instanceof COSString) {
                    // the alt-text is the annotation id itself, e.g. 'Annot___<id>'
                    String annotationId = ((COSString) cbAlt).getString();
                    if (annotationId != null && annotationId.startsWith(ANNOT_PREFIX)) {
                        if (DEBUG) {
                            System.out.println(levelPrefix + "id=" + cbAlt);
                        }
                        replaceAnnotPlaceholder(oArrayItem, parentObject, annotationId);
                    }
                }
            }
        }

        // After a successful replacement K is an object reference dictionary, not an array,
        // so the freshly created element is not descended into.
        COSBase nestedKids = oArrayItem.getItem(COSName.K);
        if (nestedKids instanceof COSArray) {
            try {
                // level + 1 (not ++level): siblings must stay on the same depth
                fixAnnotationTags((COSArray) nestedKids, oArrayItem, level + 1);
            } catch (RuntimeException e) {
                // best effort: a malformed subtree must not abort the whole pass, but leave a trace
                logger.warning("Can't process the structure element kids: " + e.toString());
            }
        }
    }
}

/**
 * Replaces the content of a placeholder {@code Annot} structure element with a new element that
 * points to the annotation registered under {@code annotationId}, and registers the new element
 * in the parent tree of the structure tree root.
 * The placeholder is left untouched when the annotation is unknown or the parent tree cannot be read.
 */
private void replaceAnnotPlaceholder(COSObject placeholder, COSObject parentObject, String annotationId) {
    PDAnnotation foundAnnotation = hashMapDocumentAnnotations.get(annotationId);
    if (foundAnnotation == null) {
        logger.warning("Annotation '" + annotationId + "' isn't found in the PDF, the Annot tag is left unchanged.");
        return;
    }

    // build the new annotation element
    COSDictionary anDict = new COSDictionary();
    // set Tag name (S)
    anDict.setItem(COSName.S, COSName.ANNOT);
    // set Parent (P)
    anDict.setItem(COSName.P, parentObject);
    // set Page (PG); stays unset when the first kid does not provide one
    anDict.setItem(COSName.PG, getPageOfFirstKid(placeholder));

    PDObjectReference objRef = new PDObjectReference();
    objRef.setReferencedObject(foundAnnotation);
    anDict.setItem(COSName.K, objRef);

    if (DEBUG) {
        System.out.println(placeholder.getItem(COSName.K));
    }

    try {
        // from https://stackoverflow.com/questions/79083813/how-to-add-the-annotation-tag-in-tagged-pdf-using-pdfbox

        // read the parent tree first: if that fails, nothing has been modified yet
        PDNumberTreeNode parentTree = structureTreeRoot.getParentTree();
        Map<Integer, COSObjectable> numberTreeAsMap = getNumberTreeAsMap(parentTree);

        int parentTreeNextKey = structureTreeRoot.getParentTreeNextKey();
        if (parentTreeNextKey == -1) {
            // no ParentTreeNextKey entry: continue after the largest used key (0 for an empty tree)
            parentTreeNextKey = numberTreeAsMap.keySet().stream().max(Integer::compare).orElse(-1) + 1;
        }

        placeholder.setObject(anDict);

        // assign a number to the annotation, insert the annotation element into the parent tree
        // and set ParentTreeNextKey
        foundAnnotation.setStructParent(parentTreeNextKey);
        structureTreeRoot.setParentTreeNextKey(parentTreeNextKey + 1);
        numberTreeAsMap.put(parentTreeNextKey, anDict);
        parentTree = new PDNumberTreeNode(PDParentTreeValue.class);
        parentTree.setNumbers(numberTreeAsMap);
        structureTreeRoot.setParentTree(parentTree);
        // END from stackoverflow
    } catch (IOException e) {
        logger.severe("ParentTreeKey update error:" + e.toString());
    }
}

/**
 * Returns the page (PG) entry of the first kid of the given structure element,
 * or {@code null} when the element has no kids array or its first kid is not an indirect object.
 */
private COSBase getPageOfFirstKid(COSObject element) {
    COSBase kids = element.getItem(COSName.K);
    if (kids instanceof COSArray && ((COSArray) kids).size() > 0) {
        COSBase firstKid = ((COSArray) kids).get(0);
        if (firstKid instanceof COSObject) {
            return ((COSObject) firstKid).getItem(COSName.PG);
        }
    }
    return null;
}

/**
 * Removes the helper annotations from every page and strips the {@code Annot___} marker
 * from the subject of the annotations that are kept.
 *
 * <p>An annotation is dropped when its contents start with {@code Annot___}, or when its
 * rectangle is narrower than 0.07 units.</p>
 *
 * @param document the PDF to clean up; its pages are modified in place
 * @throws IOException if the annotations of a page cannot be read
 */
private void clearEmptyAnnotations(PDDocument document) throws IOException {
    for (int i = 0; i < document.getNumberOfPages(); i++) {
        PDPage page = document.getPage(i);
        List<PDAnnotation> pageAnnotations = new ArrayList<>();
        for (PDAnnotation pageAnnotation : page.getAnnotations()) {
            String contents = pageAnnotation.getContents();
            if (contents != null && contents.startsWith(ANNOT_PREFIX)) {
                continue;
            }

            COSDictionary annotationDict = pageAnnotation.getCOSObject();

            // if link with alt-text Annot___ placed near the clause block, then the Contents changed to something like '1 Scope'
            // therefore need remove links with small difference between coordinates
            COSArray rect = annotationDict.getCOSArray(COSName.RECT);
            if (rect != null && rect.size() > 2) {
                // coordinates may be stored as integers or reals, so read them as COSNumber;
                // a rectangle without numeric coordinates can't be measured and the annotation is kept
                COSBase left = rect.getObject(0);
                COSBase right = rect.getObject(2);
                if (left instanceof COSNumber && right instanceof COSNumber) {
                    float x1 = ((COSNumber) left).floatValue();
                    float x2 = ((COSNumber) right).floatValue();
                    if (x2 - x1 < 0.07f) {
                        continue;
                    }
                }
            }

            // clear Subject field with 'Annot___', see xfdf_simple.xsl, attribute 'subject'
            String subj = annotationDict.getString(COSName.SUBJ);
            if (subj != null && subj.startsWith(ANNOT_PREFIX)) {
                annotationDict.removeItem(COSName.SUBJ);
            }
            pageAnnotations.add(pageAnnotation);
        }
        page.setAnnotations(pageAnnotations);
    }
}

/**
 * Flattens a number tree into a single modifiable map.
 *
 * @param tree root of the (sub)tree to flatten; may be {@code null}
 * @return a new insertion-ordered map holding the node's own numbers followed by the numbers
 *         of its kids, visited recursively; empty when {@code tree} is {@code null}
 * @throws IOException if the numbers of a node cannot be read
 */
private Map<Integer, COSObjectable> getNumberTreeAsMap(PDNumberTreeNode tree) throws IOException {
    // always collect into a fresh map: the map handed out by the node is read only
    Map<Integer, COSObjectable> flattened = new LinkedHashMap<>();
    if (tree == null) {
        return flattened;
    }

    Map<Integer, COSObjectable> ownNumbers = tree.getNumbers();
    if (ownNumbers != null) {
        flattened.putAll(ownNumbers);
    }

    List<PDNumberTreeNode> children = tree.getKids();
    if (children == null) {
        return flattened;
    }
    for (PDNumberTreeNode child : children) {
        flattened.putAll(getNumberTreeAsMap(child));
    }
    return flattened;
}

}
2 changes: 2 additions & 0 deletions src/main/resources/xfdf_simple.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@
<xsl:attribute name="page"><xsl:value-of select="$page - 1"/></xsl:attribute>
<xsl:attribute name="rect"><xsl:value-of select="concat($element_from/@x,',',$element_from/@y)"/></xsl:attribute>
<xsl:attribute name="title"><xsl:value-of select="$reviewer"/></xsl:attribute>
<!-- links this annotation to the alt-text 'Annot___' + @id set in common.xsl -->
<xsl:attribute name="subject">Annot___<xsl:value-of select="@id"/></xsl:attribute>

<xsl:element name="contents-richtext">
<body xmlns="http://www.w3.org/1999/xhtml">
Expand Down

0 comments on commit 5ad4b3c

Please sign in to comment.