Skip to content

Commit

Permalink
Add XML, CSV and TSV result counting parser (#270)
Browse files Browse the repository at this point in the history
* Add xml parser

* Add link and ask

* Add csv and tsv parser
  • Loading branch information
nck-mlcnv authored Sep 6, 2024
1 parent 1281635 commit 0314a68
Show file tree
Hide file tree
Showing 13 changed files with 1,006 additions and 69 deletions.
17 changes: 14 additions & 3 deletions docs/configuration/response_body_processor.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,20 @@ The processing is done to extract relevant information from the responses and st

Iguana supports multiple response body processors that are defined by the content type of the response body they process.

Currently only the `application/sparql-results+json` content type is supported,
and it only uses the `SaxSparqlJsonResultCountingParser` language processor
to extract simple information from the responses.
The following content types are supported:
- `application/sparql-results+json`
- `application/sparql-results+xml`
- `text/csv`
- `text/tab-separated-values`


For the `json` and `xml` content types,
the response body processor counts the number of results and bindings of `SELECT` queries
and lists all of their variables and link attributes.
If the requested query was an `ASK` query, the response body processor stores the boolean result instead.

For the `csv` and `tsv` content types, only `SELECT` queries are supported.
The response body processor counts the number of results and bindings and lists all variables.

Workers send the response bodies to the response body processors,
after receiving the full response bodies from the HTTP requests.
Expand Down
5 changes: 4 additions & 1 deletion src/main/java/org/aksw/iguana/cc/lang/LanguageProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public abstract class LanguageProcessor {

public interface LanguageProcessingData extends Storable {
long hash();
Class<? extends LanguageProcessor> processor();
Exception exception();
}

public abstract LanguageProcessingData process(InputStream inputStream, long hash);
Expand All @@ -45,6 +45,9 @@ public interface LanguageProcessingData extends Storable {
// Register all available LanguageProcessors here.
// Each entry associates a response content type (the map key) with the
// LanguageProcessor implementation class registered for that content type (the map value).
// NOTE(review): presumably getInstance(String contentType) resolves processors through this map — confirm.
static {
// SPARQL result format: JSON
processors.put("application/sparql-results+json", org.aksw.iguana.cc.lang.impl.SaxSparqlJsonResultCountingParser.class);
// SPARQL result format: XML
processors.put("application/sparql-results+xml", org.aksw.iguana.cc.lang.impl.SaxSparqlXmlResultCountingParser.class);
// SPARQL result format: TSV
processors.put("text/tab-separated-values", org.aksw.iguana.cc.lang.impl.SparqlTsvResultCountingParser.class);
// SPARQL result format: CSV
processors.put("text/csv", org.aksw.iguana.cc.lang.impl.SparqlCsvResultCountingParser.class);
}

public static LanguageProcessor getInstance(String contentType) {
Expand Down
60 changes: 60 additions & 0 deletions src/main/java/org/aksw/iguana/cc/lang/impl/BooleanResultData.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package org.aksw.iguana.cc.lang.impl;

import org.aksw.iguana.cc.lang.LanguageProcessor;
import org.aksw.iguana.cc.storage.Storable;
import org.aksw.iguana.commons.rdf.IPROP;
import org.aksw.iguana.commons.rdf.IRES;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;

import java.util.List;

/**
 * Processing result for a response body that carries a boolean (ASK) answer.
 * <p>
 * The record can be exported as a single-row CSV table or as an RDF model.
 * Every component except {@code hash} may be {@code null}; {@code null} components
 * become empty CSV cells and are left out of the RDF model.
 *
 * @param hash      hash identifying the processed response body
 * @param result    the boolean answer, or {@code null} if none is available
 * @param links     the collected link values, or {@code null}
 * @param exception the exception attached to this result, or {@code null}
 */
public record BooleanResultData(
        long hash,
        Boolean result,
        List<String> links,
        Exception exception
) implements LanguageProcessor.LanguageProcessingData, Storable.AsCSV, Storable.AsRDF {
    /** Column names of the CSV table produced by {@link #toCSV()}. */
    static final String[] header = new String[]{ "responseBodyHash", "boolean", "links", "exception" };

    /**
     * Builds a CSV export consisting of the header row and one content row.
     * Missing ({@code null}) components are written as empty strings and
     * multiple links are joined with {@code "; "}.
     *
     * @return CSV data with the single file {@code sparql-ask-result.csv}
     *         in the folder {@code sparql-ask-result-data}
     */
    @Override
    public Storable.CSVData toCSV() {
        final String booleanCell = (result == null) ? "" : result.toString();
        final String exceptionCell = (exception == null) ? "" : exception().toString();
        final String linksCell = (links == null) ? "" : String.join("; ", links);

        final String[] row = { String.valueOf(hash), booleanCell, linksCell, exceptionCell };
        final String[][] table = { header, row };

        final var file = new Storable.CSVData.CSVFileData("sparql-ask-result.csv", table);
        return new Storable.CSVData("sparql-ask-result-data", List.of(file));
    }

    /**
     * Builds an RDF model that attaches the available components to the
     * response body resource derived from {@code hash}.
     *
     * @return a new model holding one statement per non-null result, link and exception
     */
    @Override
    public Model toRDF() {
        final Model model = ModelFactory.createDefaultModel();
        final Resource subject = IRES.getResponsebodyResource(hash);

        if (result != null) {
            model.add(subject, IPROP.askBoolean, ResourceFactory.createTypedLiteral(result));
        }
        if (links != null) {
            // one statement per link
            links.forEach(link -> model.add(subject, IPROP.link, ResourceFactory.createTypedLiteral(link)));
        }
        if (exception != null) {
            model.add(subject, IPROP.exception, ResourceFactory.createTypedLiteral(exception.toString()));
        }
        return model;
    }
}
67 changes: 67 additions & 0 deletions src/main/java/org/aksw/iguana/cc/lang/impl/ResultCountData.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package org.aksw.iguana.cc.lang.impl;

import org.aksw.iguana.cc.lang.LanguageProcessor;
import org.aksw.iguana.cc.storage.Storable;
import org.aksw.iguana.commons.rdf.IPROP;
import org.aksw.iguana.commons.rdf.IRES;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;

import java.util.List;

/**
 * Processing result holding the counts extracted from a response body.
 * <p>
 * The record can be exported as a single-row CSV table or as an RDF model.
 * The list components and the exception may be {@code null}; {@code null} components
 * become empty CSV cells and are left out of the RDF model.
 *
 * @param hash      hash identifying the processed response body
 * @param results   number of results
 * @param bindings  number of bindings
 * @param variables the collected variable names, or {@code null}
 * @param links     the collected link values, or {@code null}
 * @param exception the exception attached to this result, or {@code null}
 */
public record ResultCountData (
        long hash,
        long results,
        long bindings,
        List<String> variables,
        List<String> links,
        Exception exception
) implements LanguageProcessor.LanguageProcessingData, Storable.AsCSV, Storable.AsRDF {
    /** Column names of the CSV table produced by {@link #toCSV()}. */
    static final String[] header = new String[]{ "responseBodyHash", "results", "bindings", "variables", "links", "exception" };

    /**
     * Builds a CSV export consisting of the header row and one content row.
     * Missing ({@code null}) components are written as empty strings;
     * variables and links are each joined with {@code "; "}.
     *
     * @return CSV data with the single file {@code result-count.csv}
     *         in the folder {@code result-count-data}
     */
    @Override
    public Storable.CSVData toCSV() {
        final String variablesCell = (variables == null) ? "" : String.join("; ", variables);
        final String exceptionCell = (exception == null) ? "" : exception().toString();
        final String linksCell = (links == null) ? "" : String.join("; ", links);

        final String[] row = {
                String.valueOf(hash),
                String.valueOf(results),
                String.valueOf(bindings),
                variablesCell,
                linksCell,
                exceptionCell
        };
        final String[][] table = { header, row };

        final var file = new Storable.CSVData.CSVFileData("result-count.csv", table);
        return new Storable.CSVData("result-count-data", List.of(file));
    }

    /**
     * Builds an RDF model that attaches the counts and the available optional
     * components to the response body resource derived from {@code hash}.
     *
     * @return a new model with the result and binding counts plus one statement
     *         per variable, link and (if present) the exception
     */
    @Override
    public Model toRDF() {
        final Model model = ModelFactory.createDefaultModel();
        final Resource subject = IRES.getResponsebodyResource(hash);

        // the two counts are always written
        model.add(subject, IPROP.results, ResourceFactory.createTypedLiteral(results));
        model.add(subject, IPROP.bindings, ResourceFactory.createTypedLiteral(bindings));

        if (variables != null) {
            variables.forEach(variable -> model.add(subject, IPROP.variable, ResourceFactory.createTypedLiteral(variable)));
        }
        if (links != null) {
            links.forEach(link -> model.add(subject, IPROP.link, ResourceFactory.createTypedLiteral(link)));
        }
        if (exception != null) {
            model.add(subject, IPROP.exception, ResourceFactory.createTypedLiteral(exception.toString()));
        }
        return model;
    }
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
package org.aksw.iguana.cc.lang.impl;

import org.aksw.iguana.cc.lang.LanguageProcessor;
import org.aksw.iguana.cc.storage.Storable;
import org.aksw.iguana.commons.rdf.IPROP;
import org.aksw.iguana.commons.rdf.IRES;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.json.simple.parser.ContentHandler;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.slf4j.Logger;

import java.io.BufferedReader;
import java.io.IOException;
Expand All @@ -24,79 +18,34 @@

/**
* SAX Parser for SPARQL JSON Results.
* For correct SPARQL JSON Results it returns the number of solutions, bound values and the names of the variables.
* For correct SPARQL JSON results, it returns the number of solutions, bound values and the names of the variables.
* For malformed results it may or may not fail. For malformed JSON it fails if the underlying json.simple.parser fails.
* <p>
* Specification: <a href="https://www.w3.org/TR/sparql11-results-json/">https://www.w3.org/TR/sparql11-results-json/</a>
*/
@LanguageProcessor.ContentType("application/sparql-results+json")
public class SaxSparqlJsonResultCountingParser extends LanguageProcessor {

private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(SaxSparqlJsonResultCountingParser.class);

/**
 * Parses a SPARQL JSON result document from the given stream and summarizes it.
 * <p>
 * If the handler saw a boolean result, a {@link BooleanResultData} is returned;
 * otherwise a {@link ResultCountData} with the number of solutions and bound values,
 * the variable names and the links is returned.
 * If the JSON parser reports a {@link ParseException}, the error is logged and a
 * {@link ResultCountData} with {@code -1} for both counts, no variables, no links and
 * the exception attached is returned.
 *
 * @param inputStream stream containing the response body, decoded as UTF-8
 * @param hash        hash of the response body, passed through to the returned data
 * @return the extracted result data
 * @throws RuntimeException wrapping the {@link IOException} if reading the stream fails
 */
@Override
public LanguageProcessingData process(InputStream inputStream, long hash) {
    var parser = new JSONParser();
    var handler = new SaxSparqlJsonResultContentHandler();
    try {
        parser.parse(new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)), handler);
        if (handler.isAskResult()) {
            return new BooleanResultData(hash, handler.booleanResult(), handler.links(), null);
        }
        return new ResultCountData(hash, handler.solutions(), handler.boundValues(), handler.variables(), handler.links(), null);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (ParseException e) {
        // This is the JSON parser; the message previously said "XML" (copy-paste slip).
        LOGGER.error("Error while parsing SPARQL JSON Results.", e);
        return new ResultCountData(hash, -1, -1, null, null, e);
    }
}

private static class SaxSparqlJsonResultContentHandler implements ContentHandler {
// TODO: add support for ask queries and link
// TODO: code is unnecessary complicated

private boolean headFound = false;
Expand All @@ -106,12 +55,14 @@ private static class SaxSparqlJsonResultContentHandler implements ContentHandler
private boolean inBindings = false;
private boolean inBindingsArray = false;
private boolean inVars = false;
private boolean inLink = false;
private boolean inBoolean = false;

private long boundValues = 0;

private long solutions = 0;

private Boolean booleanResult = null;
private final List<String> variables = new ArrayList<>();
private final List<String> links = new ArrayList<>();


@Override
Expand Down Expand Up @@ -165,6 +116,8 @@ public boolean startArray() {
public boolean endArray() {
if (inVars)
inVars = false;
if (inLink)
inLink = false;
if (objectDepth == 2 && inResults && inBindings && inBindingsArray) {
inBindingsArray = false;
}
Expand All @@ -182,6 +135,10 @@ public boolean startObjectEntry(String key) {
if (headFound)
inResults = true;
}
case "boolean" -> {
if (headFound)
inBoolean = true;
}
}
}
case 2 -> {
Expand All @@ -191,6 +148,9 @@ public boolean startObjectEntry(String key) {
if ("vars".compareTo(key) == 0) {
inVars = true;
}
if ("link".compareTo(key) == 0) {
inLink = true;
}
}
}
return true;
Expand All @@ -204,7 +164,10 @@ public boolean endObjectEntry() {
public boolean primitive(Object value) {
if (inVars)
variables.add(value.toString());

if (inLink)
links.add(value.toString());
if (inBoolean && value instanceof Boolean val)
booleanResult = val;
return true;
}

Expand All @@ -219,5 +182,17 @@ public long solutions() {
public List<String> variables() {
return variables;
}

public List<String> links() {
return links;
}

public Boolean booleanResult() {
return booleanResult;
}

public boolean isAskResult() {
return booleanResult != null;
}
}
}
Loading

0 comments on commit 0314a68

Please sign in to comment.