From 1150b1ab45ccba26fb9e96622fbf13b0d419a1ad Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Mon, 23 Sep 2024 17:44:33 +0200 Subject: [PATCH 01/34] Add QueryData class --- .../org/aksw/iguana/cc/query/QueryData.java | 50 +++++++++++++++++++ .../cc/query/list/FileBasedQueryList.java | 11 ++++ .../aksw/iguana/cc/query/list/QueryList.java | 3 ++ .../query/list/impl/FileCachingQueryList.java | 2 + .../query/list/impl/FileReadingQueryList.java | 6 +++ .../query/list/impl/StringListQueryList.java | 8 +++ 6 files changed, 80 insertions(+) create mode 100644 src/main/java/org/aksw/iguana/cc/query/QueryData.java diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java new file mode 100644 index 000000000..22c870ce1 --- /dev/null +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -0,0 +1,50 @@ +package org.aksw.iguana.cc.query; + +import org.aksw.iguana.cc.query.source.QuerySource; +import org.apache.jena.update.UpdateFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * This class stores extra information about a query. + * At the moment, it only stores if the query is an update query or not. + * + * @param queryId The id of the query + * @param update If the query is an update query + */ +public record QueryData(int queryId, boolean update) { + public static List generate(Collection queries) { + final var queryData = new ArrayList(); + int i = 0; + for (InputStream query : queries) { + boolean update = true; + try { + UpdateFactory.read(query); // Throws an exception if the query is not an update query + } catch (Exception e) { + update = false; + } + queryData.add(new QueryData(i++, update)); + } + return queryData; + } + + public static List generate(QuerySource queries) throws IOException { + final var streams = new ArrayList(); + int bound = queries.size(); + for (int i = 0; i < bound; i++) { + InputStream queryStream = queries.getQueryStream(i); + streams.add(queryStream); + } + return generate(streams); + } + + public static List generate(List queries) { + final var streams = queries.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList(); + return generate(streams); + } +} diff --git a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java index 0256fee53..f4412e85f 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java @@ -1,9 +1,11 @@ package org.aksw.iguana.cc.query.list; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.source.QuerySource; import java.io.IOException; import java.io.InputStream; +import java.util.List; /** * The abstract class for a QueryList. A query list provides the queries to the QueryHandler. @@ -16,6 +18,7 @@ public abstract class FileBasedQueryList implements QueryList{ * This is the QuerySource from which the queries should be retrieved. */ final protected QuerySource querySource; + protected List queryData; public FileBasedQueryList(QuerySource querySource) { if (querySource == null) { @@ -24,6 +27,10 @@ public FileBasedQueryList(QuerySource querySource) { this.querySource = querySource; } + protected void setQueryData(List queryData) { + this.queryData = queryData; + } + /** * This method returns the amount of queries in the query list. * @@ -52,4 +59,8 @@ public int hashCode() { public abstract String getQuery(int index) throws IOException; public abstract InputStream getQueryStream(int index) throws IOException; + + public QueryData getQueryData(int index) { + return queryData.get(index); + } } diff --git a/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java index 623a8c67f..877a03f9a 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.source.QuerySource; import java.io.IOException; @@ -35,4 +36,6 @@ public interface QueryList { String getQuery(int index) throws IOException; InputStream getQueryStream(int index) throws IOException; + + QueryData getQueryData(int index); } diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java index 8f6c3a38d..78dbb349c 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list.impl; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.FileBasedQueryList; import org.aksw.iguana.cc.query.source.QuerySource; import org.aksw.iguana.commons.io.ByteArrayListInputStream; @@ -40,6 +41,7 @@ public FileCachingQueryList(QuerySource querySource) throws IOException { queries.add(balos); } } + setQueryData(QueryData.generate(queries.stream().map(ByteArrayListOutputStream::toInputStream).toList())); } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java index e3074be04..045a326df 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list.impl; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.FileBasedQueryList; import org.aksw.iguana.cc.query.source.QuerySource; @@ -15,6 +16,11 @@ public class FileReadingQueryList extends FileBasedQueryList { public FileReadingQueryList(QuerySource querySource) { super(querySource); + try { + setQueryData(QueryData.generate(querySource)); + } catch (IOException e) { + throw new RuntimeException("Could not read queries from the source.", e); + } } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java index 1d448940e..8c3dcfc64 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java @@ -1,5 +1,6 @@ package org.aksw.iguana.cc.query.list.impl; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.QueryList; import java.io.ByteArrayInputStream; @@ -10,9 +11,11 @@ public class StringListQueryList implements QueryList { private final List queries; + private final List queryData; public StringListQueryList(List queries) { this.queries = queries; + this.queryData = QueryData.generate(queries); } @Override @@ -34,4 +37,9 @@ public int size() { public int hashCode() { return queries.hashCode(); } + + @Override + public QueryData getQueryData(int index) { + return queryData.get(index); + } } From 5bdf321dc7064132bba6addcd65f7e42dfd02c5b Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Mon, 23 Sep 2024 17:53:47 +0200 Subject: [PATCH 02/34] Add test --- .../aksw/iguana/cc/query/QueryDataTest.java | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java diff --git a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java new file mode 100644 index 000000000..19d6a7481 --- /dev/null +++ b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java @@ -0,0 +1,68 @@ +package org.aksw.iguana.cc.query; + +import org.aksw.iguana.cc.query.source.QuerySource; +import org.aksw.iguana.cc.query.source.impl.FileSeparatorQuerySource; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +class QueryDataTest { + + private static Path tempFile = null; + + @BeforeAll + public static void setup() throws IOException { + tempFile = Files.createTempFile("test", "txt"); + Files.writeString(tempFile, """ + SELECT ?s ?p ?o WHERE { + ?s ?p ?o + } + + INSERT DATA { + + } + + DELETE DATA { + + } + + SELECT ?s ?p ?o WHERE { + ?s ?p ?o + } + """); + } + + @AfterAll + public static void teardown() throws IOException { + Files.deleteIfExists(tempFile); + } + + @Test + void testGeneration() throws IOException { + final QuerySource querySource = new FileSeparatorQuerySource(tempFile, ""); + final var testStrings = querySource.getAllQueries(); + + List> generations = List.of( + QueryData.generate(testStrings), + QueryData.generate(testStrings.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList()), + QueryData.generate(querySource) + ); + for (List generation : generations) { + assertEquals(4, generation.size()); + assertFalse(generation.get(0).update()); + assertTrue(generation.get(1).update()); + assertTrue(generation.get(2).update()); + assertFalse(generation.get(3).update()); + } + } +} \ No newline at end of file From bdef045e8b4de1c9f8cc22be96cbcf27207ea5b7 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:14:32 +0200 Subject: [PATCH 03/34] Check for update queries --- .../iguana/cc/query/handler/QueryHandler.java | 9 +++++---- .../iguana/cc/utils/http/RequestFactory.java | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 6930d3f18..03a1f656b 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -145,8 +145,9 @@ public Template(URI endpoint, Long limit, Boolean save) { } } - public record QueryStringWrapper(int index, String query) {} - public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier) {} + public record QueryStringWrapper(int index, String query, boolean update) {} + + public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update) {} protected static final Logger LOGGER = LoggerFactory.getLogger(QueryHandler.class); @@ -247,7 +248,7 @@ public QuerySelector getQuerySelectorInstance() { public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { final var queryIndex = querySelector.getNextIndex(); - return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex)); + return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryList.getQueryData(queryIndex).update()); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { @@ -258,7 +259,7 @@ public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { } catch (IOException e) { throw new RuntimeException(e); } - }); + }, queryList.getQueryData(queryIndex).update()); } @Override diff --git a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index e29fc5331..3392c31b7 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -17,6 +17,7 @@ import java.io.IOException; import java.io.InputStream; +import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; @@ -113,9 +114,18 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que throw new IOException(e); } + // check if the query is an update query, if yes, change the request type to similar update request type + RequestType actualRequestType = requestType; + if (requestType == RequestType.GET_QUERY || requestType == RequestType.POST_QUERY) + actualRequestType = queryHandle.update() ? RequestType.POST_UPDATE : requestType; + if (requestType == RequestType.POST_URL_ENC_QUERY) + actualRequestType = queryHandle.update() ? RequestType.POST_URL_ENC_UPDATE : requestType; + // if only one endpoint is set, use it for both queries and updates + URI updateEndpoint = connectionConfig.updateEndpoint() != null ? connectionConfig.updateEndpoint() : connectionConfig.endpoint(); + // If the query is bigger than 2^31 bytes (2GB) and the request type is set to GET_QUERY, POST_URL_ENC_QUERY or // POST_URL_ENC_UPDATE, the following code will throw an exception. - switch (requestType) { + switch (actualRequestType) { case GET_QUERY -> asyncRequestBuilder = AsyncRequestBuilder.get(new URIBuilder(connectionConfig.endpoint()) .addParameter("query", new String(queryStream.readAllBytes(), StandardCharsets.UTF_8)) .build() @@ -127,10 +137,10 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que .setEntity(new BasicAsyncEntityProducer(urlEncode("query", new String(queryStream.readAllBytes(), StandardCharsets.UTF_8)), null, false)); case POST_QUERY -> asyncRequestBuilder = AsyncRequestBuilder.post(connectionConfig.endpoint()) .setEntity(new StreamEntityProducer(queryStreamSupplier, !caching, "application/sparql-query")); - case POST_URL_ENC_UPDATE -> asyncRequestBuilder = AsyncRequestBuilder.post(connectionConfig.endpoint()) + case POST_URL_ENC_UPDATE -> asyncRequestBuilder = AsyncRequestBuilder.post(updateEndpoint) .setHeader(HttpHeaders.CONTENT_TYPE, "application/x-www-form-urlencoded") .setEntity(new BasicAsyncEntityProducer(urlEncode("update", new String(queryStream.readAllBytes(), StandardCharsets.UTF_8)), null, false)); - case POST_UPDATE -> asyncRequestBuilder = AsyncRequestBuilder.post(connectionConfig.endpoint()) + case POST_UPDATE -> asyncRequestBuilder = AsyncRequestBuilder.post(updateEndpoint) .setEntity(new StreamEntityProducer(queryStreamSupplier, !caching, "application/sparql-update")); default -> throw new IllegalStateException("Unexpected value: " + requestType); } From 984cd18591e58e39c1e67a80dc5a9bb1e198f1c7 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:50:19 +0200 Subject: [PATCH 04/34] Move responsibility of QueryData to QueryHandler --- .../aksw/iguana/cc/query/handler/QueryHandler.java | 14 ++++++++++++-- .../iguana/cc/query/list/FileBasedQueryList.java | 9 --------- .../org/aksw/iguana/cc/query/list/QueryList.java | 2 -- .../cc/query/list/impl/FileCachingQueryList.java | 1 - .../cc/query/list/impl/FileReadingQueryList.java | 5 ----- .../cc/query/list/impl/StringListQueryList.java | 7 ------- 6 files changed, 12 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 03a1f656b..be30268fa 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -5,6 +5,7 @@ import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.impl.StringListQueryList; import org.aksw.iguana.cc.query.selector.QuerySelector; import org.aksw.iguana.cc.query.selector.impl.LinearQuerySelector; @@ -156,6 +157,7 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; private int workerCount = 0; // give every worker inside the same worker config an offset seed @@ -169,6 +171,7 @@ protected QueryHandler() { config = null; queryList = null; hashCode = 0; + queryData = null; } @JsonCreator @@ -185,6 +188,13 @@ public QueryHandler(Config config) throws IOException { new FileReadingQueryList(querySource); } this.hashCode = queryList.hashCode(); + this.queryData = QueryData.generate(IntStream.range(0, queryList.size()).mapToObj(i -> { + try { + return queryList.getQueryStream(i); + } catch (IOException e) { + throw new RuntimeException("Couldn't read query stream", e); + } + }).collect(Collectors.toList())); } private QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { @@ -248,7 +258,7 @@ public QuerySelector getQuerySelectorInstance() { public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { final var queryIndex = querySelector.getNextIndex(); - return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryList.getQueryData(queryIndex).update()); + return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryData.get(queryIndex).update()); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { @@ -259,7 +269,7 @@ public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { } catch (IOException e) { throw new RuntimeException(e); } - }, queryList.getQueryData(queryIndex).update()); + }, queryData.get(queryIndex).update()); } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java index f4412e85f..013093fe7 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/FileBasedQueryList.java @@ -18,7 +18,6 @@ public abstract class FileBasedQueryList implements QueryList{ * This is the QuerySource from which the queries should be retrieved. */ final protected QuerySource querySource; - protected List queryData; public FileBasedQueryList(QuerySource querySource) { if (querySource == null) { @@ -27,10 +26,6 @@ public FileBasedQueryList(QuerySource querySource) { this.querySource = querySource; } - protected void setQueryData(List queryData) { - this.queryData = queryData; - } - /** * This method returns the amount of queries in the query list. * @@ -59,8 +54,4 @@ public int hashCode() { public abstract String getQuery(int index) throws IOException; public abstract InputStream getQueryStream(int index) throws IOException; - - public QueryData getQueryData(int index) { - return queryData.get(index); - } } diff --git a/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java index 877a03f9a..7df4dd332 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/QueryList.java @@ -36,6 +36,4 @@ public interface QueryList { String getQuery(int index) throws IOException; InputStream getQueryStream(int index) throws IOException; - - QueryData getQueryData(int index); } diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java index 78dbb349c..a804702b4 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileCachingQueryList.java @@ -41,7 +41,6 @@ public FileCachingQueryList(QuerySource querySource) throws IOException { queries.add(balos); } } - setQueryData(QueryData.generate(queries.stream().map(ByteArrayListOutputStream::toInputStream).toList())); } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java index 045a326df..0999deba5 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/FileReadingQueryList.java @@ -16,11 +16,6 @@ public class FileReadingQueryList extends FileBasedQueryList { public FileReadingQueryList(QuerySource querySource) { super(querySource); - try { - setQueryData(QueryData.generate(querySource)); - } catch (IOException e) { - throw new RuntimeException("Could not read queries from the source.", e); - } } @Override diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java index 8c3dcfc64..71f93d740 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java @@ -11,11 +11,9 @@ public class StringListQueryList implements QueryList { private final List queries; - private final List queryData; public StringListQueryList(List queries) { this.queries = queries; - this.queryData = QueryData.generate(queries); } @Override @@ -37,9 +35,4 @@ public int size() { public int hashCode() { return queries.hashCode(); } - - @Override - public QueryData getQueryData(int index) { - return queryData.get(index); - } } From d63e4fb8016ab4edf4805d0c1b27da07b3ef9268 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:51:25 +0200 Subject: [PATCH 05/34] Remove unused methods --- .../org/aksw/iguana/cc/query/QueryData.java | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index 22c870ce1..02858cb8f 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -1,10 +1,7 @@ package org.aksw.iguana.cc.query; -import org.aksw.iguana.cc.query.source.QuerySource; import org.apache.jena.update.UpdateFactory; -import java.io.ByteArrayInputStream; -import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; @@ -32,19 +29,4 @@ public static List generate(Collection queries) { } return queryData; } - - public static List generate(QuerySource queries) throws IOException { - final var streams = new ArrayList(); - int bound = queries.size(); - for (int i = 0; i < bound; i++) { - InputStream queryStream = queries.getQueryStream(i); - streams.add(queryStream); - } - return generate(streams); - } - - public static List generate(List queries) { - final var streams = queries.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList(); - return generate(streams); - } } From a382d3e51b191c7985cbeaa3827a816b7db38e5d Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:05:40 +0200 Subject: [PATCH 06/34] Add tests --- .../aksw/iguana/cc/query/QueryDataTest.java | 4 +- .../worker/impl/SPARQLProtocolWorkerTest.java | 83 +++++++++++++++++++ 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java index 19d6a7481..259dd9aac 100644 --- a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java @@ -53,9 +53,7 @@ void testGeneration() throws IOException { final var testStrings = querySource.getAllQueries(); List> generations = List.of( - QueryData.generate(testStrings), - QueryData.generate(testStrings.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList()), - QueryData.generate(querySource) + QueryData.generate(testStrings.stream().map(s -> (InputStream) new ByteArrayInputStream(s.getBytes())).toList()) ); for (List generation : generations) { assertEquals(4, generation.size()); diff --git a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java index b7d4daf71..d6c5911f8 100644 --- a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java +++ b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java @@ -57,15 +57,19 @@ public class SPARQLProtocolWorkerTest { .build(); private final static String QUERY = "SELECT * WHERE { ?s ?p ?o }"; + private final static String UPDATE_QUERY = "INSERT DATA { }"; private final static int QUERY_MIXES = 1; private static Path queryFile; + private static Path updateFile; private static final Logger LOGGER = LoggerFactory.getLogger(SPARQLProtocolWorker.class); @BeforeAll public static void setup() throws IOException { queryFile = Files.createTempFile("iguana-test-queries", ".tmp"); + updateFile = Files.createTempFile("iguana-test-updates", ".tmp"); Files.writeString(queryFile, QUERY, StandardCharsets.UTF_8); + Files.writeString(updateFile, QUERY + "\n\n" + UPDATE_QUERY, StandardCharsets.UTF_8); } @BeforeEach @@ -77,6 +81,7 @@ public void reset() { @AfterAll public static void cleanup() throws IOException { Files.deleteIfExists(queryFile); + Files.deleteIfExists(updateFile); SPARQLProtocolWorker.closeHttpClient(); } @@ -120,6 +125,31 @@ public static Stream requestFactoryData() throws URISyntaxException { return workers.stream(); } + public static Stream updateWorkerData() throws IOException { + final var normalEndpoint = URI.create("http://localhost:" + wm.getPort() + "/ds/query"); + final var updateEndpoint = URI.create("http://localhost:" + wm.getPort() + "/ds/update"); + final var processor = new ResponseBodyProcessor("application/sparql-results+json"); + final var format = QueryHandler.Config.Format.SEPARATOR; + final var queryHandler = new QueryHandler(new QueryHandler.Config(updateFile.toAbsolutePath().toString(), format, null, true, QueryHandler.Config.Order.LINEAR, 0L, QueryHandler.Config.Language.SPARQL)); + final var datasetConfig = new DatasetConfig("TestDS", null); + final var connection = new ConnectionConfig("TestConn", "1", datasetConfig, normalEndpoint, new ConnectionConfig.Authentication("testUser", "password"), updateEndpoint, new ConnectionConfig.Authentication("updateUser", "password")); + final var workers = new ArrayDeque(); + for (var requestType : List.of(RequestFactory.RequestType.GET_QUERY, RequestFactory.RequestType.POST_URL_ENC_QUERY, RequestFactory.RequestType.POST_QUERY)) { + final var config = new SPARQLProtocolWorker.Config( + 1, + queryHandler, + new HttpWorker.QueryMixes(QUERY_MIXES), + connection, + Duration.parse("PT6S"), + "application/sparql-results+json", + requestType, + true + ); + workers.add(Arguments.of(Named.of(requestType.name(), new SPARQLProtocolWorker(0, processor, config)))); + } + return workers.stream(); + } + public static List completionTargets() { final var out = new ArrayList(); final var queryMixesAmount = List.of(1, 2, 5, 10, 100, 200); @@ -204,6 +234,59 @@ public void testRequestFactory(SPARQLProtocolWorker worker, boolean cached) { assertNotEquals(Duration.ZERO, result.executionStats().get(0).duration(), "Worker returned zero duration"); } + @ParameterizedTest + @MethodSource("updateWorkerData") + public void testSeparateUpdateEndpoint(SPARQLProtocolWorker worker) { + final var workerConfig = worker.config(); + switch (workerConfig.requestType()) { + case GET_QUERY -> { + wm.stubFor(get(urlPathEqualTo("/ds/query")) + .withQueryParam("query", equalTo(QUERY)) + .withBasicAuth("testUser", "password") + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + wm.stubFor(post(urlPathEqualTo("/ds/update")) + .withHeader("Content-Type", equalTo("application/sparql-update")) + .withBasicAuth("updateUser", "password") + .withRequestBody(equalTo(UPDATE_QUERY)) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + } + case POST_URL_ENC_QUERY -> { + wm.stubFor(post(urlPathEqualTo("/ds/query")) + .withHeader("Content-Type", equalTo("application/x-www-form-urlencoded")) + .withBasicAuth("testUser", "password") + .withRequestBody(equalTo("query=" + URLEncoder.encode(QUERY, StandardCharsets.UTF_8))) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + wm.stubFor(post(urlPathEqualTo("/ds/update")) + .withHeader("Content-Type", equalTo("application/x-www-form-urlencoded")) + .withBasicAuth("updateUser", "password") + .withRequestBody(equalTo("update=" + URLEncoder.encode(UPDATE_QUERY, StandardCharsets.UTF_8))) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + } + case POST_QUERY -> { + wm.stubFor(post(urlPathEqualTo("/ds/query")) + .withHeader("Content-Type", equalTo("application/sparql-query")) + .withBasicAuth("testUser", "password") + .withRequestBody(equalTo(QUERY)) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + wm.stubFor(post(urlPathEqualTo("/ds/update")) + .withHeader("Content-Type", equalTo("application/sparql-update")) + .withBasicAuth("updateUser", "password") + .withRequestBody(equalTo(UPDATE_QUERY)) + .willReturn(aResponse().withStatus(200).withBody("Non-Empty-Body"))); + } + } + final HttpWorker.Result result = worker.start().join(); + assertEquals(result.executionStats().size(), QUERY_MIXES * 2, "Worker should have executed only 1 query"); + for (var res : result.executionStats()) { + assertNull(res.error().orElse(null), "Worker threw an exception, during execution"); + assertEquals(200, res.httpStatusCode().get(), "Worker returned wrong status code"); + assertNotEquals(0, res.responseBodyHash().getAsLong(), "Worker didn't return a response body hash"); + assertEquals("Non-Empty-Body".getBytes(StandardCharsets.UTF_8).length, res.contentLength().getAsLong(), "Worker returned wrong content length"); + assertNotEquals(Duration.ZERO, res.duration(), "Worker returned zero duration"); + } + + } + @DisplayName("Test Malformed Response Processing") @ParameterizedTest(name = "[{index}] fault = {0}") @EnumSource(Fault.class) From 21cc30410a208322329fc913151534cb4f22de94 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:05:46 +0200 Subject: [PATCH 07/34] Fix authentication --- .../org/aksw/iguana/cc/utils/http/RequestFactory.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index 3392c31b7..e0853166e 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -148,10 +148,15 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que // set additional headers if (acceptHeader != null) asyncRequestBuilder.addHeader("Accept", acceptHeader); - if (connectionConfig.authentication() != null && connectionConfig.authentication().user() != null) + if (queryHandle.update() && connectionConfig.updateAuthentication() != null && connectionConfig.updateAuthentication().user() != null) { asyncRequestBuilder.addHeader("Authorization", - HttpWorker.basicAuth(connectionConfig.authentication().user(), - Optional.ofNullable(connectionConfig.authentication().password()).orElse(""))); + HttpWorker.basicAuth(connectionConfig.updateAuthentication().user(), + Optional.ofNullable(connectionConfig.updateAuthentication().password()).orElse(""))); + } else if (connectionConfig.authentication() != null && connectionConfig.authentication().user() != null) { + asyncRequestBuilder.addHeader("Authorization", + HttpWorker.basicAuth(connectionConfig.authentication().user(), + Optional.ofNullable(connectionConfig.authentication().password()).orElse(""))); + } // cache request if (caching) From 870d9d96a73aace5a1eb4470ff6c633c96643f9a Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:06:20 +0200 Subject: [PATCH 08/34] Cleanup --- src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java | 1 - .../aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java index 259dd9aac..3db404fcf 100644 --- a/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/QueryDataTest.java @@ -11,7 +11,6 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.List; import static org.junit.jupiter.api.Assertions.*; diff --git a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java index d6c5911f8..6d9842fa7 100644 --- a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java +++ b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java @@ -290,7 +290,7 @@ public void testSeparateUpdateEndpoint(SPARQLProtocolWorker worker) { @DisplayName("Test Malformed Response Processing") @ParameterizedTest(name = "[{index}] fault = {0}") @EnumSource(Fault.class) - public void testMalformedResponseProcessing(Fault fault) throws IOException, URISyntaxException { + public void testMalformedResponseProcessing(Fault fault) throws URISyntaxException { SPARQLProtocolWorker worker = (SPARQLProtocolWorker) ((Named)requestFactoryData().toList().get(0).get()[0]).getPayload(); wm.stubFor(get(urlPathEqualTo("/ds/query")) .willReturn(aResponse().withFault(fault))); @@ -300,7 +300,7 @@ public void testMalformedResponseProcessing(Fault fault) throws IOException, URI } @Test - public void testBadHttpCodeResponse() throws IOException, URISyntaxException { + public void testBadHttpCodeResponse() throws URISyntaxException { SPARQLProtocolWorker worker = (SPARQLProtocolWorker) ((Named)requestFactoryData().toList().get(0).get()[0]).getPayload(); wm.stubFor(get(urlPathEqualTo("/ds/query")) .willReturn(aResponse().withStatus(404))); From 3aa46ed31cbc769d6b1ee090383d7692211b44ed Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:16:05 +0200 Subject: [PATCH 09/34] Fix StringListQueryList --- .../aksw/iguana/cc/query/list/impl/StringListQueryList.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java index 71f93d740..809ff6ff5 100644 --- a/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java +++ b/src/main/java/org/aksw/iguana/cc/query/list/impl/StringListQueryList.java @@ -1,9 +1,8 @@ package org.aksw.iguana.cc.query.list.impl; -import org.aksw.iguana.cc.query.QueryData; import org.aksw.iguana.cc.query.list.QueryList; +import org.aksw.iguana.commons.io.ByteArrayListInputStream; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.List; @@ -23,7 +22,7 @@ public String getQuery(int index) throws IOException { @Override public InputStream getQueryStream(int index) throws IOException { - return new ByteArrayInputStream(queries.get(index).getBytes()); + return new ByteArrayListInputStream(List.of(queries.get(index).getBytes())); } @Override From df06fda9af53764b5a112300c7c6dbbe219c648b Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:20:01 +0200 Subject: [PATCH 10/34] Modify QueryHandler and QueryData --- .../org/aksw/iguana/cc/query/QueryData.java | 29 ++++- .../iguana/cc/query/handler/QueryHandler.java | 122 ++++++++++++++---- .../cc/worker/impl/SPARQLProtocolWorker.java | 15 ++- .../query/handler/QueryHandlerConfigTest.java | 2 +- .../cc/query/handler/TemplateQueriesTest.java | 4 +- 5 files changed, 132 insertions(+), 40 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index 02858cb8f..3f71c0b92 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -2,6 +2,7 @@ import org.apache.jena.update.UpdateFactory; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; @@ -12,9 +13,15 @@ * At the moment, it only stores if the query is an update query or not. * * @param queryId The id of the query - * @param update If the query is an update query */ -public record QueryData(int queryId, boolean update) { +public record QueryData(int queryId, QueryType type, Integer templateId) { + public enum QueryType { + DEFAULT, + UPDATE, + TEMPLATE, + TEMPLATE_INSTANCE + } + public static List generate(Collection queries) { final var queryData = new ArrayList(); int i = 0; @@ -25,8 +32,24 @@ public static List generate(Collection queries) { } catch (Exception e) { update = false; } - queryData.add(new QueryData(i++, update)); + queryData.add(new QueryData(i++, update ? QueryType.UPDATE : QueryType.DEFAULT, null)); + try { + query.close(); + } catch (IOException ignored) {} } return queryData; } + + public static boolean checkUpdate(InputStream query) { + try { + UpdateFactory.read(query); // Throws an exception if the query is not an update query + return true; + } catch (Exception e) { + return false; + } + } + + public boolean update() { + return type == QueryType.UPDATE; + } } diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index be30268fa..a685dd336 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -21,17 +21,20 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; /** * The QueryHandler is used by every worker that extends the AbstractWorker. @@ -137,18 +140,18 @@ public String value() { } } - public record Template(@JsonProperty(required = true) URI endpoint, Long limit, Boolean save) { - public Template(URI endpoint, Long limit, Boolean save) { + public record Template(@JsonProperty(required = true) URI endpoint, Long limit, Boolean save, Boolean individualResults) { + public Template(URI endpoint, Long limit, Boolean save, Boolean individualResults) { this.endpoint = endpoint; this.limit = limit == null ? 2000 : limit; this.save = save == null || save; + this.individualResults = individualResults != null; } } } - public record QueryStringWrapper(int index, String query, boolean update) {} - - public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update) {} + public record QueryStringWrapper(int index, String query, boolean update, Integer resultId) {} + public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update, Integer resultId) {} protected static final Logger LOGGER = LoggerFactory.getLogger(QueryHandler.class); @@ -157,7 +160,7 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; + protected List queryData; private int workerCount = 0; // give every worker inside the same worker config an offset seed @@ -186,17 +189,20 @@ public QueryHandler(Config config) throws IOException { queryList = (config.caching()) ? new FileCachingQueryList(querySource) : new FileReadingQueryList(querySource); + queryData = QueryData.generate(IntStream.range(0, queryList.size()).mapToObj(i -> { + try { + return queryList.getQueryStream(i); + } catch (IOException e) { + throw new RuntimeException("Couldn't read query stream", e); + } + }).collect(Collectors.toList())); } this.hashCode = queryList.hashCode(); - this.queryData = QueryData.generate(IntStream.range(0, queryList.size()).mapToObj(i -> { - try { - return queryList.getQueryStream(i); - } catch (IOException e) { - throw new RuntimeException("Couldn't read query stream", e); - } - }).collect(Collectors.toList())); } + private record TemplateData(List queries, int templates, int[] indices, int[] instanceNumber, int instanceStart) {} + + // TODO: storing and reading of instance file private QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { QuerySource querySource = templateSource; final var originalPath = templateSource.getPath(); @@ -205,16 +211,22 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr final Path instancePath = Files.isDirectory(originalPath) ? originalPath.resolveSibling(originalPath.getFileName() + postfix) : // if the source of the query templates is a folder, the instances will be saved in a file with the same name as the folder originalPath.resolveSibling(originalPath.getFileName().toString().split("\\.")[0] + postfix); // if the source of the query templates is a file, the instances will be saved in a file with the same name as the file + TemplateData templateData; + if (Files.exists(instancePath)) { LOGGER.info("Already existing query template instances have been found and will be reused. Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); + + // TODO: change this querySource = createQuerySource(instancePath); // if the instances already exist, use them } else { - final List instances = instantiateTemplateQueries(querySource, config.template); + templateData = instantiateTemplateQueries(querySource, config.template); + + // TODO: change this if (config.template.save) { // save the instances to a file Files.createFile(instancePath); try (var writer = Files.newBufferedWriter(instancePath)) { - for (String instance : instances) { + for (String instance : templateData.queries()) { writer.write(instance); writer.newLine(); } @@ -224,8 +236,40 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr } else { // query source isn't necessary, because queries aren't stored in a file, // directly return a list of the instances instead - return new StringListQueryList(instances); + // return new StringListQueryList(templateData.queries()); } + + AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template + AtomicInteger index = new AtomicInteger(0); // index of the current query + AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template + queryData = templateData.queries.stream().map( + query -> { + // once the template instances start, the template index is reset and reused for the instances + // to track to which template the instances belong + if (index.get() == templateData.instanceStart) templateIndex.set(0); + + if (index.get() >= templateData.instanceStart) { + // query is an instance of a template + + // if the instance id is equal to the number of instances for the current template, + // the next template is used + if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { + templateIndex.getAndIncrement(); + instanceId.set(0); + } + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); + } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { + // query is a template + templateIndex.getAndIncrement(); + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); + } else { + // query is neither a template nor an instance + final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); + return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); + } + } + ).toList(); + return new StringListQueryList(templateData.queries); } return (config.caching()) ? new FileCachingQueryList(querySource) : // if caching is enabled, cache the instances @@ -257,19 +301,33 @@ public QuerySelector getQuerySelectorInstance() { } public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { - final var queryIndex = querySelector.getNextIndex(); - return new QueryStringWrapper(queryIndex, queryList.getQuery(queryIndex), queryData.get(queryIndex).update()); + final var queryIndex = getNextQueryIndex(querySelector); + return new QueryStringWrapper(queryIndex[0], queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { - final var queryIndex = querySelector.getNextIndex(); - return new QueryStreamWrapper(queryIndex, config.caching(), () -> { + final var queryIndex = getNextQueryIndex(querySelector); + return new QueryStreamWrapper(queryIndex[0], config.caching(), () -> { try { - return this.queryList.getQueryStream(queryIndex); + return this.queryList.getQueryStream(queryIndex[0]); } catch (IOException e) { throw new RuntimeException(e); } - }, queryData.get(queryIndex).update()); + }, queryData.get(queryIndex[0]).update(), queryIndex[1]); + } + + private Integer[] getNextQueryIndex(QuerySelector querySelector) { + int queryIndex; + do { + queryIndex = querySelector.getNextIndex(); + } while (queryData.get(queryIndex).type() == QueryData.QueryType.TEMPLATE); // query templates can't be executed directly + + // if individual results are disabled, the query instance will represent the template, by using its id + Integer resultId = null; + if (queryData.get(queryIndex).type() == QueryData.QueryType.TEMPLATE_INSTANCE && !config.template().individualResults) { + resultId = queryData.get(queryIndex).templateId(); + } + return new Integer[]{ queryIndex, resultId }; } @Override @@ -327,15 +385,22 @@ public Config getConfig() { * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
* SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
*/ - private static List instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { - // charset for generating random varia ble names + private static TemplateData instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { + // charset for generating random variable names final String charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; final Random random = new Random(); final var templateQueries = new FileCachingQueryList(querySource); final Pattern template = Pattern.compile("%%[a-zA-Z0-9_]+%%"); + final var oldQueries = new ArrayList(); final var instances = new ArrayList(); + + int templateNumber = 0; + final var indices = new ArrayList(); + final var instanceNumber = new ArrayList(); + for (int i = 0; i < templateQueries.size(); i++) { + oldQueries.add(templateQueries.getQuery(i)); // replace all variables in the query template with SPARQL variables // and store the variable names var templateQueryString = templateQueries.getQuery(i); @@ -355,7 +420,6 @@ private static List instantiateTemplateQueries(QuerySource querySource, // if no placeholders are found, the query is already a valid SPARQL query if (variables.isEmpty()) { - instances.add(templateQueryString); continue; } @@ -368,13 +432,13 @@ private static List instantiateTemplateQueries(QuerySource querySource, selectQueryString.append(" LIMIT " + config.limit()); selectQueryString.setNsPrefixes(templateQuery.getPrefixMapping()); + int count = 0; // send request to SPARQL endpoint and instantiate the template based on results try (QueryExecution exec = QueryExecutionFactory.createServiceRequest(config.endpoint().toString(), selectQueryString.asQuery())) { ResultSet resultSet = exec.execSelect(); if (!resultSet.hasNext()) { LOGGER.warn("No results for query template: {}", templateQueryString); } - int count = 0; while (resultSet.hasNext() && count++ < config.limit()) { var instance = new ParameterizedSparqlString(templateQueryString); QuerySolution solution = resultSet.next(); @@ -385,7 +449,11 @@ private static List instantiateTemplateQueries(QuerySource querySource, instances.add(instance.toString()); } } + // store the number of instances and the index of the template query + templateNumber++; + indices.add(i); + instanceNumber.add(count); } - return instances; + return new TemplateData(Stream.concat(oldQueries.stream(), instances.stream()).toList(), templateNumber, indices.stream().mapToInt(Integer::intValue).toArray(), instanceNumber.stream().mapToInt(Integer::intValue).toArray(), oldQueries.size()); } } diff --git a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java index 565763f68..ef0ffffb8 100644 --- a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java +++ b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java @@ -265,6 +265,7 @@ private HttpExecutionResult executeHttpRequest(Duration timeout) { // get the next query and request final var queryHandle = config().queries().getNextQueryStream(querySelector); final int queryIndex = queryHandle.index(); + final int resultIndex = queryHandle.resultId() == null ? queryIndex : queryHandle.resultId(); final AsyncRequestProducer request; try { @@ -358,7 +359,7 @@ protected HttpExecutionResult buildResult() { // check for http error if (response.getCode() / 100 != 2) { - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, null); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, null); } // check content length @@ -370,18 +371,18 @@ protected HttpExecutionResult buildResult() { if (responseSize != responseBody.size()) LOGGER.error("Error during copying the response data. (expected written data size = {}, actual written data size = {}, Content-Length-Header = {})", responseSize, responseBody.size(), contentLengthHeader.getValue()); final var exception = new HttpException(String.format("Content-Length header value doesn't match actual content length. (Content-Length-Header = %s, written data size = %s)", contentLength, config.parseResults() ? responseBody.size() : responseSize)); - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, exception); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, exception); } } // check timeout if (duration.compareTo(timeout) > 0) { - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, new TimeoutException()); + return createFailedResultDuringResponse(resultIndex, response, timeStamp, duration, new TimeoutException()); } // return successful result return new HttpExecutionResult( - queryIndex, + resultIndex, Optional.of(response), timeStamp, Duration.ofNanos(responseEnd - requestStart), @@ -402,18 +403,18 @@ protected HttpExecutionResult buildResult() { } catch (InterruptedException | ExecutionException e) { // This will close the connection and cancel the request if it's still running. future.cancel(true); - return createFailedResultBeforeRequest(queryIndex, e); + return createFailedResultBeforeRequest(resultIndex, e); } catch (TimeoutException e) { if (future.isDone()) { LOGGER.warn("Request finished immediately after timeout but will still be counted as timed out."); try { return future.get(); } catch (InterruptedException | ExecutionException ex) { - return createFailedResultBeforeRequest(queryIndex, ex); + return createFailedResultBeforeRequest(resultIndex, ex); } } else { future.cancel(true); - return createFailedResultBeforeRequest(queryIndex, e); + return createFailedResultBeforeRequest(resultIndex, e); } } } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java index 71e256055..243fade70 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java @@ -73,7 +73,7 @@ private static Stream testDeserializationData() { QueryHandler.Config.Order.RANDOM, 42L, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true) + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true, null) ), """ {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql"}} diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index 2beb8bf19..2c552bdf4 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -69,7 +69,7 @@ public void testTemplateQueries() throws IOException { QueryHandler.Config.Order.LINEAR, null, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false) + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, true) ); wm.stubFor(get(anyUrl()) .withQueryParam("query", matching("PREFIX\\s+rdf:\\s+\\s+SELECT\\s+DISTINCT\\s+\\?var0\\s+\\?var1\\s+\\?var2\\s+WHERE\\s+\\{\\s*\\?s\\s+rdf:type\\s+\\?var0\\s*;\\s*\\?var1\\s+\\?var2\\s*\\.\\s*\\?var2\\s+\\?p\\s+\\s*}\\s+LIMIT\\s+2000\\s*")) @@ -99,7 +99,7 @@ public void testMalformedTemplateQuery() throws IOException { QueryHandler.Config.Order.LINEAR, null, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false) + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, true) ); Assertions.assertThrows(QueryParseException.class, () -> new QueryHandler(queryHandlerConfig)); } From 5c50c6903708a2c840b64db0eec5581430218c52 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:20:59 +0200 Subject: [PATCH 11/34] Add executable query count and representative query count to QueryHandler --- .../impl/AggregatedExecutionStatistics.java | 2 +- .../metrics/impl/EachExecutionStatistic.java | 2 +- .../org/aksw/iguana/cc/metrics/impl/QMPH.java | 2 +- .../iguana/cc/query/handler/QueryHandler.java | 21 +++++++++++++++---- .../tasks/impl/StresstestResultProcessor.java | 11 +++++----- .../iguana/cc/utils/http/RequestFactory.java | 4 ++-- .../cc/worker/impl/SPARQLProtocolWorker.java | 4 ++-- .../iguana/cc/mockup/MockupQueryHandler.java | 7 ++++++- .../aksw/iguana/cc/mockup/MockupWorker.java | 2 +- .../cc/query/handler/QueryHandlerTest.java | 17 +++++++-------- .../cc/query/handler/TemplateQueriesTest.java | 2 +- 11 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java index 8582f2020..844168044 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java @@ -29,7 +29,7 @@ public AggregatedExecutionStatistics() { public Model createMetricModel(List workers, List[][] data, IRES.Factory iresFactory) { Model m = ModelFactory.createDefaultModel(); for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { Resource queryRes = iresFactory.getWorkerQueryResource(worker, i); m.add(createAggregatedModel(data[(int) worker.getWorkerID()][i], queryRes)); } diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java index c6e1bf95a..b3c3f1de3 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java @@ -26,7 +26,7 @@ public EachExecutionStatistic() { public Model createMetricModel(List workers, List[][] data, IRES.Factory iresFactory) { Model m = ModelFactory.createDefaultModel(); for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { Resource workerQueryResource = iresFactory.getWorkerQueryResource(worker, i); Resource queryRes = IRES.getResource(worker.config().queries().getQueryId(i)); BigInteger run = BigInteger.ONE; diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java index d2ae19143..fe0232ba8 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java @@ -28,7 +28,7 @@ public Number calculateTaskMetric(List workers, List[] data) { BigDecimal successes = BigDecimal.ZERO; - BigDecimal noq = BigDecimal.valueOf(worker.queries().getQueryCount()); + BigDecimal noq = BigDecimal.valueOf(worker.queries().getExecutableQueryCount()); Duration totalTime = Duration.ZERO; for (List datum : data) { for (HttpWorker.ExecutionStats exec : datum) { diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index a685dd336..8ef2aa714 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -162,6 +162,9 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; + int executableQueryCount = 0; + int representativeQueryCount = 0; + private int workerCount = 0; // give every worker inside the same worker config an offset seed final protected int hashCode; @@ -196,6 +199,8 @@ public QueryHandler(Config config) throws IOException { throw new RuntimeException("Couldn't read query stream", e); } }).collect(Collectors.toList())); + executableQueryCount = queryList.size(); + representativeQueryCount = queryList.size(); } this.hashCode = queryList.hashCode(); } @@ -269,6 +274,10 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr } } ).toList(); + this.executableQueryCount = templateData.queries.size() - templateData.templates; + this.representativeQueryCount = config.template.individualResults ? + templateData.queries.size() - templateData.templates : + templateData.queries.size() - templateData.instanceStart; return new StringListQueryList(templateData.queries); } return (config.caching()) ? @@ -335,8 +344,12 @@ public int hashCode() { return hashCode; } - public int getQueryCount() { - return this.queryList.size(); + public int getExecutableQueryCount() { + return executableQueryCount; + } + + public int getRepresentativeQueryCount() { + return representativeQueryCount; } public String getQueryId(int i) { @@ -350,8 +363,8 @@ public String getQueryId(int i) { * @return String[] of query ids */ public String[] getAllQueryIds() { - String[] out = new String[queryList.size()]; - for (int i = 0; i < queryList.size(); i++) { + String[] out = new String[getRepresentativeQueryCount()]; + for (int i = 0; i < getRepresentativeQueryCount(); i++) { out[i] = getQueryId(i); } return out; diff --git a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java index c748f3244..e23fd4595 100644 --- a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java @@ -63,8 +63,8 @@ public StresstestResultProcessor(String suiteID, this.workerQueryExecutions = new ArrayList[workers.size()][]; for (int i = 0; i < workers.size(); i++) { - this.workerQueryExecutions[i] = new ArrayList[workers.get(i).config().queries().getQueryCount()]; - for (int j = 0; j < workers.get(i).config().queries().getQueryCount(); j++) { + this.workerQueryExecutions[i] = new ArrayList[workers.get(i).config().queries().getRepresentativeQueryCount()]; + for (int j = 0; j < workers.get(i).config().queries().getRepresentativeQueryCount(); j++) { this.workerQueryExecutions[i][j] = new ArrayList<>(); } } @@ -128,7 +128,8 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { m.add(workerRes, RDF.type, IONT.worker); m.add(workerRes, IPROP.workerID, toInfinitePrecisionIntegerLiteral(worker.getWorkerID())); m.add(workerRes, IPROP.workerType, ResourceFactory.createTypedLiteral(worker.getClass().getSimpleName())); - m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral(config.queries().getQueryCount())); + // TODO: is it executable or representative? + m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral(config.queries().getExecutableQueryCount())); m.add(workerRes, IPROP.timeOut, TimeUtils.createTypedDurationLiteral(config.timeout())); if (config.completionTarget() instanceof HttpWorker.QueryMixes) m.add(workerRes, IPROP.noOfQueryMixes, toInfinitePrecisionIntegerLiteral(((HttpWorker.QueryMixes) config.completionTarget()).number())); @@ -155,7 +156,7 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { for (var worker : workers) { var config = worker.config(); var workerQueryIDs = config.queries().getAllQueryIds(); - for (int i = 0; i < config.queries().getQueryCount(); i++) { + for (int i = 0; i < config.queries().getRepresentativeQueryCount(); i++) { Resource workerQueryRes = iresFactory.getWorkerQueryResource(worker, i); Resource queryRes = IRES.getResource(workerQueryIDs[i]); m.add(workerQueryRes, IPROP.queryID, queryRes); @@ -257,7 +258,7 @@ private Model createMetricModel(Metric metric) { if (metric instanceof QueryMetric) { // queries grouped by worker for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { Number metricValue = ((QueryMetric) metric).calculateQueryMetric(workerQueryExecutions[(int) worker.getWorkerID()][i]); if (metricValue != null) { Literal lit = ResourceFactory.createTypedLiteral(metricValue); diff --git a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index e0853166e..ca305a0db 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -173,8 +173,8 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que * @param queryHandler the query handler to preload requests for */ public void preloadRequests(QueryHandler queryHandler) { - final var selector = new LinearQuerySelector(queryHandler.getQueryCount()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + final var selector = new LinearQuerySelector(queryHandler.getExecutableQueryCount()); + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { try { // build request and discard it buildHttpRequest(queryHandler.getNextQueryStream(selector)); diff --git a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java index ef0ffffb8..82f5da164 100644 --- a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java +++ b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java @@ -174,7 +174,7 @@ public CompletableFuture start() { List executionStats = new ArrayList<>(); if (config().completionTarget() instanceof QueryMixes queryMixes) { for (int i = 0; i < queryMixes.number(); i++) { - for (int j = 0; j < config().queries().getQueryCount(); j++) { + for (int j = 0; j < config().queries().getExecutableQueryCount(); j++) { ExecutionStats execution = executeQuery(config().timeout(), false); if (execution == null) throw new RuntimeException("Execution returned null at a place, where it should have never been null."); logExecution(execution); @@ -186,7 +186,7 @@ public CompletableFuture start() { final var startNanos = System.nanoTime(); long queryExecutionCount = 0; int queryMixExecutionCount = 0; - int queryMixSize = config().queries().getQueryCount(); + int queryMixSize = config().queries().getExecutableQueryCount(); long now; while ((now = System.nanoTime()) - startNanos < ((TimeLimit) config.completionTarget()).duration().toNanos()) { final var timeLeft = ((TimeLimit) config.completionTarget()).duration().toNanos() - (now - startNanos); diff --git a/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java b/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java index 6988f0ab9..6ba0989de 100644 --- a/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java +++ b/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java @@ -30,7 +30,12 @@ public String[] getAllQueryIds() { } @Override - public int getQueryCount() { + public int getExecutableQueryCount() { + return queryNumber; + } + + @Override + public int getRepresentativeQueryCount() { return queryNumber; } diff --git a/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java b/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java index 9950c9f9d..7c6d171b7 100644 --- a/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java +++ b/src/test/java/org/aksw/iguana/cc/mockup/MockupWorker.java @@ -68,7 +68,7 @@ public static List createWorkerResults(QueryHandler queries, List sourceType) thro QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQuery(selector); assertEquals(i, selector.getCurrentIndex()); if (FolderQuerySource.class.isAssignableFrom(sourceType)) @@ -110,9 +109,9 @@ public void testQueryStreamWrapper(String json, Class sourceType) t QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertTrue(selector instanceof LinearQuerySelector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQueryStream(selector); assertEquals(i, selector.getCurrentIndex()); final var acutalQuery = new String(wrapper.queryInputStreamSupplier().get().readAllBytes(), StandardCharsets.UTF_8); @@ -131,9 +130,9 @@ public void testQueryStringWrapper(String json, Class sourceType) t QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { final var wrapper = queryHandler.getNextQuery(selector); assertEquals(i, selector.getCurrentIndex()); if (FolderQuerySource.class.isAssignableFrom(sourceType)) @@ -151,10 +150,10 @@ public void testQueryIDs(String json, Class sourceType) { QueryHandler queryHandler = assertDoesNotThrow(() -> mapper.readValue(json, QueryHandler.class)); final var selector = queryHandler.getQuerySelectorInstance(); assertInstanceOf(LinearQuerySelector.class, selector); - assertEquals(queries.size(), queryHandler.getQueryCount()); + assertEquals(queries.size(), queryHandler.getExecutableQueryCount()); assertNotEquals(0, queryHandler.hashCode()); final var allQueryIDs = queryHandler.getAllQueryIds(); - for (int i = 0; i < queryHandler.getQueryCount(); i++) { + for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { assertEquals(queryHandler.hashCode() + ":" + i, allQueryIDs[i]); assertEquals(allQueryIDs[i], queryHandler.getQueryId(i)); } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index 2c552bdf4..d68d5abdb 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -79,7 +79,7 @@ public void testTemplateQueries() throws IOException { .withBody(RESPONSE_JSON))); final var queryHandler = new QueryHandler(queryHandlerConfig); final var selector = queryHandler.getQuerySelectorInstance(); - Assertions.assertEquals(2, queryHandler.getQueryCount()); + Assertions.assertEquals(2, queryHandler.getExecutableQueryCount()); for (int i = 0; i < 2; i++) { final var query = queryHandler.getNextQuery(selector); Assertions.assertEquals("PREFIX rdf: SELECT * WHERE {?s rdf:type ; . ?p }", query.query()); From 5458233537bc9a3d080961bfbb40972481661c36 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:35:47 +0200 Subject: [PATCH 12/34] Update the saving template instances --- .../iguana/cc/query/handler/QueryHandler.java | 114 ++++++++++-------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 8ef2aa714..9012ea832 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -207,9 +207,7 @@ public QueryHandler(Config config) throws IOException { private record TemplateData(List queries, int templates, int[] indices, int[] instanceNumber, int instanceStart) {} - // TODO: storing and reading of instance file private QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { - QuerySource querySource = templateSource; final var originalPath = templateSource.getPath(); final var postfix = String.format("_instances_f%s_l%s.txt", Integer.toUnsignedString(this.config.template.endpoint.hashCode()), Integer.toUnsignedString((int) this.config.template.limit.longValue())); @@ -221,68 +219,86 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr if (Files.exists(instancePath)) { LOGGER.info("Already existing query template instances have been found and will be reused. Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); - // TODO: change this - querySource = createQuerySource(instancePath); // if the instances already exist, use them + // read in the template data + // the header contains the number of templates and the index (index doesn't count headers) of the first instance + // afterward for each template the index of the template and the number of instances are stored + String header; + try (var reader = Files.newBufferedReader(instancePath)) { + header = reader.readLine(); + Pattern digitRegex = Pattern.compile("\\d+"); + Matcher matcher = digitRegex.matcher(header); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + int templates = Integer.parseInt(matcher.group()); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + int instanceStart = Integer.parseInt(matcher.group()); + final var indices = new int[templates]; + final var instanceNumber = new int[templates]; + for (int i = 0; i < templates; i++) { + if (!matcher.find()) throw new IOException("Invalid instance file header"); + indices[i] = Integer.parseInt(matcher.group()); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + instanceNumber[i] = Integer.parseInt(matcher.group()); + } + templateData = new TemplateData(reader.lines().toList(), templates, indices, instanceNumber, instanceStart); + } } else { - templateData = instantiateTemplateQueries(querySource, config.template); + templateData = instantiateTemplateQueries(templateSource, config.template); - // TODO: change this if (config.template.save) { // save the instances to a file Files.createFile(instancePath); + try (var writer = Files.newBufferedWriter(instancePath)) { - for (String instance : templateData.queries()) { + // write header line + writer.write(String.format("templates: %d instances_start: %d ", templateData.templates, templateData.instanceStart)); + writer.write(String.format("%s", IntStream.range(0, templateData.templates) + .mapToObj(i -> "index: " + templateData.indices[i] + " instances_count: " + templateData.instanceNumber[i]) + .collect(Collectors.joining(" ")))); + writer.newLine(); + // write queries and instances + for (String instance : templateData.queries) { writer.write(instance); writer.newLine(); } } - // create a new query source based on the new instance file - querySource = createQuerySource(instancePath); - } else { - // query source isn't necessary, because queries aren't stored in a file, - // directly return a list of the instances instead - // return new StringListQueryList(templateData.queries()); } + } - AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template - AtomicInteger index = new AtomicInteger(0); // index of the current query - AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template - queryData = templateData.queries.stream().map( - query -> { - // once the template instances start, the template index is reset and reused for the instances - // to track to which template the instances belong - if (index.get() == templateData.instanceStart) templateIndex.set(0); - - if (index.get() >= templateData.instanceStart) { - // query is an instance of a template - - // if the instance id is equal to the number of instances for the current template, - // the next template is used - if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { - templateIndex.getAndIncrement(); - instanceId.set(0); - } - return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); - } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { - // query is a template + AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template + AtomicInteger index = new AtomicInteger(0); // index of the current query + AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template + queryData = templateData.queries.stream().map( + query -> { + // once the template instances start, the template index is reset and reused for the instances + // to track to which template the instances belong + if (index.get() == templateData.instanceStart) templateIndex.set(0); + + if (index.get() >= templateData.instanceStart) { + // query is an instance of a template + + // if the instance id is equal to the number of instances for the current template, + // the next template is used + if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { templateIndex.getAndIncrement(); - return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); - } else { - // query is neither a template nor an instance - final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); - return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); + instanceId.set(0); } + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); + } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { + // query is a template + templateIndex.getAndIncrement(); + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); + } else { + // query is neither a template nor an instance + final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); + return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); } - ).toList(); - this.executableQueryCount = templateData.queries.size() - templateData.templates; - this.representativeQueryCount = config.template.individualResults ? - templateData.queries.size() - templateData.templates : - templateData.queries.size() - templateData.instanceStart; - return new StringListQueryList(templateData.queries); - } - return (config.caching()) ? - new FileCachingQueryList(querySource) : // if caching is enabled, cache the instances - new FileReadingQueryList(querySource); // if caching is disabled, read the instances from the file every time + } + ).toList(); + this.executableQueryCount = templateData.queries.size() - templateData.templates; + this.representativeQueryCount = config.template.individualResults ? + templateData.queries.size() - templateData.templates : + templateData.instanceStart; + return new StringListQueryList(templateData.queries); } /** From bfa61c4a2844c29575d680eab93dfff73d7e0143 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:03:05 +0200 Subject: [PATCH 13/34] Fix individual template instances results --- .../iguana/cc/query/handler/QueryHandler.java | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 9012ea832..6bb275aa8 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -145,7 +145,7 @@ public Template(URI endpoint, Long limit, Boolean save, Boolean individualResult this.endpoint = endpoint; this.limit = limit == null ? 2000 : limit; this.save = save == null || save; - this.individualResults = individualResults != null; + this.individualResults = individualResults == null; } } } @@ -269,6 +269,9 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template queryData = templateData.queries.stream().map( query -> { + // If "individualResults" is turned on, give the query templates to last ids, so that there aren't + // any gaps in the ids and results. + // once the template instances start, the template index is reset and reused for the instances // to track to which template the instances belong if (index.get() == templateData.instanceStart) templateIndex.set(0); @@ -282,14 +285,26 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr templateIndex.getAndIncrement(); instanceId.set(0); } + + if (config.template.individualResults) { + return new QueryData(index.getAndIncrement() - templateData.templates, QueryData.QueryType.TEMPLATE_INSTANCE, templateData.queries.size() - templateData.templates + templateIndex.get()); + } return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { // query is a template + if (config.template.individualResults) { + // give the templates the last ids, so that there aren't any gaps in the ids and results + index.incrementAndGet(); + return new QueryData(templateData.queries.size() - templateData.templates + templateIndex.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); + } templateIndex.getAndIncrement(); return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); } else { // query is neither a template nor an instance final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); + if (config.template.individualResults) { + return new QueryData(index.getAndIncrement() - templateIndex.get(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); + } return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); } } @@ -327,12 +342,12 @@ public QuerySelector getQuerySelectorInstance() { public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { final var queryIndex = getNextQueryIndex(querySelector); - return new QueryStringWrapper(queryIndex[0], queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); + return new QueryStringWrapper(queryData.get(queryIndex[0]).queryId(), queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); } public QueryStreamWrapper getNextQueryStream(QuerySelector querySelector) { final var queryIndex = getNextQueryIndex(querySelector); - return new QueryStreamWrapper(queryIndex[0], config.caching(), () -> { + return new QueryStreamWrapper(queryData.get(queryIndex[0]).queryId(), config.caching(), () -> { try { return this.queryList.getQueryStream(queryIndex[0]); } catch (IOException e) { From 9ee14ed05a2b394eec5e758a88229b6d4e1add0e Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:11:22 +0200 Subject: [PATCH 14/34] Add some comments --- .../org/aksw/iguana/cc/query/handler/QueryHandler.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 6bb275aa8..e0f4f0538 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -162,8 +162,8 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; - int executableQueryCount = 0; - int representativeQueryCount = 0; + int executableQueryCount = 0; // stores the number of queries that can be executed + int representativeQueryCount = 0; // stores the number of queries that are represented in the results private int workerCount = 0; // give every worker inside the same worker config an offset seed @@ -264,6 +264,7 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr } } + // initialize queryData based on the template data AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template AtomicInteger index = new AtomicInteger(0); // index of the current query AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template @@ -309,6 +310,9 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr } } ).toList(); + + // set the number of queries that can be executed and the number of queries + // that are represented in the results this.executableQueryCount = templateData.queries.size() - templateData.templates; this.representativeQueryCount = config.template.individualResults ? templateData.queries.size() - templateData.templates : From d0c99c0edebbe772db4dfc86bb58693c11c82a7b Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:15:54 +0200 Subject: [PATCH 15/34] Update schema --- schema/iguana-schema.json | 3 +++ src/main/resources/iguana-schema.json | 3 +++ 2 files changed, 6 insertions(+) diff --git a/schema/iguana-schema.json b/schema/iguana-schema.json index 0ad54c1b7..cf1539f96 100644 --- a/schema/iguana-schema.json +++ b/schema/iguana-schema.json @@ -351,6 +351,9 @@ }, "save": { "type": "boolean" + }, + "individualResults": { + "type": "boolean" } }, "required": [ diff --git a/src/main/resources/iguana-schema.json b/src/main/resources/iguana-schema.json index d92fb8d67..621bc3e10 100644 --- a/src/main/resources/iguana-schema.json +++ b/src/main/resources/iguana-schema.json @@ -351,6 +351,9 @@ }, "save": { "type": "boolean" + }, + "individualResults": { + "type": "boolean" } }, "required": [ From 2c78c062c76505266c69d9c602198504d25f4359 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:16:42 +0200 Subject: [PATCH 16/34] Change default behavior of query templates --- .../java/org/aksw/iguana/cc/query/handler/QueryHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index e0f4f0538..9de16c263 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -145,7 +145,7 @@ public Template(URI endpoint, Long limit, Boolean save, Boolean individualResult this.endpoint = endpoint; this.limit = limit == null ? 2000 : limit; this.save = save == null || save; - this.individualResults = individualResults == null; + this.individualResults = individualResults != null; } } } From f868736355f64336fbf67d56b9b5427a113f3f56 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:04:54 +0200 Subject: [PATCH 17/34] Update tests --- .../query/handler/QueryHandlerConfigTest.java | 15 +++++++- .../cc/query/handler/TemplateQueriesTest.java | 35 +++++++++++++++++++ .../suite-configs/valid/config-full.yaml | 1 + 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java index 243fade70..194214776 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java @@ -73,11 +73,24 @@ private static Stream testDeserializationData() { QueryHandler.Config.Order.RANDOM, 42L, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true, null) + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true, false) ), """ {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql"}} """ + ), + Arguments.of(new QueryHandler.Config("some.queries", + QueryHandler.Config.Format.SEPARATOR, + "\n", + true, + QueryHandler.Config.Order.RANDOM, + 42L, + QueryHandler.Config.Language.SPARQL, + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 10L, false, true) + ), + """ + {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql", "limit": 10, "save": false, "individualResults": true }} + """ ) ); } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index d68d5abdb..63d3ae4ca 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -104,4 +104,39 @@ public void testMalformedTemplateQuery() throws IOException { Assertions.assertThrows(QueryParseException.class, () -> new QueryHandler(queryHandlerConfig)); } + @Test + public void testSubsumedQueries() throws IOException { + String templateQuery = "PREFIX rdf: SELECT * WHERE {?s rdf:type %%var0%% ; %%var1%% %%var2%%. %%var2%% ?p }"; + tempTemplateFile = Files.createTempFile(parentFolder, "Template", ".txt"); + Files.writeString(tempTemplateFile, templateQuery, StandardCharsets.UTF_8); + final var queryHandlerConfig = new QueryHandler.Config( + tempTemplateFile.toString(), + QueryHandler.Config.Format.ONE_PER_LINE, + null, + true, + QueryHandler.Config.Order.LINEAR, + null, + QueryHandler.Config.Language.SPARQL, + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false, false) + ); + wm.stubFor(get(anyUrl()) + .withQueryParam("query", matching("PREFIX\\s+rdf:\\s+\\s+SELECT\\s+DISTINCT\\s+\\?var0\\s+\\?var1\\s+\\?var2\\s+WHERE\\s+\\{\\s*\\?s\\s+rdf:type\\s+\\?var0\\s*;\\s*\\?var1\\s+\\?var2\\s*\\.\\s*\\?var2\\s+\\?p\\s+\\s*}\\s+LIMIT\\s+2000\\s*")) + .willReturn(aResponse() + .withStatus(200) + .withHeader("Content-Type", "application/sparql-results+json") + .withBody(RESPONSE_JSON))); + final var queryHandler = new QueryHandler(queryHandlerConfig); + final var selector = queryHandler.getQuerySelectorInstance(); + Assertions.assertEquals(2, queryHandler.getExecutableQueryCount()); + Assertions.assertEquals(1, queryHandler.getRepresentativeQueryCount()); + var query = queryHandler.getNextQuery(selector); + Assertions.assertEquals(0, query.resultId()); + Assertions.assertEquals(1, query.index()); + Assertions.assertFalse(query.update()); + query = queryHandler.getNextQuery(selector); + Assertions.assertEquals(0, query.resultId()); + Assertions.assertEquals(2, query.index()); + Assertions.assertFalse(query.update()); + } + } diff --git a/src/test/resources/suite-configs/valid/config-full.yaml b/src/test/resources/suite-configs/valid/config-full.yaml index ad228cc29..260add8eb 100644 --- a/src/test/resources/suite-configs/valid/config-full.yaml +++ b/src/test/resources/suite-configs/valid/config-full.yaml @@ -57,6 +57,7 @@ tasks: endpoint: "http://localhost:3030/sp2b" limit: 2000 save: false + individualResults: false timeout: 2S connection: fuseki completionTarget: From bbcae532665ff6d7007d30cceb9fe4bcd5d86dea Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:05:07 +0200 Subject: [PATCH 18/34] Fix configuration --- .../java/org/aksw/iguana/cc/query/handler/QueryHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 9de16c263..b487f0455 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -145,7 +145,7 @@ public Template(URI endpoint, Long limit, Boolean save, Boolean individualResult this.endpoint = endpoint; this.limit = limit == null ? 2000 : limit; this.save = save == null || save; - this.individualResults = individualResults != null; + this.individualResults = individualResults != null && individualResults; } } } From 779bd341c6d7f70d90c3f4b9eb8442a9f3c28cb5 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:05:15 +0200 Subject: [PATCH 19/34] Update documentation --- docs/configuration/queries.md | 16 +++++++++++----- example-suite.yml | 3 ++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/configuration/queries.md b/docs/configuration/queries.md index ffb3bf90a..25758d0b6 100644 --- a/docs/configuration/queries.md +++ b/docs/configuration/queries.md @@ -130,16 +130,21 @@ The results may look like the following: ### Configuration The `template` attribute has the following properties: -| property | required | default | description | example | -|----------|----------|---------|---------------------------------------------------------------------|-----------------------------| -| endpoint | yes | | The endpoint to query. | `http://dbpedia.org/sparql` | -| limit | no | `2000` | The maximum number of instances per query template. | `100` | -| save | no | `true` | If set to `true`, query instances will be saved in a separate file. | `false` | +| property | required | default | description | example | +|-------------------|----------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------| +| endpoint | yes | | The endpoint to query. | `http://dbpedia.org/sparql` | +| limit | no | `2000` | The maximum number of instances per query template. | `100` | +| save | no | `true` | If set to `true`, query instances will be saved in a separate file. | `false` | +| individualResults | no | `false` | If set to `true`, the results of each individual template instance will be reported, otherwise if set to `false` their results will be subsumed for the query template. | `true` | If the `save` attribute is set to `true`, the instances will be saved in a separate file in the same directory as the query templates. If the query templates are stored in a folder, the instances will be saved in the parent directory. +If the `individualResults` attribute is set to `false`, +the results of the instances will be subsumed for the query template. +The query template will then be considered as an actual query in the results. + Example of query configuration with query templates: ```yaml queries: @@ -149,4 +154,5 @@ queries: endpoint: "http://dbpedia.org/sparql" limit: 100 save: true + individualResults: true ``` diff --git a/example-suite.yml b/example-suite.yml index c4b3d2803..50d65cef2 100644 --- a/example-suite.yml +++ b/example-suite.yml @@ -75,10 +75,11 @@ tasks: requestType: post query queries: path: "./example/query_pattern.txt" - pattern: + template: endpoint: "https://dbpedia.org/sparql" limit: 1000 save: false + individualResults: false timeout: 180s completionTarget: duration: 1000s From b3d980fb905f35565f50f7cb8168413500117b3d Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:19:44 +0200 Subject: [PATCH 20/34] Add some comments (to trigger GitHub actions) --- .../aksw/iguana/cc/query/handler/QueryHandler.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index b487f0455..efce866bf 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -270,8 +270,11 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template queryData = templateData.queries.stream().map( query -> { - // If "individualResults" is turned on, give the query templates to last ids, so that there aren't - // any gaps in the ids and results. + // If "individualResults" is turned on, move the query templates outside the range of + // "representativeQueryCount" to avoid them being represented in the results. + // Otherwise, if "individualResults" is turned off, the instances need to be moved outside the range + // of "representativeQueryCount", but because "instantiateTemplateQueries" already appends the + // instances to the end of the original queries, this will already be done. // once the template instances start, the template index is reset and reused for the instances // to track to which template the instances belong @@ -432,6 +435,10 @@ public Config getConfig() { * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "123"}
* SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
* SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
+ * + * The template data that this method returns will contain a list of all queries, + * where the first queries are the original queries including the query templates. + * The query instances will be appended to the original queries. */ private static TemplateData instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { // charset for generating random variable names From 1c9ec738b7b045ffdd7df9940b17d124bdef0e97 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 27 Sep 2024 15:27:08 +0200 Subject: [PATCH 21/34] Fix minor bug that caused an infinite loop --- graalvm/suite.yml | 9 +++++---- .../org/aksw/iguana/cc/query/handler/QueryHandler.java | 9 +++++++++ .../org/aksw/iguana/cc/utils/http/RequestFactory.java | 3 +-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/graalvm/suite.yml b/graalvm/suite.yml index 75c9bd4c7..c3ee89d30 100644 --- a/graalvm/suite.yml +++ b/graalvm/suite.yml @@ -6,7 +6,7 @@ connections: - name: "Blazegraph" version: "1.1.1" dataset: "DatasetName" - endpoint: "http://localhost:9999/blazegraph/sparql" + endpoint: "https://dbpedia.org/sparql" authentication: user: "user" password: "test" @@ -60,13 +60,14 @@ tasks: seed: 123 lang: "SPARQL" template: - endpoint: "http://dbpedia.org/sparql" + endpoint: "https://dbpedia.org/sparql" limit: 1 save: false + individualResults: false timeout: 2s connection: Blazegraph completionTarget: - duration: 1s + duration: 0.5s acceptHeader: "application/sparql-results+json" requestType: get query parseResults: true @@ -78,7 +79,7 @@ tasks: timeout: 3m connection: Blazegraph completionTarget: - duration: 1s + duration: 0.5s requestType: get query acceptHeader: "application/sparql-results+json" - number: 1 diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index efce866bf..f1ead708c 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -347,6 +347,15 @@ public QuerySelector getQuerySelectorInstance() { throw new IllegalStateException("Unknown query selection order: " + config.order()); } + public QuerySelector getQuerySelectorInstance(Config.Order type) { + switch (type) { + case LINEAR -> { return new LinearQuerySelector(queryList.size()); } + case RANDOM -> { return new RandomQuerySelector(queryList.size(), config.seed() + workerCount++); } + } + + throw new IllegalStateException("Unknown query selection order: " + type); + } + public QueryStringWrapper getNextQuery(QuerySelector querySelector) throws IOException { final var queryIndex = getNextQueryIndex(querySelector); return new QueryStringWrapper(queryData.get(queryIndex[0]).queryId(), queryList.getQuery(queryIndex[0]), queryData.get(queryIndex[0]).update(), queryIndex[1]); diff --git a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java index ca305a0db..9d44cb95a 100644 --- a/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java +++ b/src/main/java/org/aksw/iguana/cc/utils/http/RequestFactory.java @@ -4,7 +4,6 @@ import com.fasterxml.jackson.annotation.JsonValue; import org.aksw.iguana.cc.config.elements.ConnectionConfig; import org.aksw.iguana.cc.query.handler.QueryHandler; -import org.aksw.iguana.cc.query.selector.impl.LinearQuerySelector; import org.aksw.iguana.cc.worker.HttpWorker; import org.aksw.iguana.cc.worker.impl.SPARQLProtocolWorker; import org.apache.hc.core5.http.HttpHeaders; @@ -173,7 +172,7 @@ public AsyncRequestProducer buildHttpRequest(QueryHandler.QueryStreamWrapper que * @param queryHandler the query handler to preload requests for */ public void preloadRequests(QueryHandler queryHandler) { - final var selector = new LinearQuerySelector(queryHandler.getExecutableQueryCount()); + final var selector = queryHandler.getQuerySelectorInstance(QueryHandler.Config.Order.LINEAR); for (int i = 0; i < queryHandler.getExecutableQueryCount(); i++) { try { // build request and discard it From 0beef61ab00e01b772b669b5a0c846d1d69ca120 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Wed, 6 Nov 2024 10:55:44 +0100 Subject: [PATCH 22/34] Change sparql endpoint for testing --- graalvm/suite.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graalvm/suite.yml b/graalvm/suite.yml index c3ee89d30..5ae514bd3 100644 --- a/graalvm/suite.yml +++ b/graalvm/suite.yml @@ -6,7 +6,7 @@ connections: - name: "Blazegraph" version: "1.1.1" dataset: "DatasetName" - endpoint: "https://dbpedia.org/sparql" + endpoint: "https://query.wikidata.org/" authentication: user: "user" password: "test" From eab42dfccb2bde143550f68fcca3df29e5227e0b Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Wed, 6 Nov 2024 10:56:58 +0100 Subject: [PATCH 23/34] Add javadocs --- .../iguana/cc/query/handler/QueryHandler.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index a06462b30..565205434 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -154,7 +154,31 @@ public Template(URI endpoint, Long limit, Boolean save, Boolean individualResult } } + /** + * Wrapper for the next query that will be executed. + * The wrapper contains the query as a string. + * The result id is only set if the query is a template instance. + * They are used to aggregate the results of multiple queries by using the same id. + * + * @param index the index of the query + * @param query the query string + * @param update whether the query is an update query + * @param resultId the query id that should be used inside the result + */ public record QueryStringWrapper(int index, String query, boolean update, Integer resultId) {} + + /** + * Wrapper for the next query that will be executed. + * The wrapper contains the query as an input stream supplier, that generates an input stream with the query. + * The result id is only set if the query is a template instance. + * They are used to aggregate the results of multiple queries by using the same id. + * + * @param index the index of the query + * @param cached whether the query is cached in memory + * @param queryInputStreamSupplier the supplier that generates the input stream with the query + * @param update whether the query is an update query + * @param resultId the query id that should be used inside the result + */ public record QueryStreamWrapper(int index, boolean cached, Supplier queryInputStreamSupplier, boolean update, Integer resultId) {} From f595d225cdacf7ef44613806f7857a8184ec4df0 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Wed, 6 Nov 2024 10:58:28 +0100 Subject: [PATCH 24/34] Refactor attribute name --- .../impl/AggregatedExecutionStatistics.java | 2 +- .../metrics/impl/EachExecutionStatistic.java | 2 +- .../iguana/cc/query/handler/QueryHandler.java | 23 ++++++++----------- .../tasks/impl/StresstestResultProcessor.java | 8 +++---- .../iguana/cc/mockup/MockupQueryHandler.java | 2 +- .../cc/query/handler/TemplateQueriesTest.java | 2 +- 6 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java index 844168044..72ca4bf5e 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/AggregatedExecutionStatistics.java @@ -29,7 +29,7 @@ public AggregatedExecutionStatistics() { public Model createMetricModel(List workers, List[][] data, IRES.Factory iresFactory) { Model m = ModelFactory.createDefaultModel(); for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentedQueryCount(); i++) { Resource queryRes = iresFactory.getWorkerQueryResource(worker, i); m.add(createAggregatedModel(data[(int) worker.getWorkerID()][i], queryRes)); } diff --git a/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java b/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java index b3c3f1de3..ce45b63d7 100644 --- a/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java +++ b/src/main/java/org/aksw/iguana/cc/metrics/impl/EachExecutionStatistic.java @@ -26,7 +26,7 @@ public EachExecutionStatistic() { public Model createMetricModel(List workers, List[][] data, IRES.Factory iresFactory) { Model m = ModelFactory.createDefaultModel(); for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentedQueryCount(); i++) { Resource workerQueryResource = iresFactory.getWorkerQueryResource(worker, i); Resource queryRes = IRES.getResource(worker.config().queries().getQueryId(i)); BigInteger run = BigInteger.ONE; diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 565205434..34a3ebb1b 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -19,9 +19,6 @@ import org.aksw.iguana.cc.query.source.impl.FolderQuerySource; import org.apache.jena.query.*; import org.apache.jena.sparql.exec.http.QueryExecutionHTTP; -import org.apache.jena.sparql.exec.http.QueryExecutionHTTPBuilder; -import org.apache.jena.sparql.service.single.ServiceExecutor; -import org.apache.jena.sparql.service.single.ServiceExecutorHttp; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -190,8 +187,8 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; - int executableQueryCount = 0; // stores the number of queries that can be executed - int representativeQueryCount = 0; // stores the number of queries that are represented in the results + private int executableQueryCount = 0; // stores the number of queries that can be executed + private int representedQueryCount = 0; // stores the number of queries that are represented in the results private int workerCount = 0; // give every worker inside the same worker config an offset seed private int totalWorkerCount = 0; @@ -229,7 +226,7 @@ public QueryHandler(Config config) throws IOException { } }).collect(Collectors.toList())); executableQueryCount = queryList.size(); - representativeQueryCount = queryList.size(); + representedQueryCount = queryList.size(); } this.hashCode = queryList.hashCode(); } @@ -304,9 +301,9 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr queryData = templateData.queries.stream().map( query -> { // If "individualResults" is turned on, move the query templates outside the range of - // "representativeQueryCount" to avoid them being represented in the results. + // "representedQueryCount" to avoid them being represented in the results. // Otherwise, if "individualResults" is turned off, the instances need to be moved outside the range - // of "representativeQueryCount", but because "instantiateTemplateQueries" already appends the + // of "representedQueryCount", but because "instantiateTemplateQueries" already appends the // instances to the end of the original queries, this will already be done. // once the template instances start, the template index is reset and reused for the instances @@ -350,7 +347,7 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr // set the number of queries that can be executed and the number of queries // that are represented in the results this.executableQueryCount = templateData.queries.size() - templateData.templates; - this.representativeQueryCount = config.template.individualResults ? + this.representedQueryCount = config.template.individualResults ? templateData.queries.size() - templateData.templates : templateData.instanceStart; return new StringListQueryList(templateData.queries); @@ -428,8 +425,8 @@ public int getExecutableQueryCount() { return executableQueryCount; } - public int getRepresentativeQueryCount() { - return representativeQueryCount; + public int getRepresentedQueryCount() { + return representedQueryCount; } public String getQueryId(int i) { @@ -443,8 +440,8 @@ public String getQueryId(int i) { * @return String[] of query ids */ public String[] getAllQueryIds() { - String[] out = new String[getRepresentativeQueryCount()]; - for (int i = 0; i < getRepresentativeQueryCount(); i++) { + String[] out = new String[getRepresentedQueryCount()]; + for (int i = 0; i < getRepresentedQueryCount(); i++) { out[i] = getQueryId(i); } return out; diff --git a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java index e23fd4595..3b0cac041 100644 --- a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java @@ -63,8 +63,8 @@ public StresstestResultProcessor(String suiteID, this.workerQueryExecutions = new ArrayList[workers.size()][]; for (int i = 0; i < workers.size(); i++) { - this.workerQueryExecutions[i] = new ArrayList[workers.get(i).config().queries().getRepresentativeQueryCount()]; - for (int j = 0; j < workers.get(i).config().queries().getRepresentativeQueryCount(); j++) { + this.workerQueryExecutions[i] = new ArrayList[workers.get(i).config().queries().getRepresentedQueryCount()]; + for (int j = 0; j < workers.get(i).config().queries().getRepresentedQueryCount(); j++) { this.workerQueryExecutions[i][j] = new ArrayList<>(); } } @@ -156,7 +156,7 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { for (var worker : workers) { var config = worker.config(); var workerQueryIDs = config.queries().getAllQueryIds(); - for (int i = 0; i < config.queries().getRepresentativeQueryCount(); i++) { + for (int i = 0; i < config.queries().getRepresentedQueryCount(); i++) { Resource workerQueryRes = iresFactory.getWorkerQueryResource(worker, i); Resource queryRes = IRES.getResource(workerQueryIDs[i]); m.add(workerQueryRes, IPROP.queryID, queryRes); @@ -258,7 +258,7 @@ private Model createMetricModel(Metric metric) { if (metric instanceof QueryMetric) { // queries grouped by worker for (var worker : workers) { - for (int i = 0; i < worker.config().queries().getRepresentativeQueryCount(); i++) { + for (int i = 0; i < worker.config().queries().getRepresentedQueryCount(); i++) { Number metricValue = ((QueryMetric) metric).calculateQueryMetric(workerQueryExecutions[(int) worker.getWorkerID()][i]); if (metricValue != null) { Literal lit = ResourceFactory.createTypedLiteral(metricValue); diff --git a/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java b/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java index 6ba0989de..804fb4bb9 100644 --- a/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java +++ b/src/test/java/org/aksw/iguana/cc/mockup/MockupQueryHandler.java @@ -35,7 +35,7 @@ public int getExecutableQueryCount() { } @Override - public int getRepresentativeQueryCount() { + public int getRepresentedQueryCount() { return queryNumber; } diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index 63d3ae4ca..0415b0a5b 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -128,7 +128,7 @@ public void testSubsumedQueries() throws IOException { final var queryHandler = new QueryHandler(queryHandlerConfig); final var selector = queryHandler.getQuerySelectorInstance(); Assertions.assertEquals(2, queryHandler.getExecutableQueryCount()); - Assertions.assertEquals(1, queryHandler.getRepresentativeQueryCount()); + Assertions.assertEquals(1, queryHandler.getRepresentedQueryCount()); var query = queryHandler.getNextQuery(selector); Assertions.assertEquals(0, query.resultId()); Assertions.assertEquals(1, query.index()); From c93768d2e35fdce26ae2ae4e8649fff7ce001ab0 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Wed, 6 Nov 2024 11:05:14 +0100 Subject: [PATCH 25/34] Refactor method name --- src/main/java/org/aksw/iguana/cc/query/QueryData.java | 1 + .../java/org/aksw/iguana/cc/query/handler/QueryHandler.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index 3f71c0b92..617e8981b 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -41,6 +41,7 @@ public static List generate(Collection queries) { } public static boolean checkUpdate(InputStream query) { + public static boolean checkIfUpdate(InputStream query) { try { UpdateFactory.read(query); // Throws an exception if the query is not an update query return true; diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 34a3ebb1b..8401a3370 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -335,7 +335,7 @@ private QueryList initializeTemplateQueryHandler(QuerySource templateSource) thr return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); } else { // query is neither a template nor an instance - final var update = QueryData.checkUpdate(new ByteArrayInputStream(query.getBytes())); + final var update = QueryData.checkIfUpdate(new ByteArrayInputStream(query.getBytes())); if (config.template.individualResults) { return new QueryData(index.getAndIncrement() - templateIndex.get(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); } From 088419520adde93d66e6c32d44e63ec5ef353183 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Wed, 6 Nov 2024 11:05:25 +0100 Subject: [PATCH 26/34] Add more javadocs --- .../java/org/aksw/iguana/cc/query/QueryData.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index 617e8981b..70ecce540 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -22,6 +22,14 @@ public enum QueryType { TEMPLATE_INSTANCE } + /** + * Generates a list of QueryData objects for a collection of queries. + * The method uses the Jena library to check if the query is an update query. + * It only checks if the query is an update query or not and sets their index in the order they were given. + * + * @param queries collection of input streams of queries + * @return list of QueryData objects + */ public static List generate(Collection queries) { final var queryData = new ArrayList(); int i = 0; @@ -40,7 +48,13 @@ public static List generate(Collection queries) { return queryData; } - public static boolean checkUpdate(InputStream query) { + /** + * Checks if the given query is an update query. + * The method uses the Jena library to check if the query is an update query. + * + * @param query input stream of the query + * @return true if the query is an update query, false otherwise + */ public static boolean checkIfUpdate(InputStream query) { try { UpdateFactory.read(query); // Throws an exception if the query is not an update query From 8489f6ee9a1412475c2717cef61f9d5ff5d6e4db Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Wed, 6 Nov 2024 16:42:24 +0100 Subject: [PATCH 27/34] Update src/main/java/org/aksw/iguana/cc/query/QueryData.java Co-authored-by: Alexander Bigerl --- src/main/java/org/aksw/iguana/cc/query/QueryData.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index 70ecce540..a43799953 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -19,7 +19,8 @@ public enum QueryType { DEFAULT, UPDATE, TEMPLATE, - TEMPLATE_INSTANCE + TEMPLATE_INSTANCE_DEFAULT, + TEMPLATE_INSTANCE_UPDATE } /** From 156d3abfa759b4c62755de9a7c5ab307442f64ca Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Wed, 6 Nov 2024 16:46:40 +0100 Subject: [PATCH 28/34] Revert "Update src/main/java/org/aksw/iguana/cc/query/QueryData.java" This reverts commit 8489f6ee9a1412475c2717cef61f9d5ff5d6e4db. --- src/main/java/org/aksw/iguana/cc/query/QueryData.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/QueryData.java b/src/main/java/org/aksw/iguana/cc/query/QueryData.java index a43799953..70ecce540 100644 --- a/src/main/java/org/aksw/iguana/cc/query/QueryData.java +++ b/src/main/java/org/aksw/iguana/cc/query/QueryData.java @@ -19,8 +19,7 @@ public enum QueryType { DEFAULT, UPDATE, TEMPLATE, - TEMPLATE_INSTANCE_DEFAULT, - TEMPLATE_INSTANCE_UPDATE + TEMPLATE_INSTANCE } /** From 0480bae8bf839b9bd99034b2b999048c4ade112e Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:21:21 +0100 Subject: [PATCH 29/34] Delegate handling of query templates to an extra class --- .../iguana/cc/query/handler/QueryHandler.java | 226 +-------------- .../cc/query/handler/TemplateHandler.java | 266 ++++++++++++++++++ 2 files changed, 271 insertions(+), 221 deletions(-) create mode 100644 src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 8401a3370..903f63d1a 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -6,7 +6,6 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import org.aksw.iguana.cc.query.QueryData; -import org.aksw.iguana.cc.query.list.impl.StringListQueryList; import org.aksw.iguana.cc.query.selector.QuerySelector; import org.aksw.iguana.cc.query.selector.impl.LinearQuerySelector; import org.aksw.iguana.cc.query.selector.impl.RandomQuerySelector; @@ -17,25 +16,17 @@ import org.aksw.iguana.cc.query.source.impl.FileLineQuerySource; import org.aksw.iguana.cc.query.source.impl.FileSeparatorQuerySource; import org.aksw.iguana.cc.query.source.impl.FolderQuerySource; -import org.apache.jena.query.*; -import org.apache.jena.sparql.exec.http.QueryExecutionHTTP; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; -import java.nio.file.Files; import java.nio.file.Path; import java.util.*; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; -import java.util.stream.Stream; /** * The QueryHandler is used by every worker that extends the AbstractWorker. @@ -213,7 +204,11 @@ public QueryHandler(Config config) throws IOException { // initialize queryList based on the given configuration if (config.template() != null) { - queryList = initializeTemplateQueryHandler(querySource); + final var templateHandler = new TemplateHandler(config.template); + queryList = templateHandler.initializeTemplateQueryHandler(querySource); + queryData = templateHandler.getQueryData(); + executableQueryCount = templateHandler.getExecutableQueryCount(); + representedQueryCount = templateHandler.getRepresentedQueryCount(); } else { queryList = (config.caching()) ? new FileCachingQueryList(querySource) : @@ -235,124 +230,6 @@ public void setTotalWorkerCount(int workers) { this.totalWorkerCount = workers; } - private record TemplateData(List queries, int templates, int[] indices, int[] instanceNumber, int instanceStart) {} - - private QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { - final var originalPath = templateSource.getPath(); - final var postfix = String.format("_instances_f%s_l%s.txt", - Integer.toUnsignedString(this.config.template.endpoint.hashCode()), Integer.toUnsignedString((int) this.config.template.limit.longValue())); - final Path instancePath = Files.isDirectory(originalPath) ? - originalPath.resolveSibling(originalPath.getFileName() + postfix) : // if the source of the query templates is a folder, the instances will be saved in a file with the same name as the folder - originalPath.resolveSibling(originalPath.getFileName().toString().split("\\.")[0] + postfix); // if the source of the query templates is a file, the instances will be saved in a file with the same name as the file - TemplateData templateData; - - if (Files.exists(instancePath)) { - LOGGER.info("Already existing query template instances have been found and will be reused. Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); - - // read in the template data - // the header contains the number of templates and the index (index doesn't count headers) of the first instance - // afterward for each template the index of the template and the number of instances are stored - String header; - try (var reader = Files.newBufferedReader(instancePath)) { - header = reader.readLine(); - Pattern digitRegex = Pattern.compile("\\d+"); - Matcher matcher = digitRegex.matcher(header); - if (!matcher.find()) throw new IOException("Invalid instance file header"); - int templates = Integer.parseInt(matcher.group()); - if (!matcher.find()) throw new IOException("Invalid instance file header"); - int instanceStart = Integer.parseInt(matcher.group()); - final var indices = new int[templates]; - final var instanceNumber = new int[templates]; - for (int i = 0; i < templates; i++) { - if (!matcher.find()) throw new IOException("Invalid instance file header"); - indices[i] = Integer.parseInt(matcher.group()); - if (!matcher.find()) throw new IOException("Invalid instance file header"); - instanceNumber[i] = Integer.parseInt(matcher.group()); - } - templateData = new TemplateData(reader.lines().toList(), templates, indices, instanceNumber, instanceStart); - } - } else { - templateData = instantiateTemplateQueries(templateSource, config.template); - - if (config.template.save) { - // save the instances to a file - Files.createFile(instancePath); - - try (var writer = Files.newBufferedWriter(instancePath)) { - // write header line - writer.write(String.format("templates: %d instances_start: %d ", templateData.templates, templateData.instanceStart)); - writer.write(String.format("%s", IntStream.range(0, templateData.templates) - .mapToObj(i -> "index: " + templateData.indices[i] + " instances_count: " + templateData.instanceNumber[i]) - .collect(Collectors.joining(" ")))); - writer.newLine(); - // write queries and instances - for (String instance : templateData.queries) { - writer.write(instance); - writer.newLine(); - } - } - } - } - - // initialize queryData based on the template data - AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template - AtomicInteger index = new AtomicInteger(0); // index of the current query - AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template - queryData = templateData.queries.stream().map( - query -> { - // If "individualResults" is turned on, move the query templates outside the range of - // "representedQueryCount" to avoid them being represented in the results. - // Otherwise, if "individualResults" is turned off, the instances need to be moved outside the range - // of "representedQueryCount", but because "instantiateTemplateQueries" already appends the - // instances to the end of the original queries, this will already be done. - - // once the template instances start, the template index is reset and reused for the instances - // to track to which template the instances belong - if (index.get() == templateData.instanceStart) templateIndex.set(0); - - if (index.get() >= templateData.instanceStart) { - // query is an instance of a template - - // if the instance id is equal to the number of instances for the current template, - // the next template is used - if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { - templateIndex.getAndIncrement(); - instanceId.set(0); - } - - if (config.template.individualResults) { - return new QueryData(index.getAndIncrement() - templateData.templates, QueryData.QueryType.TEMPLATE_INSTANCE, templateData.queries.size() - templateData.templates + templateIndex.get()); - } - return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); - } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { - // query is a template - if (config.template.individualResults) { - // give the templates the last ids, so that there aren't any gaps in the ids and results - index.incrementAndGet(); - return new QueryData(templateData.queries.size() - templateData.templates + templateIndex.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); - } - templateIndex.getAndIncrement(); - return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); - } else { - // query is neither a template nor an instance - final var update = QueryData.checkIfUpdate(new ByteArrayInputStream(query.getBytes())); - if (config.template.individualResults) { - return new QueryData(index.getAndIncrement() - templateIndex.get(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); - } - return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); - } - } - ).toList(); - - // set the number of queries that can be executed and the number of queries - // that are represented in the results - this.executableQueryCount = templateData.queries.size() - templateData.templates; - this.representedQueryCount = config.template.individualResults ? - templateData.queries.size() - templateData.templates : - templateData.instanceStart; - return new StringListQueryList(templateData.queries); - } - /** * Creates a QuerySource based on the given path and the format in the configuration. * @@ -457,97 +334,4 @@ public Config getConfig() { } - /** - * Query templates are queries containing placeholders for some terms. - * Replacement candidates are identified by querying a given endpoint. - * This is done in a way that the resulting queries will yield results against endpoints with the same data. - * The placeholders are written in the form of %%var[0-9]+%%, where [0-9]+ - * represents any number. - *

- * Exemplary template:
- * SELECT * WHERE {?s %%var1%% ?o . ?o <http://exa.com> %%var2%%}
- * This template will then be converted to:
- * SELECT ?var1 ?var2 {?s ?var1 ?o . ?o <http://exa.com> ?var2}
- * and will request query solutions from the given sparql endpoint (e.g DBpedia).
- * The solutions will then be instantiated into the template. - * The result may look like the following:
- * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "123"}
- * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
- * SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
- * - * The template data that this method returns will contain a list of all queries, - * where the first queries are the original queries including the query templates. - * The query instances will be appended to the original queries. - */ - private static TemplateData instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { - // charset for generating random variable names - final String charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - final Random random = new Random(); - - final var templateQueries = new FileCachingQueryList(querySource); - final Pattern template = Pattern.compile("%%[a-zA-Z0-9_]+%%"); - final var oldQueries = new ArrayList(); - final var instances = new ArrayList(); - - int templateNumber = 0; - final var indices = new ArrayList(); - final var instanceNumber = new ArrayList(); - - for (int i = 0; i < templateQueries.size(); i++) { - oldQueries.add(templateQueries.getQuery(i)); - // replace all variables in the query template with SPARQL variables - // and store the variable names - var templateQueryString = templateQueries.getQuery(i); - final Matcher matcher = template.matcher(templateQueryString); - final var variables = new LinkedHashMap(); // a set, that preserves insertion order - while (matcher.find()) { - final var match = matcher.group(); - if (variables.containsKey(match)) continue; - String variableName = match.replaceAll("%%", ""); - while (templateQueryString.contains("?" + variableName) || templateQueryString.contains("$" + variableName)) { // generate random variable name with 20 characters until it is unique - variableName = IntStream.range(0, 20).mapToObj(m -> String.valueOf(charset.charAt(random.nextInt(charset.length())))).collect(Collectors.joining()); - } - final var variable = "?" + variableName; - variables.put(match, variable); - templateQueryString = templateQueryString.replaceAll(match, variable); - } - - // if no placeholders are found, the query is already a valid SPARQL query - if (variables.isEmpty()) { - continue; - } - - // build SELECT query for finding bindings for the variables - final var templateQuery = QueryFactory.create(templateQueryString); - final var whereClause = "WHERE " + templateQuery.getQueryPattern(); - final var selectQueryString = new ParameterizedSparqlString(); - selectQueryString.setCommandText("SELECT DISTINCT " + String.join(" ", variables.values())); - selectQueryString.append(" " + whereClause); - selectQueryString.append(" LIMIT " + config.limit()); - selectQueryString.setNsPrefixes(templateQuery.getPrefixMapping()); - - int count = 0; - // send request to SPARQL endpoint and instantiate the template based on results - try (QueryExecution exec = QueryExecutionHTTP.service(config.endpoint().toString(), selectQueryString.asQuery())) { - ResultSet resultSet = exec.execSelect(); - if (!resultSet.hasNext()) { - LOGGER.warn("No results for query template: {}", templateQueryString); - } - while (resultSet.hasNext() && count++ < config.limit()) { - var instance = new ParameterizedSparqlString(templateQueryString); - QuerySolution solution = resultSet.next(); - for (String var : resultSet.getResultVars()) { - instance.clearParam(var); - instance.setParam(var, solution.get(var)); - } - instances.add(instance.toString()); - } - } - // store the number of instances and the index of the template query - templateNumber++; - indices.add(i); - instanceNumber.add(count); - } - return new TemplateData(Stream.concat(oldQueries.stream(), instances.stream()).toList(), templateNumber, indices.stream().mapToInt(Integer::intValue).toArray(), instanceNumber.stream().mapToInt(Integer::intValue).toArray(), oldQueries.size()); - } } diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java new file mode 100644 index 000000000..6525e5f58 --- /dev/null +++ b/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java @@ -0,0 +1,266 @@ +package org.aksw.iguana.cc.query.handler; + +import org.aksw.iguana.cc.query.QueryData; +import org.aksw.iguana.cc.query.list.QueryList; +import org.aksw.iguana.cc.query.list.impl.FileCachingQueryList; +import org.aksw.iguana.cc.query.list.impl.StringListQueryList; +import org.aksw.iguana.cc.query.source.QuerySource; +import org.apache.jena.query.*; +import org.apache.jena.sparql.exec.http.QueryExecutionHTTP; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Random; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +public class TemplateHandler { + private record TemplateData(List queries, int templates, int[] indices, int[] instanceNumber, int instanceStart) {} + + private static final Logger LOGGER = LoggerFactory.getLogger(TemplateHandler.class); + + private List queryData; + private int executableQueryCount = 0; // stores the number of queries that can be executed + private int representedQueryCount = 0; // stores the number of queries that are represented in the results + private final QueryHandler.Config.Template templateConfig; + + + public TemplateHandler(QueryHandler.Config.Template templateConfig) { + queryData = new ArrayList<>(); + this.templateConfig = templateConfig; + } + + public QueryList initializeTemplateQueryHandler(QuerySource templateSource) throws IOException { + final var originalPath = templateSource.getPath(); + final var postfix = String.format("_instances_f%s_l%s.txt", + Integer.toUnsignedString(templateConfig.endpoint().hashCode()), Integer.toUnsignedString((int) templateConfig.limit().longValue())); + final Path instancePath = Files.isDirectory(originalPath) ? + originalPath.resolveSibling(originalPath.getFileName() + postfix) : // if the source of the query templates is a folder, the instances will be saved in a file with the same name as the folder + originalPath.resolveSibling(originalPath.getFileName().toString().split("\\.")[0] + postfix); // if the source of the query templates is a file, the instances will be saved in a file with the same name as the file + TemplateData templateData; + + if (Files.exists(instancePath)) { + LOGGER.info("Already existing query template instances have been found and will be reused. Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); + + // read in the template data + // the header contains the number of templates and the index (index doesn't count headers) of the first instance + // afterward for each template the index of the template and the number of instances are stored + String header; + try (var reader = Files.newBufferedReader(instancePath)) { + header = reader.readLine(); + Pattern digitRegex = Pattern.compile("\\d+"); + Matcher matcher = digitRegex.matcher(header); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + int templates = Integer.parseInt(matcher.group()); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + int instanceStart = Integer.parseInt(matcher.group()); + final var indices = new int[templates]; + final var instanceNumber = new int[templates]; + for (int i = 0; i < templates; i++) { + if (!matcher.find()) throw new IOException("Invalid instance file header"); + indices[i] = Integer.parseInt(matcher.group()); + if (!matcher.find()) throw new IOException("Invalid instance file header"); + instanceNumber[i] = Integer.parseInt(matcher.group()); + } + templateData = new TemplateData(reader.lines().toList(), templates, indices, instanceNumber, instanceStart); + } + } else { + templateData = instantiateTemplateQueries(templateSource, templateConfig); + + if (templateConfig.save()) { + // save the instances to a file + Files.createFile(instancePath); + + try (var writer = Files.newBufferedWriter(instancePath)) { + // write header line + writer.write(String.format("templates: %d instances_start: %d ", templateData.templates, templateData.instanceStart)); + writer.write(String.format("%s", IntStream.range(0, templateData.templates) + .mapToObj(i -> "index: " + templateData.indices[i] + " instances_count: " + templateData.instanceNumber[i]) + .collect(Collectors.joining(" ")))); + writer.newLine(); + // write queries and instances + for (String instance : templateData.queries) { + writer.write(instance); + writer.newLine(); + } + } + } + } + + // initialize queryData based on the template data + AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template + AtomicInteger index = new AtomicInteger(0); // index of the current query + AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template + queryData = templateData.queries.stream().map( + query -> { + // If "individualResults" is turned on, move the query templates outside the range of + // "representedQueryCount" to avoid them being represented in the results. + // Otherwise, if "individualResults" is turned off, the instances need to be moved outside the range + // of "representedQueryCount", but because "instantiateTemplateQueries" already appends the + // instances to the end of the original queries, this will already be done. + + // once the template instances start, the template index is reset and reused for the instances + // to track where the template instances belong + if (index.get() == templateData.instanceStart) templateIndex.set(0); + + if (index.get() >= templateData.instanceStart) { + // query is an instance of a template + + // if the instance id is equal to the number of instances for the current template, + // the next template is used + if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { + templateIndex.getAndIncrement(); + instanceId.set(0); + } + + if (templateConfig.individualResults()) { + return new QueryData(index.getAndIncrement() - templateData.templates, QueryData.QueryType.TEMPLATE_INSTANCE, templateData.queries.size() - templateData.templates + templateIndex.get()); + } + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); + } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { + // query is a template + if (templateConfig.individualResults()) { + // give the templates the last ids, so that there aren't any gaps in the ids and results + index.incrementAndGet(); + return new QueryData(templateData.queries.size() - templateData.templates + templateIndex.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); + } + templateIndex.getAndIncrement(); + return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); + } else { + // query is neither a template nor an instance + final var update = QueryData.checkIfUpdate(new ByteArrayInputStream(query.getBytes())); + if (templateConfig.individualResults()) { + return new QueryData(index.getAndIncrement() - templateIndex.get(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); + } + return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); + } + } + ).toList(); + + // set the number of queries that can be executed and the number of queries + // that are represented in the results + this.executableQueryCount = templateData.queries.size() - templateData.templates; + this.representedQueryCount = templateConfig.individualResults() ? + templateData.queries.size() - templateData.templates : + templateData.instanceStart; + return new StringListQueryList(templateData.queries); + } + + + /** + * Query templates are queries containing placeholders for some terms. + * Replacement candidates are identified by querying a given endpoint. + * This is done in a way that the resulting queries will yield results against endpoints with the same data. + * The placeholders are written in the form of %%var[0-9]+%%, where [0-9]+ + * represents any number. + *

+ * Exemplary template:
+ * SELECT * WHERE {?s %%var1%% ?o . ?o <http://exa.com> %%var2%%}
+ * This template will then be converted to:
+ * SELECT ?var1 ?var2 {?s ?var1 ?o . ?o <http://exa.com> ?var2}
+ * and will request query solutions from the given sparql endpoint (e.g DBpedia).
+ * The solutions will then be instantiated into the template. + * The result may look like the following:
+ * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "123"}
+ * SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
+ * SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
+ * + * The template data that this method returns will contain a list of all queries, + * where the first queries are the original queries including the query templates. + * The query instances will be appended to the original queries. + */ + private static TemplateData instantiateTemplateQueries(QuerySource querySource, QueryHandler.Config.Template config) throws IOException { + // charset for generating random variable names + final String charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + final Random random = new Random(); + + final var templateQueries = new FileCachingQueryList(querySource); + final Pattern template = Pattern.compile("%%[a-zA-Z0-9_]+%%"); + final var oldQueries = new ArrayList(); + final var instances = new ArrayList(); + + int templateNumber = 0; + final var indices = new ArrayList(); + final var instanceNumber = new ArrayList(); + + for (int i = 0; i < templateQueries.size(); i++) { + oldQueries.add(templateQueries.getQuery(i)); + // replace all variables in the query template with SPARQL variables + // and store the variable names + var templateQueryString = templateQueries.getQuery(i); + final Matcher matcher = template.matcher(templateQueryString); + final var variables = new LinkedHashMap(); // a set, that preserves insertion order + while (matcher.find()) { + final var match = matcher.group(); + if (variables.containsKey(match)) continue; + String variableName = match.replaceAll("%%", ""); + while (templateQueryString.contains("?" + variableName) || templateQueryString.contains("$" + variableName)) { // generate random variable name with 20 characters until it is unique + variableName = IntStream.range(0, 20).mapToObj(m -> String.valueOf(charset.charAt(random.nextInt(charset.length())))).collect(Collectors.joining()); + } + final var variable = "?" + variableName; + variables.put(match, variable); + templateQueryString = templateQueryString.replaceAll(match, variable); + } + + // if no placeholders are found, the query is already a valid SPARQL query + if (variables.isEmpty()) { + continue; + } + + // build SELECT query for finding bindings for the variables + final var templateQuery = QueryFactory.create(templateQueryString); + final var whereClause = "WHERE " + templateQuery.getQueryPattern(); + final var selectQueryString = new ParameterizedSparqlString(); + selectQueryString.setCommandText("SELECT DISTINCT " + String.join(" ", variables.values())); + selectQueryString.append(" " + whereClause); + selectQueryString.append(" LIMIT " + config.limit()); + selectQueryString.setNsPrefixes(templateQuery.getPrefixMapping()); + + int count = 0; + // send request to SPARQL endpoint and instantiate the template based on results + try (QueryExecution exec = QueryExecutionHTTP.service(config.endpoint().toString(), selectQueryString.asQuery())) { + ResultSet resultSet = exec.execSelect(); + if (!resultSet.hasNext()) { + LOGGER.warn("No results for query template: {}", templateQueryString); + } + while (resultSet.hasNext() && count++ < config.limit()) { + var instance = new ParameterizedSparqlString(templateQueryString); + QuerySolution solution = resultSet.next(); + for (String var : resultSet.getResultVars()) { + instance.clearParam(var); + instance.setParam(var, solution.get(var)); + } + instances.add(instance.toString()); + } + } + // store the number of instances and the index of the template query + templateNumber++; + indices.add(i); + instanceNumber.add(count); + } + return new TemplateData(Stream.concat(oldQueries.stream(), instances.stream()).toList(), templateNumber, indices.stream().mapToInt(Integer::intValue).toArray(), instanceNumber.stream().mapToInt(Integer::intValue).toArray(), oldQueries.size()); + } + + public int getExecutableQueryCount() { + return executableQueryCount; + } + + public int getRepresentedQueryCount() { + return representedQueryCount; + } + + public List getQueryData() { + return queryData; + } +} From 45a4c7cf4c26b05a075686c9a436ba2c7bb87ede Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:38:39 +0100 Subject: [PATCH 30/34] Trying to clarify comments --- .../cc/query/handler/TemplateHandler.java | 99 ++++++++++--------- 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java index 6525e5f58..eae715f60 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java @@ -98,55 +98,64 @@ public QueryList initializeTemplateQueryHandler(QuerySource templateSource) thro } } - // initialize queryData based on the template data - AtomicInteger templateIndex = new AtomicInteger(0); // index of the next template - AtomicInteger index = new AtomicInteger(0); // index of the current query - AtomicInteger instanceId = new AtomicInteger(0); // id of the current instance for the current template - queryData = templateData.queries.stream().map( - query -> { - // If "individualResults" is turned on, move the query templates outside the range of - // "representedQueryCount" to avoid them being represented in the results. - // Otherwise, if "individualResults" is turned off, the instances need to be moved outside the range - // of "representedQueryCount", but because "instantiateTemplateQueries" already appends the - // instances to the end of the original queries, this will already be done. - - // once the template instances start, the template index is reset and reused for the instances - // to track where the template instances belong - if (index.get() == templateData.instanceStart) templateIndex.set(0); + // Initialize queryData based on the template data. + // This means that every query is assigned a type (default, update, template, template instance) and + // an id (index), based on their type, position in the query file and the configuration. + // Because of the way the "StresstestResultProcessor" is currently implemented, the ids of the queries + // that are represented in the results need to be continuous and start at 0. + // In the case of "individualResults" turned on, + // every normal query and every template instance should be represented. + // Therefore, the ids of the templates have to be the last ones. + // Otherwise every normal query and every template should be represented. + // Template instances are located at the end of the query list. + // The queryData is later used to keep track of the queries, their types, ids, and relations. + int templateIndex = 0; // index of the next template + int index = 0; // index of the current query + int instanceId = 0; // id of the current instance for the current template + queryData = new ArrayList<>(); + for (var query : templateData.queries) { + // Once the template instances are being iterated, the template index is reset + // and reused to track of which query template the instances are being iterated. + if (index == templateData.instanceStart) templateIndex = 0; - if (index.get() >= templateData.instanceStart) { - // query is an instance of a template + if (index >= templateData.instanceStart) { + // query is an instance of a template - // if the instance id is equal to the number of instances for the current template, - // the next template is used - if (instanceId.get() == templateData.instanceNumber[templateIndex.get()]) { - templateIndex.getAndIncrement(); - instanceId.set(0); - } + // if the instance id is equal to the number of instances for the current template, + // the next instances belong to the next template + if (instanceId++ == templateData.instanceNumber[templateIndex]) { + templateIndex++; + instanceId = 0; + } - if (templateConfig.individualResults()) { - return new QueryData(index.getAndIncrement() - templateData.templates, QueryData.QueryType.TEMPLATE_INSTANCE, templateData.queries.size() - templateData.templates + templateIndex.get()); - } - return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex.get()); - } else if (templateIndex.get() < templateData.templates && index.get() == templateData.indices[templateIndex.get()]) { - // query is a template - if (templateConfig.individualResults()) { - // give the templates the last ids, so that there aren't any gaps in the ids and results - index.incrementAndGet(); - return new QueryData(templateData.queries.size() - templateData.templates + templateIndex.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); - } - templateIndex.getAndIncrement(); - return new QueryData(index.getAndIncrement(), QueryData.QueryType.TEMPLATE, null); - } else { - // query is neither a template nor an instance - final var update = QueryData.checkIfUpdate(new ByteArrayInputStream(query.getBytes())); - if (templateConfig.individualResults()) { - return new QueryData(index.getAndIncrement() - templateIndex.get(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); - } - return new QueryData(index.getAndIncrement(), update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null); - } + if (templateConfig.individualResults()) { + // In this case, the ids of the instances are shifted by the number of templates, + // because the templates received the last ids. + // This way, there are no gaps in the ids, + // and they can be correctly assigned to the results. + queryData.add(new QueryData(index++ - templateData.templates, QueryData.QueryType.TEMPLATE_INSTANCE, templateData.queries.size() - templateData.templates + templateIndex)); + } + queryData.add(new QueryData(index++, QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex)); + } else if (templateIndex < templateData.templates && index == templateData.indices[templateIndex]) { + // query is a template + if (templateConfig.individualResults()) { + // Give the templates the last ids. + index++; + queryData.add(new QueryData(templateData.queries.size() - templateData.templates + templateIndex++, QueryData.QueryType.TEMPLATE, null)); } - ).toList(); + templateIndex++; + queryData.add(new QueryData(index++, QueryData.QueryType.TEMPLATE, null)); + } else { + // query is neither a template nor an instance + final var update = QueryData.checkIfUpdate(new ByteArrayInputStream(query.getBytes())); + if (templateConfig.individualResults()) { + // Fill the gaps caused by the templates. + queryData.add(new QueryData(index++ - templateIndex, update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null)); + } + queryData.add(new QueryData(index++, update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null)); + } + + } // set the number of queries that can be executed and the number of queries // that are represented in the results From 959d88af5a04a1d6a77a29797764e78f562a5a08 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:48:20 +0100 Subject: [PATCH 31/34] Add more comments --- .../aksw/iguana/cc/query/handler/QueryHandler.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 903f63d1a..54d5c053b 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -178,8 +178,16 @@ public record QueryStreamWrapper(int index, boolean cached, Supplier queryData; - private int executableQueryCount = 0; // stores the number of queries that can be executed - private int representedQueryCount = 0; // stores the number of queries that are represented in the results + // stores the number of queries that can be executed + private int executableQueryCount = 0; + + // Stores the number of queries that are represented in the results. + // If individual results are disabled for query templates, + // the template instances will represent the template by using its id. + // Otherwise, each template instance will represent itself by using its own unique id, and therefore + // query templates won't be represented in the results because they can't be executed otherwise. + // 'executableQueryCount == representedQueryCount' is always true when templates are not used. + private int representedQueryCount = 0; private int workerCount = 0; // give every worker inside the same worker config an offset seed private int totalWorkerCount = 0; From bae344882e7beb3ed42cccffce61bf648bc6d305 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:48:50 +0100 Subject: [PATCH 32/34] Change behavior of noOfQueries property in results --- .../iguana/cc/tasks/impl/StresstestResultProcessor.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java index 3b0cac041..c794ca6fe 100644 --- a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java @@ -128,8 +128,10 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { m.add(workerRes, RDF.type, IONT.worker); m.add(workerRes, IPROP.workerID, toInfinitePrecisionIntegerLiteral(worker.getWorkerID())); m.add(workerRes, IPROP.workerType, ResourceFactory.createTypedLiteral(worker.getClass().getSimpleName())); - // TODO: is it executable or representative? - m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral(config.queries().getExecutableQueryCount())); + m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral( + config.queries().getConfig().template().individualResults() ? + config.queries().getExecutableQueryCount() : + config.queries().getRepresentedQueryCount())); m.add(workerRes, IPROP.timeOut, TimeUtils.createTypedDurationLiteral(config.timeout())); if (config.completionTarget() instanceof HttpWorker.QueryMixes) m.add(workerRes, IPROP.noOfQueryMixes, toInfinitePrecisionIntegerLiteral(((HttpWorker.QueryMixes) config.completionTarget()).number())); From e967ff25f8e3e6675642acb68ae48d1e819d37c4 Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:49:22 +0100 Subject: [PATCH 33/34] Remove unused import --- .../java/org/aksw/iguana/cc/query/handler/TemplateHandler.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java index eae715f60..b73fef371 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java @@ -18,7 +18,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Random; -import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; From 7a09506604fe10a0f08e99ba3b3b72699100e9bf Mon Sep 17 00:00:00 2001 From: Nick Molcanov <32801560+nck-mlcnv@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:25:46 +0100 Subject: [PATCH 34/34] Fix broken tests --- .../iguana/cc/query/handler/TemplateHandler.java | 13 +++++++++---- .../cc/tasks/impl/StresstestResultProcessor.java | 5 ++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java index b73fef371..ae7fc1936 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/TemplateHandler.java @@ -134,16 +134,19 @@ public QueryList initializeTemplateQueryHandler(QuerySource templateSource) thro // and they can be correctly assigned to the results. queryData.add(new QueryData(index++ - templateData.templates, QueryData.QueryType.TEMPLATE_INSTANCE, templateData.queries.size() - templateData.templates + templateIndex)); } - queryData.add(new QueryData(index++, QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex)); + else { + queryData.add(new QueryData(index++, QueryData.QueryType.TEMPLATE_INSTANCE, templateIndex)); + } } else if (templateIndex < templateData.templates && index == templateData.indices[templateIndex]) { // query is a template if (templateConfig.individualResults()) { // Give the templates the last ids. index++; queryData.add(new QueryData(templateData.queries.size() - templateData.templates + templateIndex++, QueryData.QueryType.TEMPLATE, null)); + } else { + templateIndex++; + queryData.add(new QueryData(index++, QueryData.QueryType.TEMPLATE, null)); } - templateIndex++; - queryData.add(new QueryData(index++, QueryData.QueryType.TEMPLATE, null)); } else { // query is neither a template nor an instance final var update = QueryData.checkIfUpdate(new ByteArrayInputStream(query.getBytes())); @@ -151,7 +154,9 @@ public QueryList initializeTemplateQueryHandler(QuerySource templateSource) thro // Fill the gaps caused by the templates. queryData.add(new QueryData(index++ - templateIndex, update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null)); } - queryData.add(new QueryData(index++, update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null)); + else { + queryData.add(new QueryData(index++, update ? QueryData.QueryType.UPDATE : QueryData.QueryType.DEFAULT, null)); + } } } diff --git a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java index c794ca6fe..b8e4c8f38 100644 --- a/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/tasks/impl/StresstestResultProcessor.java @@ -128,7 +128,10 @@ public void calculateAndSaveMetrics(Calendar start, Calendar end) { m.add(workerRes, RDF.type, IONT.worker); m.add(workerRes, IPROP.workerID, toInfinitePrecisionIntegerLiteral(worker.getWorkerID())); m.add(workerRes, IPROP.workerType, ResourceFactory.createTypedLiteral(worker.getClass().getSimpleName())); - m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral( + if (config.queries().getConfig() == null || config.queries().getConfig().template() == null) + m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral(config.queries().getExecutableQueryCount())); + else + m.add(workerRes, IPROP.noOfQueries, toInfinitePrecisionIntegerLiteral( config.queries().getConfig().template().individualResults() ? config.queries().getExecutableQueryCount() : config.queries().getRepresentedQueryCount()));