From ea2e9e2a3a297beb83da435dc8654ec39b502cb0 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Wed, 24 Apr 2024 10:22:49 +0100 Subject: [PATCH 1/9] Next development iteration: 3.3.7-SNAPSHOT --- dicoogle/pom.xml | 2 +- pom.xml | 2 +- sdk/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dicoogle/pom.xml b/dicoogle/pom.xml index afbf6e5d3..14727a262 100644 --- a/dicoogle/pom.xml +++ b/dicoogle/pom.xml @@ -10,7 +10,7 @@ pt.ua.ieeta dicoogle-all - 3.3.6 + 3.3.7-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index bb6e41338..5c36973d9 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 pt.ua.ieeta dicoogle-all - 3.3.6 + 3.3.7-SNAPSHOT pom dicoogle-all diff --git a/sdk/pom.xml b/sdk/pom.xml index 2ad88f667..1f3f6100d 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -10,7 +10,7 @@ pt.ua.ieeta dicoogle-all - 3.3.6 + 3.3.7-SNAPSHOT ../pom.xml From 471e530aa5babed0d75ca5a4859ff58dae62e542 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Wed, 24 Apr 2024 21:01:28 +0100 Subject: [PATCH 2/9] [core] Add kill switch for dead plugin sets (#686) JVM property `dicoogle.deadPluginKillSwitch`. If true, any dead plugin will kill the process --- .../pt/ua/dicoogle/plugins/PluginController.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java index 62b7acde7..7dd980c00 100755 --- a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java @@ -89,6 +89,9 @@ public synchronized static PluginController getInstance() { private TaskManager taskManagerQueries = new TaskManager(Integer.parseInt(System.getProperty("dicoogle.taskManager.nQueryThreads", "4"))); + /** Whether to shut down Dicoogle when a plugin is marked as dead */ + private static boolean DEAD_PLUGIN_KILL_SWITCH = + System.getProperty("dicoogle.deadPluginKillSwitch", "false").equalsIgnoreCase("true"); public PluginController(File pathToPluginDirectory) { logger.info("Creating PluginController Instance"); @@ -172,9 +175,14 @@ private void configurePlugins() { logger.warn("Plugin set name cannot be retrieved: {}", ex2.getMessage()); name = "UNKNOWN"; } - logger.error("Unexpected error while loading plugin set {}. Plugin set marked as dead.", name, e); - this.deadPluginSets.add(new DeadPlugin(name, e)); - it.remove(); + if (DEAD_PLUGIN_KILL_SWITCH) { + logger.error("Unexpected error while loading plugin set {}. Dicoogle will shut down.", name, e); + System.exit(-4); + } else { + logger.error("Unexpected error while loading plugin set {}. Plugin set marked as dead.", name, e); + this.deadPluginSets.add(new DeadPlugin(name, e)); + it.remove(); + } } } logger.debug("Settings pushed to plugins"); From 71e1d82c82d7fbe633d5ed947ba0306cd0d38be2 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Mon, 27 May 2024 22:23:04 +0100 Subject: [PATCH 3/9] Clean up logging around indexing tasks (#691) * Clean up logging around indexing tasks - lower level of some log lines which are not very useful - remove commented out code * Revert removal of deprecated call StorageInterface#handles - its deprecation is currently being contested --- .../ua/dicoogle/plugins/PluginController.java | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java index 7dd980c00..7f7e1679a 100755 --- a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java @@ -454,7 +454,6 @@ public List getQueryProvidersName(boolean enabled) { for (QueryInterface p : plugins) { names.add(p.getName()); } - // logger.info("Query Providers: "+Arrays.toString(names.toArray()) ); return names; } @@ -462,7 +461,6 @@ public QueryInterface getQueryProviderByName(String name, boolean onlyEnabled) { Collection plugins = getQueryPlugins(onlyEnabled); for (QueryInterface p : plugins) { if (p.getName().equalsIgnoreCase(name)) { - // logger.info("Retrived Query Provider: "+name); return p; } } @@ -494,7 +492,6 @@ public IndexerInterface getIndexerByName(String name, boolean onlyEnabled) { Collection plugins = getIndexingPlugins(onlyEnabled); for (IndexerInterface p : plugins) { if (p.getName().equalsIgnoreCase(name)) { - // logger.info("Retrived Query Provider: "+name); return p; } } @@ -507,7 +504,6 @@ public JettyPluginInterface getServletByName(String name, boolean onlyEnabled) { Collection plugins = getServletPlugins(onlyEnabled); for (JettyPluginInterface p : plugins) { if (p.getName().equalsIgnoreCase(name)) { - // logger.info("Retrived Query Provider: "+name); return p; } } @@ -519,7 +515,6 @@ public StorageInterface getStorageByName(String name, boolean onlyEnabled) { Collection plugins = getStoragePlugins(onlyEnabled); for (StorageInterface p : plugins) { if (p.getName().equalsIgnoreCase(name)) { - // logger.info("Retrived Query Provider: "+name); return p; } } @@ -535,7 +530,6 @@ public JointQueryTask queryAll(JointQueryTask holder, final String query, final public JointQueryTask queryAll(JointQueryTask holder, final String query, final DimLevel level, final Object... parameters) { - // logger.info("Querying all providers"); List providers = this.getQueryProvidersName(true); return query(holder, providers, query, level, parameters); } @@ -552,7 +546,6 @@ public Task> query(String querySource, final String query final Object... parameters) { Task> t = getTaskForQueryDim(querySource, query, level, parameters); taskManagerQueries.dispatch(t); - // logger.info("Fired Query Task: "+querySource +" QueryString:"+query); return t;// returns the handler to obtain the computation results } @@ -573,7 +566,6 @@ public JointQueryTask query(JointQueryTask holder, List querySources, fi for (Task t : tasks) taskManagerQueries.dispatch(t); - // logger.info("Fired Query Tasks: "+Arrays.toString(querySources.toArray()) +" QueryString:"+query); return holder;// returns the handler to obtain the computation results } @@ -593,7 +585,6 @@ public JointQueryTask query(JointQueryTask holder, List querySources, fi for (Task t : tasks) taskManagerQueries.dispatch(t); - // logger.info("Fired Query Tasks: "+Arrays.toString(querySources.toArray()) +" QueryString:"+query); return holder;// returns the handler to obtain the computation results } @@ -618,7 +609,6 @@ public Iterable call() throws Exception { } }); - // logger.info("Prepared Query Task: QueryString"); return queryTask; } @@ -654,7 +644,6 @@ public Iterable call() { } }); - // logger.info("Prepared Query Task: QueryString"); return queryTask; } @@ -687,11 +676,8 @@ public List> index(URI path) { continue; final String taskUniqueID = UUID.randomUUID().toString(); task.setName(String.format("[%s]index %s", indexer.getName(), path)); - task.onCompletion(new Runnable() { - @Override - public void run() { - logger.info("Task [{}] complete: {} is indexed", taskUniqueID, pathF); - } + task.onCompletion(() -> { + logger.info("Task [{}] complete on {}", taskUniqueID, pathF); }); taskManager.dispatch(task); @@ -701,7 +687,7 @@ public void run() { logger.warn("Indexer {} failed unexpectedly", indexer.getName(), ex); } } - logger.info("Finished firing all indexing plugins for {}", path); + logger.debug("Finished firing all indexing plugins for {}", path); return rettasks; } @@ -728,14 +714,14 @@ public List> index(String pluginName, URI path) { @Override public void run() { - logger.info("Task [{}] complete: {} is indexed", taskUniqueID, pathF); + logger.info("Task [{}] complete on {}", taskUniqueID, pathF); } }); taskManager.dispatch(task); rettasks.add(task); - logger.info("Fired indexer {} for URI {}", pluginName, path.toString()); + logger.debug("Fired indexer {} for URI {}", pluginName, path.toString()); RunningIndexTasks.getInstance().addTask(task); } } catch (RuntimeException ex) { From cebf4e30c2d76b822fd8637f13aa7d5e18209cb5 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Thu, 27 Jun 2024 11:04:16 +0100 Subject: [PATCH 4/9] Add robustness to DIMGeneric against BodyPartThickness - do not assume that it is a string, because some index sources might save it as a double --- .../java/pt/ua/dicoogle/sdk/datastructs/dim/DIMGeneric.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/dim/DIMGeneric.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/dim/DIMGeneric.java index 5544f82f0..5462d572a 100755 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/dim/DIMGeneric.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/dim/DIMGeneric.java @@ -158,8 +158,7 @@ private void fillDim(Map extra, URI uri) { * Get data to Series */ String serieUID = toTrimmedString(extra.get("SeriesInstanceUID"), false); - String BodyPartThickness = (String) extra.get("BodyPartThickness"); - // System.out.println("serieUID"+serieUID); + String BodyPartThickness = toTrimmedString(extra.get("BodyPartThickness"), true); String serieNumber = toTrimmedString(extra.get("SeriesNumber"), true); String serieDescription = toTrimmedString(extra.get("SeriesDescription"), false); String modality = toTrimmedString(extra.get("Modality"), false); @@ -192,8 +191,6 @@ private void fillDim(Map extra, URI uri) { /** * Get data to Image */ - // TODO:Error checking here... but according to standard, all images - // must have one of these... String sopInstUID = toTrimmedString(extra.get("SOPInstanceUID"), true); if (sopInstUID == null) { From 7af44ecc04f282cbc2928aa69e88688619ea5698 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Tue, 27 Aug 2024 17:34:56 +0100 Subject: [PATCH 5/9] Bulk unindexing for IndexerInterface (#598) * [sdk] Add IndexerInterface::unindex(Collection) - for unindexing in bulk - clarify that both unindex methods are synchronous, unlike the indexing ones * [sdk] Revamp IndexerInterface documentation - adjust the content to align with good practices (see also https://bioinformatics-ua.github.io/dicoogle-learning-pack/docs/query_index/) * [sdk] rethink bulk unindexing to be more informative - add `UnindexReport` class and nested classes - for containing errors which may occur in bulk unindexing - change `IndexerInterface#unindex(Collection)` - returns `UnindexReport` - can throw `IOException` * [sdk] reiterate on IndexerInterface batch unindex - make it asynchronous: returns a `Task` like in `index` - add second parameter for keeping track of progress * [sdk] format UnindexReport * [sdk] Improve bulk IndexerInterface#unindex - clarify that it returns a task - remove unused import * [sdk] Add UnindexReport#errorCount * Add bulk unindexing to plugin controller - can only handle one indexer at a time, but other than that it works * Tweak PluginController - remove deprecated method call #handles, check scheme instead * Update unindex servlet to use bulk unindexing where appropriate * [core] Dispatch batch-unindex tasks * [sdk] Reiterate on the UnindexReport API - record a collection of URIs in each unindex failure * [sdk] Tweak UnindexReport interface and fix error file count - provide clearer methods to collect the counts of files which were not unindexed successfully --- .../ua/dicoogle/plugins/PluginController.java | 42 ++++- .../servlets/management/UnindexServlet.java | 61 +++++-- .../pt/ua/dicoogle/sdk/IndexerInterface.java | 107 ++++++++++-- .../sdk/datastructs/UnindexReport.java | 159 ++++++++++++++++++ 4 files changed, 338 insertions(+), 31 deletions(-) create mode 100644 sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java index 7f7e1679a..117ad5c1c 100755 --- a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/PluginController.java @@ -28,6 +28,7 @@ import pt.ua.dicoogle.plugins.webui.WebUIPluginManager; import pt.ua.dicoogle.sdk.*; import pt.ua.dicoogle.sdk.datastructs.Report; +import pt.ua.dicoogle.sdk.datastructs.UnindexReport; import pt.ua.dicoogle.sdk.datastructs.SearchResult; import pt.ua.dicoogle.sdk.datastructs.dim.DimLevel; import pt.ua.dicoogle.sdk.settings.ConfigurationHolder; @@ -45,6 +46,7 @@ import java.util.*; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; +import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.zip.ZipFile; @@ -755,7 +757,43 @@ public void unindex(URI path, Collection indexProviders) { } } - /** Issue an unindexation procedure to the given indexers. + /** Issue the removal of indexed entries in bulk. + * + * @param indexProvider the name of the indexer + * @param items a collections of item identifiers to unindex + * @param progressCallback an optional function (can be `null`), + * called for every batch of items successfully unindexed + * to indicate early progress + * and inform consumers that + * it is safe to remove or exclude the unindexed item + * @return an asynchronous task object returning + * a report containing which files were not unindexed, + * and whether some of them were not found in the database + * @throws IOException + */ + public Task unindex(String indexProvider, Collection items, Consumer> progressCallback) throws IOException { + logger.info("Starting unindexing procedure for {} items", items.size()); + + IndexerInterface indexer = null; + if (indexProvider != null) { + indexer = this.getIndexerByName(indexProvider, true); + } + if (indexer == null) { + indexer = this.getIndexingPlugins(true).iterator().next(); + } + Task task = indexer.unindex(items, progressCallback); + if (task != null) { + final String taskUniqueID = UUID.randomUUID().toString(); + task.setName(String.format("[%s]unindex", indexer.getName())); + task.onCompletion(() -> { + logger.info("Unindexing task [{}] complete", taskUniqueID); + }); + taskManager.dispatch(task); + } + return task; + } + + /** Issue an unindexing procedure to the given indexers. * * @param path the URI of the directory or file to unindex * @param indexers a collection of providers @@ -776,7 +814,7 @@ public void remove(URI uri) { } public void doRemove(URI uri, StorageInterface si) { - if (si.handles(uri)) { + if (Objects.equals(uri.getScheme(), si.getScheme())) { si.remove(uri); } else { logger.warn("Storage Plugin does not handle URI: {},{}", uri, si); diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java index f98f4b879..02e14f824 100644 --- a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/management/UnindexServlet.java @@ -21,12 +21,13 @@ import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import javax.servlet.ServletException; @@ -42,6 +43,7 @@ import pt.ua.dicoogle.plugins.PluginController; import pt.ua.dicoogle.sdk.QueryInterface; import pt.ua.dicoogle.sdk.datastructs.SearchResult; +import pt.ua.dicoogle.sdk.datastructs.UnindexReport; import pt.ua.dicoogle.sdk.task.JointQueryTask; import pt.ua.dicoogle.sdk.task.Task; @@ -81,26 +83,50 @@ protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws S "No arguments provided; must include either one of `uri`, `SOPInstanceUID`, `SeriesInstanceUID` or `StudyInstanceUID`"); return; } + + PluginController pc = PluginController.getInstance(); long indexed = 0; long failed = 0; + long notfound = 0; - Collection uris = resolveURIs(paramUri, paramSop, paramSeries, paramStudy); + Collection uris = resolveURIs(paramUri, paramSop, paramSeries, paramStudy); - // unindex - for (String strUri : uris) { - try { - URI uri = new URI(strUri); + // if only one entry, do it inline + if (uris.size() <= 1) { + for (URI uri : uris) { try { - PluginController.getInstance().unindex(uri, providers); + pc.unindex(uri, providers); indexed += 1; } catch (RuntimeException ex) { logger.error("Failed to unindex {}", uri, ex); failed += 1; } - } catch (URISyntaxException ex) { - logger.warn("Received bad URI", ex); - failed += 1; + } + + } else { + // if many, use bulk unindexing + List> tasks = new ArrayList<>(); + + if (providers == null) { + providers = pc.getIndexingPlugins(true).stream() + .map(p -> p.getName()) + .collect(Collectors.toList()); + } + for (String indexProvider: providers) { + tasks.add(pc.unindex(indexProvider, uris, null)); + } + + int i = 0; + for (Task task: tasks) { + try { + UnindexReport report = task.get(); + indexed = uris.size() - report.notUnindexedFileCount(); + failed = report.failedFileCount(); + notfound = report.getNotFound().size(); + } catch (Exception ex) { + logger.error("Task to unindex items in {} failed", providers.get(i), ex); + } } } @@ -109,15 +135,18 @@ protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws S JSONObject obj = new JSONObject(); obj.put("indexed", indexed); obj.put("failed", failed); + obj.put("notFound", notfound); resp.setStatus(200); resp.getWriter().write(obj.toString()); } /// Convert the given parameters into a list of URIs - private static Collection resolveURIs(String[] paramUri, String[] paramSop, String[] paramSeries, + private static Collection resolveURIs(String[] paramUri, String[] paramSop, String[] paramSeries, String[] paramStudy) { if (paramUri != null) { - return Arrays.asList(paramUri); + return Stream.of(paramUri) + .map(URI::create) + .collect(Collectors.toList()); } String attribute = null; if (paramSop != null) { @@ -142,11 +171,11 @@ public void onCompletion() {} }; try { return StreamSupport.stream(PluginController.getInstance() - .queryAll(holder, dcmAttribute + ":" + uid).get().spliterator(), false); + .queryAll(holder, dcmAttribute + ":\"" + uid + '"').get().spliterator(), false); } catch (InterruptedException | ExecutionException ex) { throw new RuntimeException(ex); } - }).map(r -> r.getURI().toString()).collect(Collectors.toList()); + }).map(r -> r.getURI()).collect(Collectors.toList()); } String dicomProvider = dicomProviders.iterator().next(); @@ -154,7 +183,7 @@ public void onCompletion() {} // translate to URIs QueryInterface dicom = PluginController.getInstance().getQueryProviderByName(dicomProvider, false); - return StreamSupport.stream(dicom.query(dcmAttribute + ":" + uid).spliterator(), false); - }).map(r -> r.getURI().toString()).collect(Collectors.toList()); + return StreamSupport.stream(dicom.query(dcmAttribute + ":\"" + uid + '"').spliterator(), false); + }).map(r -> r.getURI()).collect(Collectors.toList()); } } diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java index 27eb2255a..e7df68890 100755 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/IndexerInterface.java @@ -18,47 +18,73 @@ */ package pt.ua.dicoogle.sdk; +import java.io.IOException; import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.function.Consumer; + import pt.ua.dicoogle.sdk.datastructs.Report; +import pt.ua.dicoogle.sdk.datastructs.UnindexReport; +import pt.ua.dicoogle.sdk.datastructs.UnindexReport.FailedUnindex; import pt.ua.dicoogle.sdk.task.Task; /** - * Index Interface Plugin. Indexers analyze documents for performing queries. They may index - * documents by DICOM metadata for instance, but other document processing procedures may be involved. + * Indexing plugin interface. + * + * Indexers analyze and record documents for future retrieval. + * They are primarily designed to index DICOM meta-data, + * which in that case they are accompanied by a query plugin, + * and both plugins are called DIM providers. + * However, indexers are not restricted to processing DICOM files, + * or to retrieving and indexing meta-data. * - * @author Luís A. Bastião Silva + * @author Luís A. Bastião Silva * @author Frederico Valente */ public interface IndexerInterface extends DicooglePlugin { /** - * Indexes the file path to the database. Indexation procedures are asynchronous, and will return + * Indexes the file path to the database. Indexing procedures are asynchronous, and will return * immediately after the call. The outcome is a report that can be retrieved from the given task * as a future. * * @param file directory or file to index - * @return a representation of the asynchronous indexation task + * @return a representation of the asynchronous indexing task */ public Task index(StorageInputStream file, Object... parameters); /** - * Indexes multiple file paths to the database. Indexation procedures are asynchronous, and will return + * Indexes multiple file paths to the database. Indexing procedures are asynchronous, and will return * immediately after the call. The outcomes are aggregated into a single report and can be retrieved from * the given task as a future. * * @param files a collection of directories and/or files to index - * @return a representation of the asynchronous indexation task + * @return a representation of the asynchronous indexing task */ public Task index(Iterable files, Object... parameters); - /** - * Checks whether the file in the given path can be indexed by this indexer. The indexer should verify if - * the file holds compatible content (e.g. a DICOM file). If this method returns false, the file will not - * be indexed. - * + * Checks whether the file in the given path can be indexed by this indexer. + * + * The method should return false if and only if + * it is sure that the file cannot be indexed, + * by observation of its URI. + * This method exists in order to filter out files + * that are obviously incompatible for the indexer. + * However, there are situations where this is not reliable, + * since the storage is free to establish its own file naming rules, + * and that can affect the file extension. + * In case of doubt, it is recommended to leave the default implementation, + * which returns true unconditionally. + * Attempts to read invalid files can instead + * be handled gracefully by the indexer by capturing exceptions. + * * @param path a URI to the file to check - * @return whether the indexer can handle the file at the given path + * @return whether the item at the given URI path can be fed to this indexer */ public default boolean handles(URI path) { return true; @@ -67,8 +93,63 @@ public default boolean handles(URI path) { /** * Removes the indexed file at the given path from the database. * + * Unlike the other indexing tasks, + * this operation is synchronous + * and will only return when the operation is done. + * * @param path the URI of the document * @return whether it was successfully deleted from the database */ public boolean unindex(URI path); + + /** + * Removes indexed files from the database in bulk. + * + * The default implementation unindexes each item one by one + * in a non-specified order via {@linkplain #unindex(URI)}, + * but indexers may implement this as + * one or more individual operations in batch, + * thus becoming faster than unindexing each item individually. + * + * Like {@linkplain index}, + * this operation is asynchronous. + * One can keep track of the unindexing task's progress + * by passing a callback function as the second parameter. + * + * @param uris the URIs of the items to unindex + * @param progressCallback an optional function (can be `null`), + * called for every batch of items successfully unindexed + * to indicate early progress + * and inform consumers that + * it is safe to remove or exclude the unindexed item + * @return an asynchronous task object returning + * a report containing which files were not unindexed, + * and whether some of them were not found in the database + * @throws IOException if an error occurred + * before the unindexing operation could start, + * such as when failing to access or open the database + */ + public default Task unindex(Collection uris, Consumer> progressCallback) + throws IOException { + Objects.requireNonNull(uris); + return new Task<>(() -> { + List failures = new ArrayList<>(); + for (URI uri : uris) { + try { + if (unindex(uri)) { + // unindexed successfully + if (progressCallback != null) { + progressCallback.accept(Collections.singleton(uri)); + } + } else { + // failed to unindex, reason unknown + failures.add(new FailedUnindex(Collections.singleton(uri), null)); + } + } catch (Exception ex) { + failures.add(new FailedUnindex(Collections.singleton(uri), ex)); + } + } + return UnindexReport.withFailures(failures); + }); + } } diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java new file mode 100644 index 000000000..34ebcc952 --- /dev/null +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/datastructs/UnindexReport.java @@ -0,0 +1,159 @@ +/** + * Copyright (C) 2014 Universidade de Aveiro, DETI/IEETA, Bioinformatics Group - http://bioinformatics.ua.pt/ + * + * This file is part of Dicoogle/dicoogle-sdk. + * + * Dicoogle/dicoogle-sdk is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Dicoogle/dicoogle-sdk is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Dicoogle. If not, see . + */ +package pt.ua.dicoogle.sdk.datastructs; + +import java.io.Serializable; +import java.net.URI; +import java.util.Collection; +import java.util.Collections; +import java.util.Objects; + +/** Describes a report for a bulk unindexing operation. + */ +public final class UnindexReport implements Serializable { + + /** The description of an indexing error. + * + * Whether the file remains indexed or not + * when an error of this kind occurs + * is not specified. + */ + public static final class FailedUnindex implements Serializable { + /** The URIs to the items which failed to unindex. */ + public final Collection urisAffected; + + /** The exception describing the error which led to the failure. + * This field can be null + * when no cause is specified. + */ + public final Exception cause; + + /** Creates a failed unindex description + * due to the file not being found in the database. + * + * @param uri the URI of the file which could not be unindexed + * @param cause the underlying exception, if any + */ + public FailedUnindex(Collection urisAffected, Exception cause) { + Objects.requireNonNull(urisAffected); + this.urisAffected = urisAffected; + this.cause = cause; + } + + @Override + public String toString() { + return "FailedUnindex{urisAffected=" + urisAffected + ", cause=" + cause + "}"; + } + } + + /** URIs of files which were not found. */ + private final Collection notFound; + private final Collection failures; + + /** Creates a full report for a bulk unindexing operation. + * All parameters are nullable, + * in which case is equivalent to passing an empty collection. + * Once created, the report is final and immutable. + * + * @param notFound the URIs of files which were not found + * @param failures the error reports of files which could not be unindexed + */ + public UnindexReport(Collection notFound, Collection failures) { + if (notFound == null) { + notFound = Collections.emptyList(); + } + if (failures == null) { + failures = Collections.emptyList(); + } + this.notFound = notFound; + this.failures = failures; + } + + /** Creates a report with no unindexing failures. + */ + public static UnindexReport ok() { + return new UnindexReport(null, null); + } + + /** Creates a report with the given failures. + */ + public static UnindexReport withFailures(Collection failures) { + return new UnindexReport(null, failures); + } + + /** Returns whether all files were successfully unindexed from the database + * as requested. + */ + public boolean isOk() { + return notFound.isEmpty() && failures.isEmpty(); + } + + /** Returns whether all files are no longer unindexed, + * meaning that no errors occurred when trying to unindex an indexed file. + * + * This is different from {@link #isOk()} in that + * it does not imply that all files to unindex were found in the database. + * + * @return true if no unindex failures are reported other than files not found + */ + public boolean allUnindexed() { + return failures.isEmpty(); + } + + /** Obtains an immutable collection to + * the file batches which failed to unindex due to errors. + */ + public Collection getUnindexFailures() { + return Collections.unmodifiableCollection(this.failures); + } + + /** Obtains an immutable collection to the files + * which were not found in the index. + */ + public Collection getNotFound() { + return Collections.unmodifiableCollection(this.notFound); + } + + /** Returns the total count of failures reported during unindexing. + * + * Note that this does not necessarily correspond to + * the number of files affected, + * and does not include files which were not found. + */ + public long failureCount() { + return this.failures.size(); + } + + /** Returns the total count of files which were not unindexed, + * whether because they were not found + * or could not be unindexed for other reasons. + */ + public long notUnindexedFileCount() { + return this.notFound.size() + failedFileCount(); + } + + /** Returns the total count of files which failed to unindexed + * for reasons other than the files not being found. + */ + public long failedFileCount() { + return this.failures.stream() + .mapToLong(f -> f.urisAffected.size()) + .sum(); + } +} From 169d3eb0a055f60ff065c5b02a314ad4668a3f29 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Tue, 27 Aug 2024 17:35:40 +0100 Subject: [PATCH 6/9] Improve StorageInterface, add method `get` (#660) * [sdk] Add default method StorageInterface#get - specifically for retrieving a single item from storage, without deep file traversal or requiring any iterables - default impl uses `at` underneath to implement it - update code in core to use it where suitable - also correct image servlet to only query for instances to DIM providers if specified * [sdk] Improve StorageInterface documentation - Fix example code in #at - Clarify in #list that a trailing forward slash means that it's a directory * Apply suggestions from code review --- .../server/web/dicom/Information.java | 7 +-- .../server/web/servlets/ImageServlet.java | 29 ++++++---- .../pt/ua/dicoogle/sdk/StorageInterface.java | 53 ++++++++++++++++--- 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/dicom/Information.java b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/dicom/Information.java index e3cc3df77..004f8aadf 100644 --- a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/dicom/Information.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/dicom/Information.java @@ -113,20 +113,17 @@ public void onReceive(Task> e) { for (SearchResult r : itResults) { if (uri == null) uri = r.getURI(); - System.out.println("URI: " + uri.toString()); } if (uri != null) { StorageInterface str = PluginController.getInstance().getStorageForSchema(uri); if (str != null) { - Iterable stream = str.at(uri); - for (StorageInputStream r : stream) { + StorageInputStream r = str.get(uri); + if (r != null) { ret = r; stopAllTaks(); - latch.countDown(); - return; } } diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/ImageServlet.java b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/ImageServlet.java index dcc090656..d65f224a3 100644 --- a/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/ImageServlet.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/server/web/servlets/ImageServlet.java @@ -127,13 +127,13 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) // get the image file by the URI URI imgUri = new URI(uri); StorageInterface storageInt = PluginController.getInstance().getStorageForSchema(imgUri); - Iterator storages = storageInt.at(imgUri).iterator(); + StorageInputStream storageItem = storageInt.get(imgUri); // take the first valid storage - if (!storages.hasNext()) { + if (storageItem == null) { ResponseUtil.sendError(response, 404, "No image file for supplied URI"); return; } - imgFile = storages.next(); + imgFile = storageItem; } catch (URISyntaxException ex) { ResponseUtil.sendError(response, 400, "Bad URI syntax"); @@ -220,7 +220,6 @@ protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws S private static StorageInputStream getFileFromSOPInstanceUID(String sopInstanceUID, List providers) throws IOException { - // TODO use only DIM sources? JointQueryTask qt = new JointQueryTask() { @Override public void onCompletion() {} @@ -229,24 +228,32 @@ public void onCompletion() {} public void onReceive(Task> e) {} }; try { + PluginController pc = PluginController.getInstance(); if (providers == null) { - providers = PluginController.getInstance().getQueryProvidersName(true); + // use only DIM sources + providers = ServerSettingsManager.getSettings().getArchiveSettings().getDIMProviders(); + // exclude unknown query providers + providers.removeIf(pName -> pc.getQueryProviderByName(pName, true) == null); + if (providers.isEmpty()) { + // fallback to all query providers + providers = pc.getQueryProvidersName(true); + } } - Iterator it = PluginController.getInstance() - .query(qt, providers, "SOPInstanceUID:" + sopInstanceUID).get().iterator(); + Iterator it = pc + .query(qt, providers, "SOPInstanceUID:\"" + sopInstanceUID + '"').get().iterator(); if (!it.hasNext()) { throw new IOException("No such image of SOPInstanceUID " + sopInstanceUID); } SearchResult res = it.next(); - StorageInterface storage = PluginController.getInstance().getStorageForSchema(res.getURI()); + StorageInterface storage = pc.getStorageForSchema(res.getURI()); if (storage == null) { throw new IOException("Unsupported file scheme"); } - Iterator store = storage.at(res.getURI()).iterator(); - if (!store.hasNext()) { + StorageInputStream item = storage.get(res.getURI()); + if (item == null) { throw new IOException("No storage item found"); } - return store.next(); + return item; } catch (InterruptedException | ExecutionException ex) { throw new IOException(ex); } diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/StorageInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/StorageInterface.java index d574c14b4..fe26d0369 100644 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/StorageInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/StorageInterface.java @@ -61,19 +61,57 @@ public default boolean handles(URI location) { * This method is particularly nice for use in for-each loops. * * The provided scheme is not relevant at this point, but the developer must avoid calling this method - * with a path of a different schema. - * - *
for(StorageInputStream dicomObj : storagePlugin.at("file://dataset/")){
-     *      System.err.println(dicomObj.getURI());
-     * }
+ * with a path of a different scheme. + * + *
+     * URI uri = URI.create("file://dataset/");
+     * for (StorageInputStream dicomObj: storagePlugin.at(uri)) {
+     *     System.err.println(dicomObj.getURI());
+     * }
+     * 
* * @param location the location to read - * @param parameters a variable list of extra parameters for the retrieve + * @param parameters a variable list of extra retrieval parameters * @return an iterable of storage input streams * @see StorageInputStream */ public Iterable at(URI location, Object... parameters); + /** + * Obtains an item stored at the exact location specified. + * + * The provided scheme is not relevant at this point, + * but the developer must avoid calling this method + * with a path of a different scheme. + * + *
+     * URI uri = URI.create("file://dataset/CT/001.dcm");
+     * StorageInputStream item = storagePlugin.get(uri);
+     * if (item != null) {
+     *      System.err.println("Item at " + dicomObj.getURI() + " is available");
+     * }
+     * 
+ * + * The default implementation calls {@linkplain #at} + * and returns the first item if its URI matches the location requested. + * Implementations may wish to override this method for performance reasons. + * + * @param location the URI of the item to retrieve + * @param parameters a variable list of extra retrieval parameters + * @return a storage item if it was found, null otherwise + * @see StorageInputStream + */ + default public StorageInputStream get(URI location, Object... parameters) { + for (StorageInputStream sis : this.at(location, parameters)) { + if (Objects.equals(sis.getURI(), location)) { + return sis; + } + // don't try any further + break; + } + return null; + } + /** * Stores a DICOM object into the storage. * @@ -104,6 +142,9 @@ public default boolean handles(URI location) { * can yield intermediate URIs representing other directories rather than * objects. * + * Directories can be distinguished from regular files + * by the presence of a trailing forward slash in the URI. + * * The provided scheme is not relevant at this point, but the developer * must avoid calling this method with a path of a different scheme. * From 27848ccb455151f53e8309f2637c0e6464ef4462 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Tue, 27 Aug 2024 17:38:24 +0100 Subject: [PATCH 7/9] [sdk] Undeprecate StorageInterface#handles (#695) --- .../ua/dicoogle/plugins/DefaultFileStoragePlugin.java | 11 ++--------- .../java/pt/ua/dicoogle/sdk/StorageInterface.java | 4 +++- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/DefaultFileStoragePlugin.java b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/DefaultFileStoragePlugin.java index f9564461a..e2ceec73e 100644 --- a/dicoogle/src/main/java/pt/ua/dicoogle/plugins/DefaultFileStoragePlugin.java +++ b/dicoogle/src/main/java/pt/ua/dicoogle/plugins/DefaultFileStoragePlugin.java @@ -75,13 +75,6 @@ public String getScheme() { return defaultScheme; } - @Override - public boolean handles(URI location) { - if (location.getScheme() == null) - return true; - return location.getScheme().equals(defaultScheme); - } - private Iterator createIterator(URI location) { if (!handles(location)) { logger.error("Cannot Handle: " + location.toString()); @@ -122,7 +115,7 @@ public URI store(DicomInputStream inputStream, Object... args) throws IOExceptio @Override public void remove(URI location) { - if (!location.getScheme().equals(defaultScheme)) { + if (!handles(location)) { return; } @@ -135,7 +128,7 @@ public void remove(URI location) { @Override public Stream list(URI location) throws IOException { - if (!location.getScheme().equals(defaultScheme)) { + if (!handles(location)) { return Stream.empty(); } diff --git a/sdk/src/main/java/pt/ua/dicoogle/sdk/StorageInterface.java b/sdk/src/main/java/pt/ua/dicoogle/sdk/StorageInterface.java index fe26d0369..36b9c0735 100644 --- a/sdk/src/main/java/pt/ua/dicoogle/sdk/StorageInterface.java +++ b/sdk/src/main/java/pt/ua/dicoogle/sdk/StorageInterface.java @@ -47,10 +47,12 @@ public interface StorageInterface extends DicooglePlugin { /** * Checks whether the file in the given path can be handled by this storage plugin. * + * The default implementation checks that the given location URI + * has the exact same scheme as the scheme returned by {@link #getScheme()}. + * * @param location a URI containing a scheme to be verified * @return true if this storage plugin is in charge of URIs in the given form */ - @Deprecated public default boolean handles(URI location) { return Objects.equals(this.getScheme(), location.getScheme()); } From fab2732ec22d8ac8628a4215a53ae6509b4e6d7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Basti=C3=A3o=20Silva?= Date: Wed, 28 Aug 2024 11:39:33 +0100 Subject: [PATCH 8/9] Update CHANGELOG to 3.4.0 --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1b898b39..ea7eaa598 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ # CHANGELOG This document intends to keep track of the changes performed on the various releases of Dicoogle. +## 3.4 + +### 3.4.0 (2024-08-27) + +* New: Introduced bulk unindexing functionality for IndexerInterface. (#598) +* Enhancement: Added a kill switch for dead plugin sets in the core. (#686) +* Enhancement: Improved logging around indexing tasks. (#691) +* Enhancement: Added robustness to DIMGeneric against BodyPartThickness. (#694) +* Enhancement: Improved StorageInterface by adding a `get` method. (#660) +* Enhancement: Undeprecated StorageInterface#handles in the SDK. (#695) + ## 3.3 ### 3.3.6 (2024-04-24) From ad82398012c03d72fefc9d9bebbc4ee4d7279282 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Basti=C3=A3o=20Silva?= Date: Wed, 28 Aug 2024 11:39:48 +0100 Subject: [PATCH 9/9] Jump to 3.4.0 --- dicoogle/pom.xml | 2 +- pom.xml | 2 +- sdk/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dicoogle/pom.xml b/dicoogle/pom.xml index 14727a262..6adf5ec49 100644 --- a/dicoogle/pom.xml +++ b/dicoogle/pom.xml @@ -10,7 +10,7 @@ pt.ua.ieeta dicoogle-all - 3.3.7-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 5c36973d9..cebb1a7a1 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 pt.ua.ieeta dicoogle-all - 3.3.7-SNAPSHOT + 3.4.0 pom dicoogle-all diff --git a/sdk/pom.xml b/sdk/pom.xml index 1f3f6100d..8bb056c04 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -10,7 +10,7 @@ pt.ua.ieeta dicoogle-all - 3.3.7-SNAPSHOT + 3.4.0 ../pom.xml