Skip to content

Commit

Permalink
set limits for exported matches
Browse files Browse the repository at this point in the history
  • Loading branch information
Cristian Goina committed Nov 27, 2024
1 parent 283d737 commit 90e2e9e
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
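* @param <M> type of the key of the grouped results (exposed in JSON as "inputImage")
* @param <R> type of the matched target items grouped under the key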
*/
public class ResultMatches<M extends AbstractNeuronMetadata, R extends AbstractMatchedTarget<? extends AbstractNeuronMetadata>> extends GroupedItems<M, R> {
/**
 * Creates an empty instance; neither the key nor the items are set here.
 */
public ResultMatches() {
}

/**
 * Creates an instance for the given key and its matched items.
 *
 * @param key   the value passed to {@code setKey}
 * @param items the values passed to {@code setItems}
 */
public ResultMatches(M key, List<R> items) {
setKey(key);
setItems(items);
}

@JsonProperty("inputImage")
@Override
public M getKey() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ static class ExportMatchesCmdArgs extends AbstractCmdArgs {
variableArity = true)
List<MultiKeyValueArg> imageStoresPerMetadata = new ArrayList<>();

// Cap on the matches that share a published name for one MIP; defaults to 0.
// NOTE(review): presumably a value <= 0 means "no limit" - confirm against the exporter that consumes it.
@Parameter(names = {"--max-matches-with-same-name-per-mip"},
description = "Maximum number of matches per MIP that has the same published name. This will limit the number of matches with the same line name per MIP.")
int maxMatchedNamesPerMIP = 0;

// Cap on the total number of matches for one MIP; defaults to 0.
// NOTE(review): presumably a value <= 0 means "no limit" - confirm against the exporter that consumes it.
@Parameter(names = {"--max-matches-per-mip"}, description = "Maximum number of matches per mip")
int maxMatchesPerMIP = 0;

/**
 * Creates the export-matches arguments, handing the shared arguments to the superclass.
 *
 * @param commonArgs the arguments passed to the superclass constructor
 */
ExportMatchesCmdArgs(CommonArgs commonArgs) {
super(commonArgs);
}
Expand Down Expand Up @@ -298,7 +305,9 @@ private DataExporter getDataExporter() {
),
daosProvider.getNeuronMetadataDao(),
itemsWriter,
args.processingPartitionSize
args.processingPartitionSize,
args.maxMatchesPerMIP,
args.maxMatchedNamesPerMIP
);
case LM_CD_MATCHES:
return new LMCDMatchesExporter(
Expand All @@ -322,7 +331,9 @@ private DataExporter getDataExporter() {
),
daosProvider.getNeuronMetadataDao(),
itemsWriter,
args.processingPartitionSize
args.processingPartitionSize,
args.maxMatchesPerMIP,
args.maxMatchedNamesPerMIP
);
case EM_PPP_MATCHES:
return new EMPPPMatchesExporter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import java.io.File;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -15,7 +16,6 @@
import org.apache.commons.lang3.RegExUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.janelia.colormipsearch.cmd.jacsdata.CachedDataHelper;
import org.janelia.colormipsearch.dao.NeuronMetadataDao;
import org.janelia.colormipsearch.dataio.DataSourceParam;
Expand Down Expand Up @@ -48,6 +48,8 @@ public abstract class AbstractCDMatchesExporter extends AbstractDataExporter {
final NeuronMetadataDao<AbstractNeuronEntity> neuronMetadataDao;
final ItemsWriterToJSONFile resultMatchesWriter;
final int processingPartitionSize;
final int maxMatchedTargets;
final int maxMatchesWithSameNamePerMIP;

protected AbstractCDMatchesExporter(CachedDataHelper jacsDataHelper,
DataSourceParam dataSourceParam,
Expand All @@ -65,7 +67,9 @@ protected AbstractCDMatchesExporter(CachedDataHelper jacsDataHelper,
NeuronMatchesReader<CDMatchEntity<? extends AbstractNeuronEntity, ? extends AbstractNeuronEntity>> neuronMatchesReader,
NeuronMetadataDao<AbstractNeuronEntity> neuronMetadataDao,
ItemsWriterToJSONFile resultMatchesWriter,
int processingPartitionSize) {
int processingPartitionSize,
int maxMatchedTargets,
int maxMatchesWithSameNamePerMIP) {
super(jacsDataHelper, dataSourceParam, urlTransformer, imageStoreMapping, outputDir, executor);
this.targetLibraries = targetLibraries;
this.targetTags = targetTags;
Expand All @@ -78,9 +82,11 @@ protected AbstractCDMatchesExporter(CachedDataHelper jacsDataHelper,
this.neuronMetadataDao = neuronMetadataDao;
this.resultMatchesWriter = resultMatchesWriter;
this.processingPartitionSize = processingPartitionSize;
this.maxMatchedTargets = maxMatchedTargets;
this.maxMatchesWithSameNamePerMIP = maxMatchesWithSameNamePerMIP;
}

void retrieveAllCDMIPs(List<CDMatchEntity<? extends AbstractNeuronEntity, ? extends AbstractNeuronEntity>> matches) {
void retrieveAllCDMIPs(List<CDMatchEntity<AbstractNeuronEntity, AbstractNeuronEntity>> matches) {
// retrieve source ColorDepth MIPs
Set<String> sourceMIPIds = matches.stream()
.flatMap(m -> Stream.of(m.getMaskMIPId(), m.getMatchedMIPId()))
Expand All @@ -92,15 +98,48 @@ void retrieveAllCDMIPs(List<CDMatchEntity<? extends AbstractNeuronEntity, ? exte
/**
* Select the best matches for each pair of mip IDs
*/
<M extends AbstractNeuronEntity, T extends AbstractNeuronEntity> List<CDMatchEntity<? extends AbstractNeuronEntity, ? extends AbstractNeuronEntity>> selectBestMatchPerMIPPair(
List<CDMatchEntity<? extends AbstractNeuronEntity, ? extends AbstractNeuronEntity>> cdMatchEntities) {
Map<Pair<String, String>, CDMatchEntity<? extends AbstractNeuronEntity, ? extends AbstractNeuronEntity>> bestMatchesPerMIPsPairs = cdMatchEntities.stream()
<M extends AbstractNeuronEntity, T extends AbstractNeuronEntity>
List<CDMatchEntity<M, T>> selectBestMatchPerMIPPair(List<CDMatchEntity<? extends M, ? extends T>> cdMatchEntities) {
// one mask MIP ID may have multiple matches with the same target MIP ID
// here we only keep the best target MIP ID for the mask MIP ID
return cdMatchEntities.stream()
.filter(this::doesNotLookSuspicious)
.collect(Collectors.toMap(
m -> ImmutablePair.of(m.getMaskMIPId(), m.getMatchedMIPId()),
m -> m,
(m1, m2) -> m1.getNormalizedScore() >= m2.getNormalizedScore() ? m1 : m2)); // resolve by picking the best match
return new ArrayList<>(bestMatchesPerMIPsPairs.values());
.collect(Collectors.collectingAndThen(
Collectors.toMap(
m -> ImmutablePair.of(m.getMaskMIPId(), m.getMatchedMIPId()),
m -> (CDMatchEntity<M, T>)m,
// resolve the conflict by picking the best match
(m1, m2) -> m1.getNormalizedScore() >= m2.getNormalizedScore() ? m1 : m2),
m -> limitMatches(m.values())
));
}

/**
 * Applies the configured limits to the given matches and returns them ordered by descending normalized score.
 * <p>
 * The matches are first grouped by (mask MIP ID, published name of the matched image). When
 * {@code maxMatchesWithSameNamePerMIP} is positive, only the best-scoring
 * {@code maxMatchesWithSameNamePerMIP} matches of each group are kept. The remaining matches of all groups
 * are then ordered best first and, when {@code maxMatchedTargets} is positive, truncated to at most
 * {@code maxMatchedTargets} entries. A limit that is zero or negative is not applied.
 * <p>
 * NOTE(review): the {@code maxMatchedTargets} cap is applied to the whole input collection, not per mask
 * MIP ID - confirm that callers only pass matches that should share a single cap.
 *
 * @param cdMatchEntites the matches to limit
 * @param <M> mask neuron type
 * @param <T> target neuron type
 * @return a new list with the retained matches, best score first
 */
private <M extends AbstractNeuronEntity, T extends AbstractNeuronEntity> List<CDMatchEntity<M, T>> limitMatches(
        Collection<CDMatchEntity<M, T>> cdMatchEntites) {
    // negating the score makes the ascending comparator yield a descending order by normalized score
    Comparator<CDMatchEntity<M, T>> bestFirst = Comparator.comparingDouble(m -> -m.getNormalizedScore());
    Stream<CDMatchEntity<M, T>> rankedMatches = cdMatchEntites.stream()
            .collect(Collectors.groupingBy(
                    m -> ImmutablePair.of(m.getMaskMIPId(), m.getMatchedImage().getPublishedName())))
            .values().stream()
            .flatMap(sameNameMatches -> {
                if (maxMatchesWithSameNamePerMIP > 0 && maxMatchesWithSameNamePerMIP < sameNameMatches.size()) {
                    // sort a stream of the group instead of the collected list, because
                    // Collectors.toList() gives no guarantee that its result is mutable
                    return sameNameMatches.stream().sorted(bestFirst).limit(maxMatchesWithSameNamePerMIP);
                }
                return sameNameMatches.stream();
            })
            // single ordering pass; sorting the collected list again would be redundant
            .sorted(bestFirst);
    if (maxMatchedTargets > 0) {
        rankedMatches = rankedMatches.limit(maxMatchedTargets);
    }
    return rankedMatches.collect(Collectors.toList());
}

/**
Expand All @@ -123,43 +162,43 @@ private boolean doesNotLookSuspicious(CDMatchEntity<? extends AbstractNeuronEnti
resultMatches.getKey().transformAllNeuronFiles(this::relativizeURL);
String maskImageStore = resultMatches.getKey().getNeuronFile(FileType.store);
resultMatches.getItems()
.forEach(target -> {
updateTargetMatchMethod.accept(target.getTargetImage(), publisheURLsByNeuronId.get(target.getTargetImage().getInternalId()));
target.getTargetImage().transformAllNeuronFiles(this::relativizeURL);
// update match files - ideally we get these from PublishedURLs but
// if they are not there we try to create the searchable URL based on the input name and ColorDepthMIP name
NeuronPublishedURLs maskPublishedURLs = publisheURLsByNeuronId.get(target.getMaskImageInternalId());
String maskImageURL = getSearchableNeuronURL(maskPublishedURLs);
if (maskImageURL == null) {
// we used to construct the path to the PNG of the input (searchable_png) from the corresponding input mip,
// but we are no longer doing that we expect this to be uploaded and its URL "published" in the proper collection
LOG.error("No published URLs or no searchable neuron URL for match {} mask {}:{} -> {}",
target.getMatchInternalId(),
target.getMaskImageInternalId(), resultMatches.getKey(), target);
target.setMatchFile(FileType.CDMInput, null);
} else {
target.setMatchFile(FileType.CDMInput, relativizeURL(FileType.CDMInput, maskImageURL));
}
NeuronPublishedURLs targetPublishedURLs = publisheURLsByNeuronId.get(target.getTargetImage().getInternalId());
String targetImageStore = target.getTargetImage().getNeuronFile(FileType.store);
String tagetImageURL = getSearchableNeuronURL(targetPublishedURLs);
if (tagetImageURL == null) {
// we used to construct the path to the PNG of the input (searchable_png) from the corresponding input mip,
// but we are no longer doing that we expect this to be uploaded and its URL "published" in the proper collection
LOG.error("No published URLs or no searchable neuron URL for match {} target {}:{} -> {}",
target.getMatchInternalId(),
target.getTargetImage().getInternalId(), target.getTargetImage(), target);
target.setMatchFile(FileType.CDMMatch, null);
} else {
target.setMatchFile(FileType.CDMMatch, relativizeURL(FileType.CDMMatch, tagetImageURL));
}
if (!StringUtils.equals(maskImageStore, targetImageStore)) {
LOG.error("Image stores for mask {} and target {} do not match - this will become a problem when viewing this match",
maskImageStore, targetImageStore);
} else {
target.setMatchFile(FileType.store, targetImageStore);
}
});
.forEach(target -> {
updateTargetMatchMethod.accept(target.getTargetImage(), publisheURLsByNeuronId.get(target.getTargetImage().getInternalId()));
target.getTargetImage().transformAllNeuronFiles(this::relativizeURL);
// update match files - ideally we get these from PublishedURLs but
// if they are not there we try to create the searchable URL based on the input name and ColorDepthMIP name
NeuronPublishedURLs maskPublishedURLs = publisheURLsByNeuronId.get(target.getMaskImageInternalId());
String maskImageURL = getSearchableNeuronURL(maskPublishedURLs);
if (maskImageURL == null) {
// we used to construct the path to the PNG of the input (searchable_png) from the corresponding input mip,
// but we are no longer doing that we expect this to be uploaded and its URL "published" in the proper collection
LOG.error("No published URLs or no searchable neuron URL for match {} mask {}:{} -> {}",
target.getMatchInternalId(),
target.getMaskImageInternalId(), resultMatches.getKey(), target);
target.setMatchFile(FileType.CDMInput, null);
} else {
target.setMatchFile(FileType.CDMInput, relativizeURL(FileType.CDMInput, maskImageURL));
}
NeuronPublishedURLs targetPublishedURLs = publisheURLsByNeuronId.get(target.getTargetImage().getInternalId());
String targetImageStore = target.getTargetImage().getNeuronFile(FileType.store);
String tagetImageURL = getSearchableNeuronURL(targetPublishedURLs);
if (tagetImageURL == null) {
// we used to construct the path to the PNG of the input (searchable_png) from the corresponding input mip,
// but we are no longer doing that we expect this to be uploaded and its URL "published" in the proper collection
LOG.error("No published URLs or no searchable neuron URL for match {} target {}:{} -> {}",
target.getMatchInternalId(),
target.getTargetImage().getInternalId(), target.getTargetImage(), target);
target.setMatchFile(FileType.CDMMatch, null);
} else {
target.setMatchFile(FileType.CDMMatch, relativizeURL(FileType.CDMMatch, tagetImageURL));
}
if (!StringUtils.equals(maskImageStore, targetImageStore)) {
LOG.error("Image stores for mask {} and target {} do not match - this will become a problem when viewing this match",
maskImageStore, targetImageStore);
} else {
target.setMatchFile(FileType.store, targetImageStore);
}
});
}

private String getSearchableNeuronURL(NeuronPublishedURLs publishedURLs) {
Expand Down
Loading

0 comments on commit 90e2e9e

Please sign in to comment.