Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Demo comparison speed #11322

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@
<version>${apache_httpclient.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
Expand All @@ -363,6 +363,9 @@
to http for smaller jar -->
<classifier>all</classifier>
</dependency>



<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>clickhouse</artifactId>
Expand Down
41 changes: 41 additions & 0 deletions src/main/java/org/cbioportal/model/SampleToPanel.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.cbioportal.model;

import jakarta.validation.constraints.NotNull;

import java.io.Serializable;

/**
 * Serializable bean linking one sample to one gene panel and, optionally,
 * to a genetic profile.
 *
 * <p>{@code sampleUniqueId} and {@code genePanelId} carry {@code @NotNull}
 * validation constraints; {@code geneticProfileId} has none.
 */
public class SampleToPanel implements Serializable {

    /** Unique identifier of the sample; must not be null. */
    @NotNull
    private String sampleUniqueId;

    /** Identifier of the gene panel associated with the sample; must not be null. */
    @NotNull
    private String genePanelId;

    /** Identifier of the genetic profile; carries no validation constraint. */
    private String geneticProfileId;

    // ---- getters ----

    /** @return the unique identifier of the sample */
    public String getSampleUniqueId() {
        return this.sampleUniqueId;
    }

    /** @return the identifier of the gene panel */
    public String getGenePanelId() {
        return this.genePanelId;
    }

    /** @return the identifier of the genetic profile, possibly {@code null} */
    public String getGeneticProfileId() {
        return this.geneticProfileId;
    }

    // ---- setters ----

    /** @param sampleUniqueId the unique identifier of the sample */
    public void setSampleUniqueId(String sampleUniqueId) {
        this.sampleUniqueId = sampleUniqueId;
    }

    /** @param genePanelId the identifier of the gene panel */
    public void setGenePanelId(String genePanelId) {
        this.genePanelId = genePanelId;
    }

    /** @param geneticProfileId the identifier of the genetic profile */
    public void setGeneticProfileId(String geneticProfileId) {
        this.geneticProfileId = geneticProfileId;
    }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.cbioportal.persistence;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.AlterationEnrichment;
import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
Expand All @@ -22,6 +23,7 @@
import org.cbioportal.web.parameter.GenomicDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -89,4 +91,6 @@ public interface StudyViewRepository {
// Returns a list of molecular profiles; judging by the name, these are the generic-assay profiles.
// NOTE(review): which studies are covered is not visible from this declaration — confirm against the implementation.
List<MolecularProfile> getGenericAssayProfiles();

/**
 * Returns molecular profiles for the given study-view filter context and alteration type.
 * NOTE(review): presumably only profiles of samples passing the filter are returned, and
 * {@code alterationType} is a molecular alteration type name — confirm against the implementation.
 *
 * @param studyViewFilterContext the study-view filter context to apply
 * @param alterationType the alteration type to select profiles by
 * @return the matching molecular profiles
 */
List<MolecularProfile> getFilteredMolecularProfilesByAlterationType(StudyViewFilterContext studyViewFilterContext, String alterationType);

/**
 * Computes per-gene alteration counts for the given samples.
 *
 * @param sampleStableIds identifiers of the samples to count over
 * @return a map of {@link AlterationCountByGene} entries; the ClickHouse implementation
 *         keys the map by Hugo gene symbol
 */
HashMap<String, AlterationCountByGene> getAlterationEnrichmentCounts(List<String> sampleStableIds);
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.cbioportal.model.PatientTreatment;
import org.cbioportal.model.MolecularProfile;
import org.cbioportal.model.Sample;
import org.cbioportal.model.SampleToPanel;
import org.cbioportal.model.SampleTreatment;
import org.cbioportal.persistence.helper.AlterationFilterHelper;
import org.cbioportal.persistence.helper.StudyViewFilterHelper;
Expand Down Expand Up @@ -87,4 +88,12 @@ public interface StudyViewMapper {
// Mapper query returning a list of molecular profiles; judging by the name, the generic-assay profiles.
// NOTE(review): the backing SQL statement is not visible here — confirm its scope in the mapper XML.
List<MolecularProfile> getGenericAssayProfiles();

/**
 * Mapper query returning molecular profiles for the given filter helper and alteration type.
 * NOTE(review): the backing SQL statement is not visible here — confirm the exact filtering semantics.
 *
 * @param studyViewFilterHelper helper wrapping the study-view filter
 * @param alterationType the alteration type to select profiles by
 * @return the matching molecular profiles
 */
List<MolecularProfile> getFilteredMolecularProfilesByAlterationType(StudyViewFilterHelper studyViewFilterHelper, String alterationType);

/**
 * Mapper query returning alteration counts by gene for a set of samples.
 *
 * @param samplelist the samples to query, passed as one string; the repository builds it as a
 *        comma-separated list of single-quoted ids (e.g. {@code 'S1','S2'}).
 *        NOTE(review): presumably this is substituted textually into the SQL statement —
 *        if so the ids must be escaped by the caller; confirm in the mapper XML.
 * @return one or more {@link AlterationCountByGene} rows per gene (the repository sums rows
 *         that share a Hugo gene symbol)
 */
List<AlterationCountByGene> getAlterationEnrichmentCounts(String samplelist);

/**
 * Mapper query returning sample-to-gene-panel associations for a set of samples.
 *
 * @param samplelist the samples to query, passed as one string; the repository builds it as a
 *        comma-separated list of single-quoted ids (e.g. {@code 'S1','S2'}).
 *        NOTE(review): presumably this is substituted textually into the SQL statement —
 *        if so the ids must be escaped by the caller; confirm in the mapper XML.
 * @return the {@link SampleToPanel} rows found; a sample may appear in several rows
 */
List<SampleToPanel> getSampleToGenePanels(String samplelist);

/**
 * Mapper query returning gene-panel/gene rows. The repository groups the result by
 * gene panel id and then by Hugo gene symbol.
 * NOTE(review): presumably this covers every gene panel in the database — confirm in the mapper XML.
 *
 * @return the {@link GenePanelToGene} rows
 */
List<GenePanelToGene> getGenePanelGenes();


}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.cbioportal.persistence.mybatisclickhouse;
import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.AlterationEnrichment;
import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
Expand All @@ -14,6 +15,7 @@
import org.cbioportal.model.MolecularProfile;
import org.cbioportal.model.PatientTreatment;
import org.cbioportal.model.Sample;
import org.cbioportal.model.SampleToPanel;
import org.cbioportal.model.SampleTreatment;
import org.cbioportal.model.StudyViewFilterContext;
import org.cbioportal.persistence.StudyViewRepository;
Expand All @@ -28,15 +30,20 @@
import org.cbioportal.web.parameter.GenomicDataBinFilter;
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Repository;
import com.clickhouse.client.*;

import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

@Repository
@ConditionalOnProperty(name = "clickhouse_mode", havingValue = "true")
Expand Down Expand Up @@ -260,6 +267,157 @@ public List<GenericAssayDataCountItem> getGenericAssayDataCounts(StudyViewFilter
return mapper.getGenericAssayDataCounts(createStudyViewFilterHelper(studyViewFilterContext), genericAssayDataFilters);
}

// Lazily populated lookup: gene panel id -> (Hugo gene symbol -> panel/gene row).
private Map<String, Map<String, GenePanelToGene>> _data = null;

/**
 * Returns the gene-panel lookup, loading it through the mapper on the first call and
 * reusing the stored instance afterwards.
 *
 * <p>The returned object is the stored instance itself, not a copy.
 * NOTE(review): callers that modify the returned maps (or their key-set views) change
 * what every later call sees.
 *
 * @param str not read by this method
 * @return map from gene panel id to a map of Hugo gene symbol to its {@link GenePanelToGene}
 *         row; when a panel lists the same symbol more than once the first row is kept
 */
public Map<String, Map<String, GenePanelToGene>> getGenePanelsToGenes(String str){
    if (_data != null) {
        return _data;
    }

    List<GenePanelToGene> panelGeneRows = mapper.getGenePanelGenes();
    _data = panelGeneRows.stream().collect(
        Collectors.groupingBy(
            GenePanelToGene::getGenePanelId,
            Collectors.toMap(
                GenePanelToGene::getHugoGeneSymbol,
                row -> row,
                // duplicate symbol within one panel: keep the row seen first
                (first, duplicate) -> first)));
    return _data;
}

// private doIt(){
// Map<String, Map<String, Integer>> alteredGenesWithCounts = new HashMap<>();
//
// for (Map<String, Object> alteration : alterations) {
// String hugoGeneSymbol = (String) alteration.get("hugo_gene_symbol");
// int count = Integer.parseInt(alteration.get("count").toString());
//
// if (!alteredGenesWithCounts.containsKey(hugoGeneSymbol)) {
// alteredGenesWithCounts.put(hugoGeneSymbol, new HashMap<>());
// alteredGenesWithCounts.get(hugoGeneSymbol).put("count", 0);
// }
//
// alteredGenesWithCounts.get(hugoGeneSymbol).put("count",
// alteredGenesWithCounts.get(hugoGeneSymbol).get("count") + count);
// }
//
// }


/**
 * Looks up the gene panels associated with the given samples.
 *
 * <p>The ids are handed to the mapper as a single comma-separated string of single-quoted
 * literals. Backslashes and single quotes inside an id are backslash-escaped so that an id
 * can never terminate its own literal.
 * NOTE(review): presumably the mapper splices this string into an {@code IN (...)} clause —
 * confirm; if so, prefer changing the mapper to accept the list and bind each id as a parameter.
 *
 * @param sampleStableIds identifiers of the samples to look up; must not be null
 * @return the sample-to-panel rows returned by the mapper
 */
@Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()")
public List<SampleToPanel> getSampleToGenePanels(List<String> sampleStableIds) {
    String quotedIds = sampleStableIds.stream()
        // escape the backslash first so the escapes added for quotes are not doubled
        .map(id -> "'" + String.valueOf(id).replace("\\", "\\\\").replace("'", "\\'") + "'")
        .collect(Collectors.joining(","));
    return mapper.getSampleToGenePanels(quotedIds);
}

/**
 * Fetches the raw alteration-count rows for the given samples from the mapper.
 *
 * <p>The ids are handed to the mapper as a single comma-separated string of single-quoted
 * literals. Backslashes and single quotes inside an id are backslash-escaped so that an id
 * can never terminate its own literal.
 * NOTE(review): presumably the mapper splices this string into an {@code IN (...)} clause —
 * confirm; if so, prefer changing the mapper to accept the list and bind each id as a parameter.
 *
 * @param sampleStableIds identifiers of the samples to count over; must not be null
 * @return the alteration-count rows returned by the mapper
 */
@Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()")
public List<AlterationCountByGene> getAlterationEnrichmentCountsCached(List<String>sampleStableIds){
    String quotedIds = sampleStableIds.stream()
        // escape the backslash first so the escapes added for quotes are not doubled
        .map(id -> "'" + String.valueOf(id).replace("\\", "\\\\").replace("'", "\\'") + "'")
        .collect(Collectors.joining(","));
    return mapper.getAlterationEnrichmentCounts(quotedIds);
}

/**
 * Computes, for every gene profiled in at least one of the given samples, how many of
 * the samples were profiled for that gene and how many alterations were counted for it.
 *
 * <p>Steps:
 * <ol>
 *   <li>load the gene-panel lookup (panel id -&gt; genes on that panel);</li>
 *   <li>find the set of gene panels attached to each sample;</li>
 *   <li>count the samples sharing each distinct panel set, so the gene intersection is
 *       computed once per panel set rather than once per sample;</li>
 *   <li>for each panel set, take the genes present on <em>all</em> of its known panels and
 *       add the number of samples to those genes' profiled-case tally;</li>
 *   <li>attach the summed altered-case count to each profiled gene.</li>
 * </ol>
 *
 * <p>The gene intersection is built on a private copy. The panel maps returned by
 * {@link #getGenePanelsToGenes(String)} are a shared cached instance, and calling
 * {@code retainAll} on their {@code keySet()} view would delete genes from that cache.
 *
 * <p>Panel ids that are absent from the lookup are skipped instead of causing a
 * {@link NullPointerException}.
 * NOTE(review): skipping treats an unknown panel as imposing no restriction — confirm
 * that this is the intended meaning for such panels.
 *
 * <p>NOTE(review): the two {@code @Cacheable} helpers are invoked on {@code this}; with
 * Spring's default proxy-based caching such self-invocations bypass the cache — confirm.
 *
 * @param sampleStableIds identifiers of the samples to count over; must not be null
 * @return map from Hugo gene symbol to its profiled/altered counts; genes that no sample
 *         is profiled for are absent
 */
@Override
public HashMap<String, AlterationCountByGene> getAlterationEnrichmentCounts(List<String> sampleStableIds) {

    // panel id -> (Hugo gene symbol -> panel/gene row); shared instance, treat as read-only
    Map<String, Map<String, GenePanelToGene>> panelToGeneMap = getGenePanelsToGenes("help");

    // sample id -> ids of all gene panels applied to that sample
    Map<String, Set<String>> panelsBySample = getSampleToGenePanels(sampleStableIds).stream()
        .collect(Collectors.groupingBy(
            SampleToPanel::getSampleUniqueId,
            Collectors.mapping(SampleToPanel::getGenePanelId, Collectors.toSet())));

    // distinct panel set -> number of samples governed by exactly that set
    Map<Set<String>, Integer> sampleCountByPanelSet = new HashMap<>();
    for (Set<String> panelIds : panelsBySample.values()) {
        sampleCountByPanelSet.merge(panelIds, 1, Integer::sum);
    }

    // Hugo gene symbol -> altered-case count summed over all rows for that gene
    Map<String, Integer> alteredCasesByGene = new HashMap<>();
    for (AlterationCountByGene row : getAlterationEnrichmentCountsCached(sampleStableIds)) {
        alteredCasesByGene.merge(row.getHugoGeneSymbol(), row.getNumberOfAlteredCases(), Integer::sum);
    }

    HashMap<String, AlterationCountByGene> geneCount = new HashMap<>();
    for (Map.Entry<Set<String>, Integer> group : sampleCountByPanelSet.entrySet()) {

        // genes present on every known panel of this set, accumulated in a private copy
        Set<String> profiledGenes = null;
        for (String panelId : group.getKey()) {
            Map<String, GenePanelToGene> panelGenes = panelToGeneMap.get(panelId);
            if (panelGenes == null) {
                continue; // panel id not present in the lookup
            }
            if (profiledGenes == null) {
                profiledGenes = new HashSet<>(panelGenes.keySet());
            } else {
                profiledGenes.retainAll(panelGenes.keySet());
            }
        }
        if (profiledGenes == null) {
            continue; // none of this set's panels is known, so nothing to tally
        }

        for (String gene : profiledGenes) {
            AlterationCountByGene tally = geneCount.get(gene);
            if (tally == null) {
                tally = new AlterationCountByGene();
                tally.setHugoGeneSymbol(gene);
                tally.setNumberOfProfiledCases(0);
                tally.setNumberOfAlteredCases(alteredCasesByGene.getOrDefault(gene, 0));
                geneCount.put(gene, tally);
            }
            tally.setNumberOfProfiledCases(tally.getNumberOfProfiledCases() + group.getValue());
        }
    }

    return geneCount;
}

/**
 * Returns mutation counts for the given filter context and genomic data filter by
 * delegating to the mapper.
 *
 * @param studyViewFilterContext the study-view filter context, wrapped in a filter helper before the query
 * @param genomicDataFilter the genomic data filter passed through to the mapper
 * @return the map of counts produced by the mapper
 */
public Map<String, Integer> getMutationCounts(StudyViewFilterContext studyViewFilterContext, GenomicDataFilter genomicDataFilter) {
    var filterHelper = createStudyViewFilterHelper(studyViewFilterContext);
    return mapper.getMutationCounts(filterHelper, genomicDataFilter);
}
Expand All @@ -268,4 +426,6 @@ public List<GenomicDataCountItem> getMutationCountsByType(StudyViewFilterContext
return mapper.getMutationCountsByType(createStudyViewFilterHelper(studyViewFilterContext), genomicDataFilters);
}



}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.cbioportal.service;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.AlterationEnrichment;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCountItem;
Expand All @@ -20,6 +21,7 @@
import org.cbioportal.web.parameter.GenomicDataFilter;
import org.cbioportal.web.parameter.StudyViewFilter;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -58,4 +60,6 @@ public interface StudyViewColumnarService {
// Returns clinical-data count items for the given study-view filter and generic-assay bin filters.
// NOTE(review): how bins map onto the returned items is not visible from this declaration — confirm against the implementation.
List<ClinicalDataCountItem> getGenericAssayDataBinCounts(StudyViewFilter studyViewFilter, List<GenericAssayDataBinFilter> genericAssayDataBinFilters);

/**
 * Returns genomic data count items for the given study-view filter and genomic data filters.
 * NOTE(review): presumably the counts are broken down by mutation type per gene — confirm
 * against the implementation.
 *
 * @param studyViewFilter the study-view filter to apply
 * @param genomicDataFilters the genomic data filters to count for
 * @return the resulting count items
 */
List<GenomicDataCountItem> getMutationTypeCountsByGeneSpecific(StudyViewFilter studyViewFilter, List<GenomicDataFilter> genomicDataFilters);

/**
 * Computes per-gene alteration counts for the given samples.
 *
 * @param sampleStableIds identifiers of the samples to count over
 * @return a map of {@link AlterationCountByGene} entries.
 *         NOTE(review): presumably keyed by Hugo gene symbol — confirm against the implementation.
 */
HashMap<String, AlterationCountByGene> getAlterationEnrichmentCounts(List<String> sampleStableIds);
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ public List<ReferenceGenomeGene> fetchGenes(String genomeName) {

/**
 * Checks whether every table in {@code TABLES} has a recorded timestamp that lies
 * strictly before the given expiration instant.
 *
 * <p>The check is performed for real; a hard-coded {@code return true} here would make
 * the timestamp lookup pointless and, placed after the real {@code return}, is an
 * unreachable statement that does not compile.
 *
 * @param expiration the instant each table's timestamp must precede
 * @return {@code true} only if {@code TABLES} is non-empty and every table has a non-null
 *         timestamp before {@code expiration}; {@code false} otherwise
 */
private boolean allTablesUpToDate(Date expiration) {
    Map<String, Date> timestamps = timestampService.getTimestampsAsDates(TABLES);
    // An empty table list yields false rather than allMatch's vacuous true.
    return !TABLES.isEmpty() && TABLES.stream().allMatch(table -> {
        Date updated = timestamps.get(table);
        // a missing (or null) timestamp counts as not up to date
        return updated != null && updated.before(expiration);
    });
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.cbioportal.service.impl;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.AlterationEnrichment;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalData;
Expand Down Expand Up @@ -35,6 +36,7 @@
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -272,7 +274,11 @@ public List<GenomicDataCountItem> getMutationCountsByGeneSpecific(StudyViewFilte
public List<GenomicDataCountItem> getMutationTypeCountsByGeneSpecific(StudyViewFilter studyViewFilter, List<GenomicDataFilter> genomicDataFilters) {
    // Build the filter context from the study-view filter, then delegate to the
    // repository's by-type mutation count query.
    StudyViewFilterContext filterContext = createContext(studyViewFilter);
    return studyViewRepository.getMutationCountsByType(filterContext, genomicDataFilters);
}



/**
 * Returns per-gene alteration counts for the given samples by delegating to the
 * study-view repository.
 *
 * @param sampleStableIds identifiers of the samples to count over
 * @return the map produced by the repository
 */
public HashMap<String, AlterationCountByGene> getAlterationEnrichmentCounts(List<String> sampleStableIds) {
    HashMap<String, AlterationCountByGene> countsByGene =
        studyViewRepository.getAlterationEnrichmentCounts(sampleStableIds);
    return countsByGene;
}

private StudyViewFilterContext createContext(StudyViewFilter studyViewFilter) {
List<CustomSampleIdentifier> customSampleIdentifiers = customDataFilterUtil.extractCustomDataSamples(studyViewFilter);
Expand All @@ -289,7 +295,7 @@ private List<ClinicalDataCountItem> generateDataCountItemsFromDataCounts(List<Cl
return item;
}).toList();
}



}
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,11 @@ public List<GenomicDataCount> getGenomicDataCounts(List<String> studyIds, List<S
.collect(Collectors.toMap(MolecularProfile::getStableId, Function.identity()));

// get gene panels
Map<String, Integer> molecularProfileCaseCountSet = genePanelService
.fetchGenePanelDataInMultipleMolecularProfiles(molecularProfileSampleIdentifiers)
var moo = genePanelService
.fetchGenePanelDataInMultipleMolecularProfiles(molecularProfileSampleIdentifiers);


Map<String, Integer> molecularProfileCaseCountSet = moo
.stream()
.filter(GenePanelData::getProfiled)
.collect(Collectors.groupingBy(GenePanelData::getMolecularProfileId))
Expand Down
Loading
Loading