Skip to content

Commit

Permalink
Molecular-profile-counts-service (cBioPortal#10934)
Browse files Browse the repository at this point in the history
* add unit tests for molecular-profile-counts-service
* fix multiple-study scenario

Update method name getGenomicDataCounts -> getMolecularProfileSampleCounts
  • Loading branch information
alisman authored Aug 13, 2024
1 parent d62c6e5 commit 280dd58
Show file tree
Hide file tree
Showing 11 changed files with 172 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public interface StudyViewRepository {

List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes);

List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter);
List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter);

List<ClinicalAttribute> getClinicalAttributes();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
public interface StudyViewMapper {
List<Sample> getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);
List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter,
boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import org.cbioportal.web.parameter.StudyViewFilter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Repository;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -78,10 +80,33 @@ public List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFi
}

/**
 * Returns the number of profiled samples per molecular profile type for the
 * samples selected by the given study view filter.
 *
 * <p>The mapper result is grouped by {@code GenomicDataCount.getValue()} and
 * collapsed into a single entry per value: the counts of all entries in a
 * group are summed and the label of the first entry in the group is reused.
 *
 * <p>NOTE(review): the groups are collected with the single-argument
 * {@code Collectors.groupingBy}, which makes no guarantee about the map type
 * it returns, so the order of the merged list should not be relied upon.
 *
 * @param studyViewFilter filter describing the current cohort selection
 * @return one merged {@code GenomicDataCount} per distinct value
 */
@Override
public List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter) {
public List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
return mapper.getGenomicDataCounts(studyViewFilter, categorizedClinicalDataCountFilter,
var sampleCounts = mapper.getMolecularProfileSampleCounts(studyViewFilter, categorizedClinicalDataCountFilter,
shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter));

// Bucket the raw per-profile rows by their value (the profile type key).
Map<String, List<GenomicDataCount>> countsPerType = sampleCounts.stream()
.collect((Collectors.groupingBy(GenomicDataCount::getValue)));

// Different cancer studies combined into one cohort have separate molecular
// profiles of a given type (e.g. mutation). The counts of these profiles
// are merged per type, and a single label has to be chosen for the merged
// entry. This code simply picks the first label, which assumes that the
// labels match across studies.
List<GenomicDataCount> mergedCounts = new ArrayList<>();
for (Map.Entry<String,List<GenomicDataCount>> entry : countsPerType.entrySet()) {
var dc = new GenomicDataCount();
dc.setValue(entry.getKey());
// Reuse the label of the first profile in the group (groups are never empty).
dc.setLabel(entry.getValue().get(0).getLabel());
// Total sample count across all profiles that share this value.
Integer sum = entry.getValue().stream()
.map(x -> x.getCount())
.collect(Collectors.summingInt(Integer::intValue));
dc.setCount(sum);
mergedCounts.add(dc);
}
return mergedCounts;

}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public interface StudyViewColumnarService {

List<ClinicalData> getSampleClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter);
List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter);

List<ClinicalEventTypeCount> getClinicalEventTypeCounts(StudyViewFilter studyViewFilter);
PatientTreatmentReport getPatientTreatmentReport(StudyViewFilter studyViewFilter);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ public List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilt
}

/**
 * Returns the molecular profile sample counts for the given study view filter.
 *
 * <p>This is a thin pass-through: the filter is handed unchanged to the
 * repository method of the same name and its result is returned as-is.
 *
 * @param studyViewFilter filter describing the current cohort selection
 * @return the counts produced by the repository
 */
@Override
public List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter) {
return studyViewRepository.getGenomicDataCounts(studyViewFilter);
public List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter) {
return studyViewRepository.getMolecularProfileSampleCounts(studyViewFilter);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ public ResponseEntity<List<GenomicDataCount>> fetchMolecularProfileSampleCounts(
)
{
return new ResponseEntity<List<GenomicDataCount>>(
studyViewColumnarService.getGenomicDataCounts(interceptedStudyViewFilter)
studyViewColumnarService.getMolecularProfileSampleCounts(interceptedStudyViewFilter)
, HttpStatus.OK);
}

Expand Down
3 changes: 2 additions & 1 deletion src/main/resources/db-scripts/clickhouse/clickhouse.sql
Original file line number Diff line number Diff line change
Expand Up @@ -282,4 +282,5 @@ OPTIMIZE TABLE gene_panel_to_gene_derived;
OPTIMIZE TABLE sample_derived;
OPTIMIZE TABLE genomic_event_derived;
OPTIMIZE TABLE clinical_data_derived;
OPTIMIZE TABLE clinical_event_derived;
OPTIMIZE TABLE clinical_event_derived;

Original file line number Diff line number Diff line change
Expand Up @@ -129,24 +129,24 @@
</select>

<!-- for /molecular-profile-sample-counts/fetch (returns GenomicDataCount) which will then be converted to clinicalDataCountItems -->
<select id="getGenomicDataCounts" resultType="org.cbioportal.model.GenomicDataCount">
--we need to derive the alteration type from the stable_id by removing cancer study id
--this should probably be refactored at some point but we need to maintain api interface
SELECT replaceOne(genetic_profile.stable_id, concat(sample_derived.cancer_study_identifier,'_'), '') AS value,
genetic_profile.stable_id,
genetic_profile.name AS label,
count(sample_profile.genetic_profile_id) AS count FROM sample_profile
<select id="getMolecularProfileSampleCounts" resultType="org.cbioportal.model.GenomicDataCount">
--we need to derive the alteration type from the stable_id by removing cancer study id
--this should probably be refactored at some point but we need to maintain api interface
SELECT replaceOne(genetic_profile.stable_id,
concat(sample_derived.cancer_study_identifier,'_'), '') AS value,
genetic_profile.name AS label,
count(sample_profile.genetic_profile_id) AS count
FROM sample_profile
LEFT JOIN sample_derived ON sample_profile.sample_id=sample_derived.internal_id
LEFT JOIN genetic_profile on sample_profile.genetic_profile_id = genetic_profile.genetic_profile_id
<where>
<where>
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'SAMPLE_ID_ONLY'"/>
</include>
</where>
GROUP BY genetic_profile.stable_id, genetic_profile.name, sample_derived.cancer_study_identifier;
GROUP BY genetic_profile.stable_id, genetic_profile.name, sample_derived.cancer_study_identifier
</select>


<!-- for /sample-lists-counts/fetch (returns CaseListDataCount) -->
<select id="getCaseListDataCounts" resultType="org.cbioportal.model.CaseListDataCount">
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public static void beforeAll() {

@ClassRule
public static final ClickHouseContainer clickhouseContainer =
new ClickHouseContainer("clickhouse/clickhouse-server:22.6")
new ClickHouseContainer("clickhouse/clickhouse-server:24.5")
.withUsername("cbio_user")
.withPassword("P@ssword1")
.withClasspathResourceMapping("clickhouse_cgds.sql", "/docker-entrypoint-initdb.d/a_schema.sql",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package org.cbioportal.persistence.mybatisclickhouse;

import org.cbioportal.persistence.mybatisclickhouse.config.MyBatisConfig;
import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;

import org.cbioportal.web.parameter.StudyViewFilter;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.jdbc.AutoConfigureTestDatabase;
import org.springframework.boot.test.autoconfigure.orm.jpa.DataJpaTest;
import org.springframework.context.annotation.Import;
import org.springframework.test.annotation.DirtiesContext;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.assertEquals;

/**
 * Tests for {@code StudyViewMapper.getMolecularProfileSampleCounts}, run
 * against the database set up through {@link AbstractTestcontainers}.
 *
 * <p>Each test builds a {@link StudyViewFilter} restricted to one or more
 * studies and one or more genomic-profile groups, calls the mapper directly
 * and checks the sample count reported for a given profile value.
 */
@RunWith(SpringRunner.class)
@Import(MyBatisConfig.class)
@DataJpaTest
@DirtiesContext
@AutoConfigureTestDatabase(replace= AutoConfigureTestDatabase.Replace.NONE)
@ContextConfiguration(initializers = AbstractTestcontainers.Initializer.class)
public class MolecularProfileCountTest extends AbstractTestcontainers {

    private static final String STUDY_TCGA_PUB = "study_tcga_pub";
    private static final String STUDY_ACC_TCGA = "acc_tcga";

    @Autowired
    private StudyViewMapper studyViewMapper;

    /** Single study, single "mutations" profile group: expects 10 samples. */
    @Test
    public void getMolecularProfileCounts() {
        StudyViewFilter filter = filterFor(
            List.of(STUDY_TCGA_PUB),
            List.of(Arrays.asList("mutations")));

        var counts = studyViewMapper.getMolecularProfileSampleCounts(
            filter, CategorizedClinicalDataCountFilter.getBuilder().build(), false);

        int mutationSamples = counts.stream()
            .filter(entry -> entry.getValue().equals("mutations"))
            .findFirst()
            .orElseThrow()
            .getCount()
            .intValue();
        assertEquals(10, mutationSamples);
    }

    /**
     * Two studies, single "mutations" profile group. Only the first entry
     * whose value is "mutations" is inspected; it is expected to report 10.
     */
    @Test
    public void getMolecularProfileCountsMultipleStudies() {
        StudyViewFilter filter = filterFor(
            List.of(STUDY_TCGA_PUB, STUDY_ACC_TCGA),
            List.of(Arrays.asList("mutations")));

        var counts = studyViewMapper.getMolecularProfileSampleCounts(
            filter, CategorizedClinicalDataCountFilter.getBuilder().build(), false);

        int mutationSamples = counts.stream()
            .filter(entry -> entry.getValue().equals("mutations"))
            .findFirst()
            .orElseThrow()
            .getCount()
            .intValue();
        assertEquals(10, mutationSamples);
    }

    /**
     * One group holding both "mutations" and "mrna": expects 10 samples for
     * "mutations" and 9 for "mrna".
     */
    @Test
    public void getMolecularProfileCountsMultipleProfilesUnion() {
        StudyViewFilter filter = filterFor(
            List.of(STUDY_TCGA_PUB),
            List.of(Arrays.asList("mutations", "mrna")));

        var counts = studyViewMapper.getMolecularProfileSampleCounts(
            filter, CategorizedClinicalDataCountFilter.getBuilder().build(), false);

        int mutationSamples = counts.stream()
            .filter(entry -> entry.getValue().equals("mutations"))
            .findFirst()
            .orElseThrow()
            .getCount()
            .intValue();
        assertEquals(10, mutationSamples);

        int mrnaSamples = counts.stream()
            .filter(entry -> entry.getValue().equals("mrna"))
            .findFirst()
            .orElseThrow()
            .getCount()
            .intValue();
        assertEquals(9, mrnaSamples);
    }

    /**
     * "mutations" and "mrna" supplied as two separate groups: expects the
     * "mutations" count to drop to 9.
     */
    @Test
    public void getMolecularProfileCountsMultipleProfilesIntersect() {
        StudyViewFilter filter = filterFor(
            List.of(STUDY_TCGA_PUB),
            List.of(Arrays.asList("mutations"), Arrays.asList("mrna")));

        var counts = studyViewMapper.getMolecularProfileSampleCounts(
            filter, CategorizedClinicalDataCountFilter.getBuilder().build(), false);

        int mutationSamples = counts.stream()
            .filter(entry -> entry.getValue().equals("mutations"))
            .findFirst()
            .orElseThrow()
            .getCount()
            .intValue();
        assertEquals(9, mutationSamples);
    }

    /**
     * Builds a filter limited to the given studies and genomic-profile groups.
     * Every group is copied into a fresh {@link ArrayList}, and the groups are
     * themselves held in an {@link ArrayList}, so the filter receives mutable
     * lists.
     */
    private static StudyViewFilter filterFor(List<String> studyIds, List<List<String>> profileGroups) {
        var groups = new ArrayList<List<String>>();
        for (List<String> group : profileGroups) {
            groups.add(new ArrayList<String>(group));
        }

        StudyViewFilter filter = new StudyViewFilter();
        filter.setStudyIds(studyIds);
        filter.setGenomicProfiles(groups);
        return filter;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import org.springframework.boot.test.autoconfigure.jdbc.AutoConfigureTestDatabase;
import org.springframework.boot.test.autoconfigure.orm.jpa.DataJpaTest;
import org.springframework.context.annotation.Import;
import org.springframework.test.annotation.DirtiesContext;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
Expand All @@ -37,6 +39,7 @@
@RunWith(SpringRunner.class)
@Import(MyBatisConfig.class)
@DataJpaTest
@DirtiesContext
@AutoConfigureTestDatabase(replace= AutoConfigureTestDatabase.Replace.NONE)
@ContextConfiguration(initializers = AbstractTestcontainers.Initializer.class)
public class StudyViewMapperTest extends AbstractTestcontainers {
Expand Down

0 comments on commit 280dd58

Please sign in to comment.