Skip to content

Commit

Permalink
add unit tests for clinical data counts sql
Browse files Browse the repository at this point in the history
  • Loading branch information
onursumer committed Aug 9, 2024
1 parent d62c6e5 commit 0a5dd3e
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.cbioportal.persistence.mybatisclickhouse;

import org.cbioportal.model.AlterationFilter;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.MutationEventType;
import org.cbioportal.model.TemporalRelation;
import org.cbioportal.persistence.helper.AlterationFilterHelper;
Expand All @@ -25,6 +26,7 @@
import org.springframework.test.context.junit4.SpringRunner;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -43,6 +45,7 @@ public class StudyViewMapperTest extends AbstractTestcontainers {

private static final String STUDY_TCGA_PUB = "study_tcga_pub";
private static final String STUDY_ACC_TCGA = "acc_tcga";
private static final String STUDY_GENIE_PUB = "study_genie_pub";

@Autowired
private StudyViewMapper studyViewMapper;
Expand Down Expand Up @@ -234,4 +237,59 @@ public void getSampleTreatmentCounts() {

}

/**
 * Checks the clinical data counts produced by {@code studyViewMapper.getClinicalDataCounts}
 * for the {@code study_genie_pub} fixture: first a numerical attribute on its own, then a
 * categorical attribute on its own, and finally both attributes in a single request.
 */
@Test
public void getClinicalDataCounts() {
    // Every query below is scoped to the GENIE test study only.
    StudyViewFilter genieOnlyFilter = new StudyViewFilter();
    genieOnlyFilter.setStudyIds(List.of(STUDY_GENIE_PUB));

    // --- numerical attribute: mutation_count ---
    List<ClinicalDataCount> mutationCountBuckets = studyViewMapper.getClinicalDataCounts(
        genieOnlyFilter,
        CategorizedClinicalDataCountFilter.getBuilder().build(),
        false,
        List.of("mutation_count"),
        Collections.emptyList()
    );
    assertEquals(6, mutationCountBuckets.size());
    assertEquals(1, findClinicaDataCount(mutationCountBuckets, "11"));
    assertEquals(1, findClinicaDataCount(mutationCountBuckets, "6"));
    assertEquals(2, findClinicaDataCount(mutationCountBuckets, "4"));
    assertEquals(4, findClinicaDataCount(mutationCountBuckets, "2"));
    assertEquals(2, findClinicaDataCount(mutationCountBuckets, "1"));
    // the empty-string row and the 'NAN' row are both expected under the NA bucket
    assertEquals(2, findClinicaDataCount(mutationCountBuckets, "NA"));

    // --- categorical attribute: center ---
    List<ClinicalDataCount> centerBuckets = studyViewMapper.getClinicalDataCounts(
        genieOnlyFilter,
        CategorizedClinicalDataCountFilter.getBuilder().build(),
        false,
        List.of("center"),
        Collections.emptyList()
    );
    assertEquals(7, centerBuckets.size());
    assertEquals(3, findClinicaDataCount(centerBuckets, "msk"));
    assertEquals(2, findClinicaDataCount(centerBuckets, "dfci"));
    assertEquals(2, findClinicaDataCount(centerBuckets, "chop"));
    assertEquals(1, findClinicaDataCount(centerBuckets, "mda"));
    assertEquals(1, findClinicaDataCount(centerBuckets, "ohsu"));
    assertEquals(1, findClinicaDataCount(centerBuckets, "ucsf"));
    // the empty-string row and the 'NA' row are both expected under the NA bucket
    assertEquals(2, findClinicaDataCount(centerBuckets, "NA"));

    // --- both attributes in one request ---
    List<ClinicalDataCount> bothAttributesBuckets = studyViewMapper.getClinicalDataCounts(
        genieOnlyFilter,
        CategorizedClinicalDataCountFilter.getBuilder().build(),
        false,
        List.of("mutation_count", "center"),
        Collections.emptyList()
    );
    // 6 mutation_count buckets + 7 center buckets
    assertEquals(13, bothAttributesBuckets.size());
}

/**
 * Looks up the count recorded for a given attribute value.
 *
 * @param counts    clinical data counts to search
 * @param attrValue attribute value to match against each entry's value
 * @return the count of the first entry whose value equals {@code attrValue},
 *         or {@code 0} when no entry matches
 */
private int findClinicaDataCount(List<ClinicalDataCount> counts, String attrValue) {
    for (ClinicalDataCount candidate : counts) {
        if (candidate.getValue().equals(attrValue)) {
            return candidate.getCount();
        }
    }

    // no entry carries the requested value
    return 0;
}
}
53 changes: 52 additions & 1 deletion src/test/resources/clickhouse_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ insert into `reference_genome` values (2, 'human', 'hg38', 'grch38', null, 'http

insert into cancer_study (cancer_study_id,cancer_study_identifier,type_of_cancer_id,name,description,public,pmid,citation,groups,status,import_date,reference_genome_id) values(1,'study_tcga_pub','brca','breast invasive carcinoma (tcga, nature 2012)','<a href=\"http://cancergenome.nih.gov/\">the cancer genome atlas (tcga)</a> breast invasive carcinoma project. 825 cases.<br><i>nature 2012.</i> <a href=\"http://tcga-data.nci.nih.gov/tcga/\">raw data via the tcga data portal</a>.',1,'23000897,26451490','tcga, nature 2012, ...','su2c-pi3k;public;gdac',0,'2011-12-18 13:17:17+00:00',1);
insert into cancer_study (cancer_study_id,cancer_study_identifier,type_of_cancer_id,name,description,public,pmid,citation,groups,status,import_date,reference_genome_id) values(2,'acc_tcga','acc','adrenocortical carcinoma (tcga, provisional)','tcga adrenocortical carcinoma; raw data at the <a href="https://tcga-data.nci.nih.gov/">nci</a>.',1,'23000897','tcga, nature 2012','su2c-pi3k;public;gdac',0,'2013-10-12 11:11:15+00:00',1);
insert into cancer_study (cancer_study_id,cancer_study_identifier,type_of_cancer_id,name,description,public,pmid,citation,groups,status,import_date,reference_genome_id) values(3,'study_genie_pub','mixed','GENIE Cohort public','GENIE public','1','','','GENIEPUB;GENIE',0,'2024-04-03 20:12:24.000000',1);

insert into cancer_study_tags (cancer_study_id,tags) values(1,'{"analyst": {"name": "jack", "email": "[email protected]"}, "load id": 35}');
insert into cancer_study_tags (cancer_study_id,tags) values(2,'{"load id": 36}');
Expand Down Expand Up @@ -102,6 +103,19 @@ insert into patient (internal_id,stable_id,cancer_study_id) values (15,'tcga-a1-
insert into patient (internal_id,stable_id,cancer_study_id) values (16,'tcga-a1-b0sp',2);
insert into patient (internal_id,stable_id,cancer_study_id) values (17,'tcga-a1-b0sq',2);
insert into patient (internal_id,stable_id,cancer_study_id) values (18,'tcga-a1-a0sb',2);
-- patients 301-312: twelve patients attached to cancer_study_id 3 ('study_genie_pub')
insert into patient (internal_id,stable_id,cancer_study_id) values (301,'GENIE-TEST-301',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (302,'GENIE-TEST-302',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (303,'GENIE-TEST-303',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (304,'GENIE-TEST-304',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (305,'GENIE-TEST-305',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (306,'GENIE-TEST-306',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (307,'GENIE-TEST-307',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (308,'GENIE-TEST-308',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (309,'GENIE-TEST-309',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (310,'GENIE-TEST-310',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (311,'GENIE-TEST-311',3);
insert into patient (internal_id,stable_id,cancer_study_id) values (312,'GENIE-TEST-312',3);

insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values(10,'1,2,3,4,5,6,7,8,9,10,11,');

insert into sample (internal_id,stable_id,sample_type,patient_id) values (1,'tcga-a1-a0sb-01','primary solid tumor',1);
Expand All @@ -123,6 +137,18 @@ insert into sample (internal_id,stable_id,sample_type,patient_id) values (16,'tc
insert into sample (internal_id,stable_id,sample_type,patient_id) values (17,'tcga-a1-b0sq-01','primary solid tumor',17);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (18,'tcga-a1-a0sb-02','primary solid tumor',1);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (19,'tcga-a1-a0sb-01','primary solid tumor',18);
-- samples 301-312: exactly one sample per patient 301-312; each sample's internal_id equals its patient_id
insert into sample (internal_id,stable_id,sample_type,patient_id) values (301,'GENIE-TEST-301-01','primary solid tumor',301);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (302,'GENIE-TEST-302-01','primary solid tumor',302);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (303,'GENIE-TEST-303-01','primary solid tumor',303);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (304,'GENIE-TEST-304-01','primary solid tumor',304);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (305,'GENIE-TEST-305-01','primary solid tumor',305);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (306,'GENIE-TEST-306-01','primary solid tumor',306);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (307,'GENIE-TEST-307-01','primary solid tumor',307);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (308,'GENIE-TEST-308-01','primary solid tumor',308);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (309,'GENIE-TEST-309-01','primary solid tumor',309);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (310,'GENIE-TEST-310-01','primary solid tumor',310);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (311,'GENIE-TEST-311-01','primary solid tumor',311);
insert into sample (internal_id,stable_id,sample_type,patient_id) values (312,'GENIE-TEST-312-01','primary solid tumor',312);


insert into mutation_event (mutation_event_id,entrez_gene_id,chr,start_position,end_position,reference_allele,tumor_seq_allele,protein_change,mutation_type,ncbi_build,strand,variant_type,db_snp_rs,db_snp_val_status,refseq_mrna_id,codon_change,uniprot_accession,protein_pos_start,protein_pos_end,canonical_transcript,keyword) values (2038,672,'17',41244748,41244748,'g','a','q934*','nonsense_mutation','37','+','snp','rs80357223','unknown','nm_007294','c.(2800-2802)cag>tag','p38398',934,934,1,'brca1 truncating');
Expand Down Expand Up @@ -336,6 +362,18 @@ insert into clinical_patient (internal_id,attr_id,attr_value) values (15,'dfs_st
insert into clinical_patient (internal_id,attr_id,attr_value) values (15,'os_months','12.3');
insert into clinical_patient (internal_id,attr_id,attr_value) values (15,'os_status','0:living');
insert into clinical_patient (internal_id,attr_id,attr_value) values (18,'retrospective_collection','no');
-- 'center' values for patients 301-312: msk x3, dfci x2, chop x2, mda x1, ohsu x1, ucsf x1,
-- plus one literal 'NA' (311) and one empty string (312)
insert into clinical_patient (internal_id,attr_id,attr_value) values (301,'center','msk');
insert into clinical_patient (internal_id,attr_id,attr_value) values (302,'center','msk');
insert into clinical_patient (internal_id,attr_id,attr_value) values (303,'center','msk');
insert into clinical_patient (internal_id,attr_id,attr_value) values (304,'center','dfci');
insert into clinical_patient (internal_id,attr_id,attr_value) values (305,'center','dfci');
insert into clinical_patient (internal_id,attr_id,attr_value) values (306,'center','chop');
insert into clinical_patient (internal_id,attr_id,attr_value) values (307,'center','chop');
insert into clinical_patient (internal_id,attr_id,attr_value) values (308,'center','mda');
insert into clinical_patient (internal_id,attr_id,attr_value) values (309,'center','ohsu');
insert into clinical_patient (internal_id,attr_id,attr_value) values (310,'center','ucsf');
insert into clinical_patient (internal_id,attr_id,attr_value) values (311,'center','NA');
insert into clinical_patient (internal_id,attr_id,attr_value) values (312,'center','');

insert into clinical_sample (internal_id,attr_id,attr_value) values (1,'other_sample_id','5c631ce8-f96a-4c35-a459-556fc4ab21e1');
insert into clinical_sample (internal_id,attr_id,attr_value) values (1,'days_to_collection','276');
Expand All @@ -348,7 +386,18 @@ insert into clinical_sample (internal_id,attr_id,attr_value) values (2,'sample_t
insert into clinical_sample (internal_id,attr_id,attr_value) values (15,'other_sample_id','91e7f41c-17b3-4724-96ef-d3c207b964e1');
insert into clinical_sample (internal_id,attr_id,attr_value) values (15,'days_to_collection','111');
insert into clinical_sample (internal_id,attr_id,attr_value) values (19,'days_to_collection','111');

-- 'mutation_count' values for samples 301-312: 11 x1, 6 x1, 4 x2, 2 x4, 1 x2,
-- plus one literal 'NAN' (311) and one empty string (312)
insert into clinical_sample (internal_id,attr_id,attr_value) values (301,'mutation_count','11');
insert into clinical_sample (internal_id,attr_id,attr_value) values (302,'mutation_count','6');
insert into clinical_sample (internal_id,attr_id,attr_value) values (303,'mutation_count','4');
insert into clinical_sample (internal_id,attr_id,attr_value) values (304,'mutation_count','4');
insert into clinical_sample (internal_id,attr_id,attr_value) values (305,'mutation_count','2');
insert into clinical_sample (internal_id,attr_id,attr_value) values (306,'mutation_count','2');
insert into clinical_sample (internal_id,attr_id,attr_value) values (307,'mutation_count','2');
insert into clinical_sample (internal_id,attr_id,attr_value) values (308,'mutation_count','2');
insert into clinical_sample (internal_id,attr_id,attr_value) values (309,'mutation_count','1');
insert into clinical_sample (internal_id,attr_id,attr_value) values (310,'mutation_count','1');
insert into clinical_sample (internal_id,attr_id,attr_value) values (311,'mutation_count','NAN');
insert into clinical_sample (internal_id,attr_id,attr_value) values (312,'mutation_count','');

insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('retrospective_collection','tissue retrospective collection indicator','text indicator for the time frame of tissue procurement,indicating that the tissue was obtained and stored prior to the initiation of the project.','string',1,'1',1);
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('prospective_collection','tissue prospective collection indicator','text indicator for the time frame of tissue procurement,indicating that the tissue was procured in parallel to the project.','string',1,'1',1);
Expand Down Expand Up @@ -378,6 +427,8 @@ insert into clinical_attribute_meta (attr_id,display_name,description,datatype,p
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('oct_embedded','oct embedded','oct embedded','string',0,'1',2);
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('pathology_report_file_name','pathology report file name','pathology report file name','string',0,'1',2);
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('sample_type','sample type','the type of sample (i.e.,normal,primary,met,recurrence).','string',0,'1',2);
-- attribute metadata for cancer_study_id 3: 'mutation_count' is a numeric sample-level attribute
-- (patient_attribute = 0), 'center' is a string patient-level attribute (patient_attribute = 1)
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('mutation_count','mutation count','mutation count','number',0,'30',3);
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('center','center','center of sequencing','string',1,'1',3);

-- add genes, genetic entities and structural variants for structural_variant
insert into genetic_entity (id,entity_type) values(21,'gene');
Expand Down

0 comments on commit 0a5dd3e

Please sign in to comment.