Skip to content

Commit

Permalink
normalize clinical data
Browse files Browse the repository at this point in the history
  • Loading branch information
onursumer committed Aug 20, 2024
1 parent 776fbae commit 0b3fb79
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -262,16 +262,20 @@
</if>
</trim>
</foreach>

</sql>

<!--
  Selects the ${unique_id} values from ${table_name} whose clinical attribute
  named by clinicalDataFilter.attributeId (restricted to the given ${type}) has
  one of the values listed in clinicalDataFilter.values.

  Each stored value is passed through the normalizeAttributeValue fragment
  before it is compared, and that normalized form is the only thing matched
  against the filter value. A raw "attribute_value = ..." comparison must not be
  AND-ed in as well: it would additionally require the un-normalized value to
  equal the filter value.

  NOTE(review): clinicalDataFilter.attributeId and dataFilterValue.value are
  spliced into the statement with ${} (textual substitution) instead of being
  bound with #{}. Confirm they can never carry untrusted input, or switch to
  bound parameters.
-->
<sql id="categoricalClinicalDataCountFilter">
    SELECT ${unique_id}
    FROM ${table_name}
    WHERE attribute_name = '${clinicalDataFilter.attributeId}' AND
        type='${type}'
    <foreach item="dataFilterValue" collection="clinicalDataFilter.values" open=" AND ((" separator=") OR (" close="))">
        <trim prefix="" prefixOverrides="AND">
            AND (
                <include refid="normalizeAttributeValue">
                    <property name="attribute_value" value="attribute_value"/>
                </include>
            ) = '${dataFilterValue.value}'
        </trim>
    </foreach>
</sql>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,9 @@
<sql id="getClinicalDataCountsQuerySample">
SELECT
attribute_name as attributeId,
<include refid="normalizeAttributeValueNA">
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="attribute_value"/>
<property name="as_value" value="value"/>
</include>,
</include> as value,
count(value) as count
FROM clinical_data_derived
<where>
Expand All @@ -197,10 +196,9 @@
<sql id="getClinicalDataCountsQueryPatient">
SELECT
attribute_name as attributeId,
<include refid="normalizeAttributeValueNA">
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="attribute_value"/>
<property name="as_value" value="value"/>
</include>,
</include> as value,
count(value) as count
FROM clinical_data_derived
<where>
Expand Down Expand Up @@ -627,15 +625,30 @@
OR upperUTF8(${attribute_value})='NAN'
OR upperUTF8(${attribute_value})='N/A'
</sql>

<!-- Case-insensitive test for a boolean "true": the value is upper-cased, then compared with 'TRUE'. -->
<sql id="isAttributeValueTrue">
    upperUTF8(${attribute_value}) = 'TRUE'
</sql>
<!-- Case-insensitive test for a boolean "false": the value is upper-cased, then compared with 'FALSE'. -->
<sql id="isAttributeValueFalse">
    upperUTF8(${attribute_value}) = 'FALSE'
</sql>

<!--
  Maps a raw clinical attribute value to a canonical spelling. Conditions are
  checked in this order and the first match wins:
    1. isAttributeValueNA matches    : 'NA'
    2. isAttributeValueTrue matches  : 'True'
    3. isAttributeValueFalse matches : 'False'
    4. otherwise                     : the value itself, unchanged

  Property:
    attribute_value : the column or expression to normalize.

  The fragment is a bare expression with no alias of its own, so a caller can
  append "as some_name" in a SELECT list or wrap it in parentheses and compare
  it inside a predicate.
-->
<sql id="normalizeAttributeValue">
    multiIf(
        <include refid="isAttributeValueNA">
            <property name="attribute_value" value="${attribute_value}"/>
        </include>,
        'NA',
        <include refid="isAttributeValueTrue">
            <property name="attribute_value" value="${attribute_value}"/>
        </include>,
        'True',
        <include refid="isAttributeValueFalse">
            <property name="attribute_value" value="${attribute_value}"/>
        </include>,
        'False',
        ${attribute_value}
    )
</sql>
</mapper>
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,29 @@ public void getCenterCounts() {
assertEquals(13, findClinicaDataCount(categoricalClinicalDataCounts, "NA"));
}

/**
 * Checks the categorical counts returned for the "dead" clinical attribute.
 *
 * The expected result has one bucket each for "True" and "False", one bucket
 * per free-text value ("Not Released", "Not Collected", "Unknown"), and a
 * single "NA" bucket.
 */
@Test
public void getDeadCounts() {
    StudyViewFilter filter = new StudyViewFilter();
    filter.setStudyIds(List.of(STUDY_GENIE_PUB));

    var deadCounts = studyViewMapper.getClinicalDataCounts(
        filter,
        CategorizedClinicalDataCountFilter.getBuilder().build(),
        false,
        List.of("dead"),
        Collections.emptyList()
    );

    // Six buckets in total: True, False, Not Released, Not Collected, Unknown, NA.
    assertEquals(6, deadCounts.size());

    // Boolean buckets.
    assertEquals(3, findClinicaDataCount(deadCounts, "True"));
    assertEquals(4, findClinicaDataCount(deadCounts, "False"));

    // Free-text values keep their own bucket.
    assertEquals(1, findClinicaDataCount(deadCounts, "Not Released"));
    assertEquals(1, findClinicaDataCount(deadCounts, "Not Collected"));
    assertEquals(1, findClinicaDataCount(deadCounts, "Unknown"));

    // 1 empty string + 1 'N/A' + 11 samples with no data
    assertEquals(13, findClinicaDataCount(deadCounts, "NA"));
}

@Test
public void getMutationAndCenterCounts() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
Expand Down
13 changes: 13 additions & 0 deletions src/test/resources/clickhouse_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,18 @@ insert into clinical_patient (internal_id,attr_id,attr_value) values (309,'cente
insert into clinical_patient (internal_id,attr_id,attr_value) values (310,'center','ucsf');
insert into clinical_patient (internal_id,attr_id,attr_value) values (311,'center','NA');
insert into clinical_patient (internal_id,attr_id,attr_value) values (312,'center','');
-- 'dead' attribute values for internal_id 301-312: boolean spellings in mixed case
-- (True/TRUE/true, False/FALSE/false), three free-text values, one 'N/A' and one empty string.
insert into clinical_patient (internal_id,attr_id,attr_value) values (301,'dead','True');
insert into clinical_patient (internal_id,attr_id,attr_value) values (302,'dead','false');
insert into clinical_patient (internal_id,attr_id,attr_value) values (303,'dead','TRUE');
insert into clinical_patient (internal_id,attr_id,attr_value) values (304,'dead','False');
insert into clinical_patient (internal_id,attr_id,attr_value) values (305,'dead','FALSE');
insert into clinical_patient (internal_id,attr_id,attr_value) values (306,'dead','true');
insert into clinical_patient (internal_id,attr_id,attr_value) values (307,'dead','Not Released');
insert into clinical_patient (internal_id,attr_id,attr_value) values (308,'dead','Not Collected');
insert into clinical_patient (internal_id,attr_id,attr_value) values (309,'dead','FALSE');
insert into clinical_patient (internal_id,attr_id,attr_value) values (310,'dead','Unknown');
insert into clinical_patient (internal_id,attr_id,attr_value) values (311,'dead','N/A');
insert into clinical_patient (internal_id,attr_id,attr_value) values (312,'dead','');
insert into clinical_patient (internal_id,attr_id,attr_value) values (301,'age','<18');
insert into clinical_patient (internal_id,attr_id,attr_value) values (302,'age','<18');
insert into clinical_patient (internal_id,attr_id,attr_value) values (303,'age','<18');
Expand Down Expand Up @@ -474,6 +486,7 @@ insert into clinical_attribute_meta (attr_id,display_name,description,datatype,p
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('mutation_count','mutaiton count','mutation count','number',0,'30',3);
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('age','age at metastatic diagnosis (years)','age at metastatic diagnosis (years)','number',1,'3',3);
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('center','center','center of sequencing','string',1,'1',3);
-- Metadata for the 'dead' attribute (display name 'vital status'): string datatype, patient_attribute = 1, cancer_study_id = 3.
insert into clinical_attribute_meta (attr_id,display_name,description,datatype,patient_attribute,priority,cancer_study_id) values ('dead','vital status','is this patient known to be deceased','string',1,'1',3);

-- add genes, genetic entities and structural variants for structural_variant
insert into genetic_entity (id,entity_type) values(21,'gene');
Expand Down

0 comments on commit 0b3fb79

Please sign in to comment.