Skip to content

Commit

Permalink
improve numerical clinical data filter
Browse files Browse the repository at this point in the history
  • Loading branch information
onursumer committed Aug 20, 2024
1 parent 776fbae commit 24253fb
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -244,18 +244,39 @@
<property name="attribute_value" value="attribute_value"/>
</include>
</if>
<if test="dataFilterValue.start != null || dataFilterValue.end != null">
AND match(attribute_value, '^[\d\.]+$')
<if test="dataFilterValue.start != null and dataFilterValue.end == null">
AND match(attribute_value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start == null and dataFilterValue.end != null">
AND match(attribute_value, '^&lt;?=?[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start != null and dataFilterValue.end != null">
AND match(attribute_value, '^[-+]?[0-9]*[.,]?[0-9]+$')
</if>
<if test="dataFilterValue.start != null or dataFilterValue.end != null">
<choose>
<when test="dataFilterValue.start == dataFilterValue.end">
AND abs(minus(cast(attribute_value as float), ${dataFilterValue.start})) &lt; exp(-11)
AND abs(
minus(
<include refid="castStringValueToFloat">
<property name="attribute_value" value="attribute_value"/>
</include>,
${dataFilterValue.start}
)
) &lt; exp(-11)
</when>
<otherwise>
<if test="dataFilterValue.start != null">
AND cast(attribute_value as float) &gt; ${dataFilterValue.start}
AND
<include refid="castStringValueToFloat">
<property name="attribute_value" value="attribute_value"/>
</include> &gt; ${dataFilterValue.start}
</if>
<if test="dataFilterValue.end != null">
AND cast(attribute_value as float) &lt;= ${dataFilterValue.end}
AND
<include refid="castStringValueToFloat">
<property name="attribute_value" value="attribute_value"/>
</include> &lt;= ${dataFilterValue.end}
</if>
</otherwise>
</choose>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,29 @@
</where>
</sql>

<!--
Convert string values to a single numerical value for filtering purposes only.
Not designed as a general purpose decimal number parser.

The including statement must supply the attribute_value property, which names
the string expression to convert.

A leading comparison operator is stripped and the remainder is cast to float:
'<=' or '>=' prefix: the number itself
'<' prefix: the number minus exp(-10)
'>' prefix: the number plus exp(-10)
no prefix: the whole value is cast as is

The two-character prefixes are tested first because a value such as '<=6' also
starts with '<'; reordering the branches would change the result.

Example conversions:
'6' to 6
'>=6' to 6
'<=6' to 6
'>6' to 6 + exp(-10), i.e. slightly above 6 (exp(-10) is roughly 0.0000454)
'<6' to 6 - exp(-10), i.e. slightly below 6
-->
<sql id="castStringValueToFloat">
multiIf(
(startsWith(${attribute_value}, '&lt;=') OR startsWith(${attribute_value}, '>=')),
cast(substr(${attribute_value}, 3) as float),
startsWith(${attribute_value}, '&lt;'),
cast(substr(${attribute_value}, 2) as float) - exp(-10),
startsWith(${attribute_value}, '>'),
cast(substr(${attribute_value}, 2) as float) + exp(-10),
cast(${attribute_value} as float)
)
</sql>

<!-- This is to match actual NA values ('NA', 'NAN', and 'N/A') in addition to the empty string -->
<sql id="isAttributeValueNA">
${attribute_value}=''
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,12 @@ public void getMutationCountsFilteredByAge() {
}

@Test
public void getMutationCountsFilteredByAgeWithSpecialValues() {
public void getMutationCountsFilteredByAgeWithOpenStartValues() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
studyViewFilter.setStudyIds(List.of(STUDY_GENIE_PUB));

// filter patients with age less than 20
// (there are 4 patients within this range, which are 301,302,303, and 306)
// (there are 4 patients within this range, which are 301, 302, 303, and 306)
ClinicalDataFilter filter = buildClinicalDataFilter("age", null, 20);
studyViewFilter.setClinicalDataFilters(List.of(filter));

Expand All @@ -207,17 +207,44 @@ public void getMutationCountsFilteredByAgeWithSpecialValues() {
Collections.emptyList()
);

// TODO commented out tests below are failing due to a known issue
// (https://github.com/cBioPortal/rfc80-team/issues/32)
// assertEquals(4, mutationCountsFiltered.size());
// assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "11")); // patient 301
// assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "6")); // patient 302
// assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "4")); // patient 303
assertEquals(4, mutationCountsFiltered.size());
assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "11")); // patient 301
assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "6")); // patient 302
assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "4")); // patient 303
assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "2")); // patient 306

// no patients/samples with NA
assertEquals(0, findClinicaDataCount(mutationCountsFiltered, "NA"));
}

/**
 * Checks the mutation count breakdown returned by the mapper when the "age"
 * clinical data filter has a start value (80) but no end value.
 */
@Test
public void getMutationCountsFilteredByAgeWithOpenEndValues() {
    StudyViewFilter openEndedAgeStudyFilter = new StudyViewFilter();
    openEndedAgeStudyFilter.setStudyIds(List.of(STUDY_GENIE_PUB));

    // Keep only patients older than 80.
    // Five patients fall within this range: 317, 318, 319, 304, and 305.
    ClinicalDataFilter ageAbove80 = buildClinicalDataFilter("age", 80, null);
    openEndedAgeStudyFilter.setClinicalDataFilters(List.of(ageAbove80));

    // The same age filter is also passed as a patient-level numerical filter.
    var categorizedFilter = CategorizedClinicalDataCountFilter
        .getBuilder()
        .setPatientNumericalClinicalDataFilters(List.of(ageAbove80))
        .build();

    var countsByMutationCount = studyViewMapper.getClinicalDataCounts(
        openEndedAgeStudyFilter,
        categorizedFilter,
        true,
        List.of("mutation_count"),
        Collections.emptyList()
    );

    // Three entries are expected: "4", "2", and "NA".
    assertEquals(3, countsByMutationCount.size());
    // patient 304
    assertEquals(1, findClinicaDataCount(countsByMutationCount, "4"));
    // patient 305
    assertEquals(1, findClinicaDataCount(countsByMutationCount, "2"));

    // patients/samples with NA data: 317, 318, and 319
    assertEquals(3, findClinicaDataCount(countsByMutationCount, "NA"));
}

private ClinicalDataFilter buildClinicalDataFilter(String attributeId, Integer start, Integer end) {
DataFilterValue value = new DataFilterValue();
Expand Down

0 comments on commit 24253fb

Please sign in to comment.