From 179186ec86d989bad925e21f744c5c0e1d42dfb3 Mon Sep 17 00:00:00 2001 From: Onur Sumer Date: Tue, 1 Oct 2024 18:13:37 -0400 Subject: [PATCH] Normalize data counts in a generic case-insensitive way --- .../impl/StudyViewColumnarServiceImpl.java | 43 +++++++++++++++++-- .../StudyViewFilterMapper.xml | 4 +- .../mybatisclickhouse/StudyViewMapper.xml | 16 ------- .../StudyViewMapperClinicalDataCountTest.java | 10 +++-- 4 files changed, 49 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index ed8a341edda..4dec4b0dc1f 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -172,15 +172,52 @@ private StudyViewFilterContext createContext(StudyViewFilter studyViewFilter) { } private List generateDataCountItemsFromDataCounts(List dataCounts) { - return dataCounts.stream().collect(Collectors.groupingBy(ClinicalDataCount::getAttributeId)) + return dataCounts.stream().collect(Collectors.groupingBy(ClinicalDataCount::getAttributeId)) .entrySet().parallelStream().map(e -> { ClinicalDataCountItem item = new ClinicalDataCountItem(); item.setAttributeId(e.getKey()); - item.setCounts(e.getValue()); + item.setCounts(normalizeDataCounts(e.getValue())); return item; }).toList(); } - + + private List normalizeDataCounts(List dataCounts) { + // Normalize data counts ignoring the attribute value case + // For example attribute values "TRUE", "True", and 'true' will be merged into a single aggregated count + return dataCounts + .stream() + .collect( + Collectors.groupingBy( + c -> c.getValue().toLowerCase(), + Collectors.reducing(new ClinicalDataCount(), (count1, count2) -> { + String attributeId = + count1.getAttributeId() != null + ? count1.getAttributeId() + : count2.getAttributeId(); + String value = count1.getValue() != null + ? count1.getValue() + : count2.getValue(); + if (count1.getValue() != null && count2.getValue() != null) { + value = count1.getValue().compareTo(count2.getValue()) > 0 + ? count1.getValue() + : count2.getValue(); + } + Integer count = (count1.getCount() != null ? count1.getCount(): 0) + + (count2.getCount() != null ? count2.getCount(): 0); + + ClinicalDataCount reduced = new ClinicalDataCount(); + reduced.setAttributeId(attributeId); + reduced.setValue(value); + reduced.setCount(count); + return reduced; + }) + ) + ) + .values() + .stream() + .toList(); + } + public static List calculateMissingNaCountsForClinicalDataCountItems( List clinicalDataCountItems, List filteredAttributes, diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index 0e47b2fe499..c9d20cb4543 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -380,7 +380,7 @@ - ) = '${dataFilterValue.value}' + ) ILIKE '${dataFilterValue.value}' @@ -505,7 +505,7 @@ - ) = '${dataFilterValue.value}' + ) ILIKE '${dataFilterValue.value}' diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index e500707c8b5..fd3faaffe6a 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -695,28 +695,12 @@ OR upperUTF8(${attribute_value})='N/A' - - - upperUTF8(${attribute_value})='TRUE' - - - upperUTF8(${attribute_value})='FALSE' - - multiIf( , 'NA', - - - , - 'True', - - - , - 'False', ${attribute_value} ) diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java index 8013aa5f6a2..26a5466ace7 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java @@ -89,9 +89,13 @@ public void getDeadCounts() { Collections.emptyList() ); - assertEquals(6, categoricalClinicalDataCounts.size()); - assertEquals(3, findClinicaDataCount(categoricalClinicalDataCounts, "True")); - assertEquals(4, findClinicaDataCount(categoricalClinicalDataCounts, "False")); + assertEquals(10, categoricalClinicalDataCounts.size()); + assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "True")); + assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "TRUE")); + assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "true")); + assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "False")); + assertEquals(2, findClinicaDataCount(categoricalClinicalDataCounts, "FALSE")); + assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "false")); assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "Not Released")); assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "Not Collected")); assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "Unknown"));