Skip to content

Commit

Permalink
add missing poc clinical data binning function
Browse files Browse the repository at this point in the history
  • Loading branch information
onursumer committed May 22, 2024
1 parent 66b058c commit 8704a6a
Show file tree
Hide file tree
Showing 3 changed files with 283 additions and 5 deletions.
133 changes: 133 additions & 0 deletions src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package org.cbioportal.web.columnar;

import org.cbioportal.model.*;
import org.cbioportal.service.StudyViewColumnarService;
import org.cbioportal.web.columnar.util.NewClinicalDataBinUtil;
import org.cbioportal.web.parameter.*;
import org.cbioportal.web.util.DataBinner;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Spring component that produces clinical data bin counts for a set of requested
 * attributes, reading samples and clinical data through {@link StudyViewColumnarService}
 * and delegating the bin calculation to {@link NewClinicalDataBinUtil} / {@link DataBinner}.
 */
@Component
public class ClinicalDataBinner {
    private final StudyViewColumnarService studyViewColumnarService;
    private final DataBinner dataBinner;

    @Autowired
    public ClinicalDataBinner(
        StudyViewColumnarService studyViewColumnarService,
        DataBinner dataBinner
    ) {
        this.studyViewColumnarService = studyViewColumnarService;
        this.dataBinner = dataBinner;
    }

    /**
     * Fetches the clinical data bins for every attribute listed in the given filter.
     *
     * @param dataBinMethod              {@code STATIC} computes bins from the partially filtered
     *                                   (study + sample ids only) data and counts from the fully
     *                                   filtered data; any other value takes the dynamic path,
     *                                   which only uses the fully filtered data
     * @param dataBinCountFilter         carries the requested attributes and the study view filter
     * @param shouldRemoveSelfFromFilter when true, the study view filter is obtained through
     *                                   {@link NewClinicalDataBinUtil#removeSelfFromFilter}
     * @return the computed bins, or an empty list when there is no data to bin
     */
    public List<ClinicalDataBin> fetchClinicalDataBinCounts(
        DataBinMethod dataBinMethod,
        ClinicalDataBinCountFilter dataBinCountFilter,
        boolean shouldRemoveSelfFromFilter
    ) {
        List<ClinicalDataBinFilter> attributes = dataBinCountFilter.getAttributes();
        StudyViewFilter studyViewFilter = shouldRemoveSelfFromFilter
            ? NewClinicalDataBinUtil.removeSelfFromFilter(dataBinCountFilter)
            : dataBinCountFilter.getStudyViewFilter();

        List<String> attributeIds = new ArrayList<>();
        for (ClinicalDataBinFilter attribute : attributes) {
            attributeIds.add(attribute.getAttributeId());
        }

        // A reduced filter that keeps only the study ids and sample identifiers.
        // The bins of the initial (unfiltered) state must always be known so that the number of
        // bins and their ranges stay stable when additional data filters are applied: only the
        // counts per bin are refreshed, the bins themselves are never regenerated from filtered data.
        // NOTE: this is only required for DataBinMethod.STATIC, but that is the only method the
        // frontend uses; DataBinMethod.DYNAMIC complicates frontend visualization and filtering.
        StudyViewFilter partialFilter = new StudyViewFilter();
        partialFilter.setStudyIds(studyViewFilter.getStudyIds());
        partialFilter.setSampleIdentifiers(studyViewFilter.getSampleIdentifiers());

        List<Sample> unfilteredSamples = studyViewColumnarService.getFilteredSamples(partialFilter);
        List<Sample> filteredSamples = studyViewColumnarService.getFilteredSamples(studyViewFilter);

        // TODO make sure unique sample and patient keys don't need to be distinct
        List<String> unfilteredUniqueSampleKeys = sampleKeysOf(unfilteredSamples);
        List<String> filteredUniqueSampleKeys = sampleKeysOf(filteredSamples);
        List<String> unfilteredUniquePatientKeys = patientKeysOf(unfilteredSamples);
        List<String> filteredUniquePatientKeys = patientKeysOf(filteredSamples);

        // TODO make sure we don't need a distinction between sample vs patient attribute ids here
        // ideally we shouldn't because we have patient clinical data separated from sample clinical data in clickhouse

        // Partial-filter data drives bin generation; fully filtered data drives the per-bin counts.
        List<ClinicalData> unfilteredClinicalDataForSamples =
            studyViewColumnarService.getSampleClinicalData(partialFilter, attributeIds);
        List<ClinicalData> filteredClinicalDataForSamples =
            studyViewColumnarService.getSampleClinicalData(studyViewFilter, attributeIds);
        List<ClinicalData> unfilteredClinicalDataForPatients =
            studyViewColumnarService.getPatientClinicalData(partialFilter, attributeIds);
        List<ClinicalData> filteredClinicalDataForPatients =
            studyViewColumnarService.getPatientClinicalData(studyViewFilter, attributeIds);

        Map<String, ClinicalDataType> attributeDatatypeMap = NewClinicalDataBinUtil.toAttributeDatatypeMap(
            attributeIdsOf(unfilteredClinicalDataForSamples),
            attributeIdsOf(unfilteredClinicalDataForPatients),
            Collections.emptyList() // TODO ignoring conflictingPatientAttributeIds for now
        );

        // TODO ignoring conflictingPatientAttributeIds for now: only sample and patient data are merged
        List<Binnable> unfilteredClinicalData =
            mergeClinicalData(unfilteredClinicalDataForSamples, unfilteredClinicalDataForPatients);
        List<Binnable> filteredClinicalData =
            mergeClinicalData(filteredClinicalDataForSamples, filteredClinicalDataForPatients);

        Map<String, List<Binnable>> unfilteredClinicalDataByAttributeId = groupByAttributeId(unfilteredClinicalData);
        Map<String, List<Binnable>> filteredClinicalDataByAttributeId = groupByAttributeId(filteredClinicalData);

        if (dataBinMethod == DataBinMethod.STATIC) {
            if (unfilteredSamples.isEmpty() || unfilteredClinicalData.isEmpty()) {
                return Collections.emptyList();
            }
            return NewClinicalDataBinUtil.calculateStaticDataBins(
                dataBinner,
                attributes,
                attributeDatatypeMap,
                unfilteredClinicalDataByAttributeId,
                filteredClinicalDataByAttributeId,
                unfilteredUniqueSampleKeys,
                unfilteredUniquePatientKeys,
                filteredUniqueSampleKeys,
                filteredUniquePatientKeys
            );
        }

        // dataBinMethod == DataBinMethod.DYNAMIC
        // TODO we should consider removing dynamic binning support
        // we never use dynamic binning in the frontend because number of bins and the bin ranges can change
        // each time there is a new filter which makes the frontend implementation complicated
        if (filteredClinicalData.isEmpty()) {
            return Collections.emptyList();
        }
        return NewClinicalDataBinUtil.calculateDynamicDataBins(
            dataBinner,
            attributes,
            attributeDatatypeMap,
            filteredClinicalDataByAttributeId,
            filteredUniqueSampleKeys,
            filteredUniquePatientKeys
        );
    }

    /** Collects the unique sample key of every sample, in order, without de-duplication. */
    private static List<String> sampleKeysOf(List<Sample> samples) {
        List<String> keys = new ArrayList<>();
        for (Sample sample : samples) {
            keys.add(sample.getUniqueSampleKey());
        }
        return keys;
    }

    /** Collects the unique patient key of every sample, in order, without de-duplication. */
    private static List<String> patientKeysOf(List<Sample> samples) {
        List<String> keys = new ArrayList<>();
        for (Sample sample : samples) {
            keys.add(sample.getUniquePatientKey());
        }
        return keys;
    }

    /** Collects the attribute id of every clinical data row, in order, without de-duplication. */
    private static List<String> attributeIdsOf(List<ClinicalData> clinicalData) {
        List<String> ids = new ArrayList<>();
        for (ClinicalData datum : clinicalData) {
            ids.add(datum.getAttrId());
        }
        return ids;
    }

    /** Concatenates sample-level rows followed by patient-level rows into one list. */
    private static List<Binnable> mergeClinicalData(
        List<ClinicalData> sampleClinicalData,
        List<ClinicalData> patientClinicalData
    ) {
        List<Binnable> merged = new ArrayList<>();
        merged.addAll(sampleClinicalData);
        merged.addAll(patientClinicalData);
        return merged;
    }

    /** Groups the given rows by their attribute id. */
    private static Map<String, List<Binnable>> groupByAttributeId(List<Binnable> clinicalData) {
        return clinicalData.stream().collect(Collectors.groupingBy(Binnable::getAttrId));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.cbioportal.web.parameter.ClinicalDataFilter;
import org.cbioportal.web.parameter.DataBinMethod;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.cbioportal.web.util.ClinicalDataBinUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
Expand Down Expand Up @@ -43,13 +42,13 @@ public class StudyViewColumnStoreController {

private final StudyViewColumnarService studyViewColumnarService;
private final StudyViewService studyViewService;
private final ClinicalDataBinUtil clinicalDataBinUtil;
private final ClinicalDataBinner clinicalDataBinner;

@Autowired
public StudyViewColumnStoreController(StudyViewColumnarService studyViewColumnarService, StudyViewService studyViewService, ClinicalDataBinUtil clinicalDataBinUtil) {
public StudyViewColumnStoreController(StudyViewColumnarService studyViewColumnarService, StudyViewService studyViewService, ClinicalDataBinner clinicalDataBinner) {
this.studyViewColumnarService = studyViewColumnarService;
this.studyViewService = studyViewService;
this.clinicalDataBinUtil = clinicalDataBinUtil;
this.clinicalDataBinner = clinicalDataBinner;
}

@PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection<CancerStudyId>', T(org.cbioportal.utils.security.AccessLevel).READ)")
Expand Down Expand Up @@ -112,7 +111,7 @@ public ResponseEntity<List<ClinicalDataBin>> fetchClinicalDataBinCounts(
@RequestAttribute(required = false, value = "involvedCancerStudies") Collection<String> involvedCancerStudies,
@RequestAttribute(required = false, value = "interceptedClinicalDataBinCountFilter") ClinicalDataBinCountFilter interceptedClinicalDataBinCountFilter
) {
List<ClinicalDataBin> clinicalDataBins = clinicalDataBinUtil.fetchClinicalDataBinCounts(
List<ClinicalDataBin> clinicalDataBins = clinicalDataBinner.fetchClinicalDataBinCounts(
dataBinMethod,
interceptedClinicalDataBinCountFilter,
true
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package org.cbioportal.web.columnar.util;

import org.cbioportal.model.Binnable;
import org.cbioportal.model.ClinicalDataBin;
import org.cbioportal.model.DataBin;
import org.cbioportal.web.parameter.ClinicalDataBinCountFilter;
import org.cbioportal.web.parameter.ClinicalDataBinFilter;
import org.cbioportal.web.parameter.ClinicalDataType;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.cbioportal.web.util.DataBinner;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static java.util.Collections.emptyList;
import static java.util.stream.Collectors.toList;

/**
 * Static helpers used by the columnar clinical data binning code path.
 * Most of these mirror helpers that already exist elsewhere (see the TODO on each method).
 */
public class NewClinicalDataBinUtil {
    /**
     * Returns the study view filter of the given bin count filter. When exactly one attribute
     * is requested, that attribute id and the filter are first handed to
     * {@code NewStudyViewFilterUtil.removeSelfFromFilter}; the same filter instance is returned.
     */
    // TODO duplicate of ClinicalDataBinUtil.removeSelfFromFilter
    public static StudyViewFilter removeSelfFromFilter(ClinicalDataBinCountFilter dataBinCountFilter) {
        List<ClinicalDataBinFilter> requestedAttributes = dataBinCountFilter.getAttributes();
        StudyViewFilter filter = dataBinCountFilter.getStudyViewFilter();

        boolean singleAttributeRequested = requestedAttributes.size() == 1;
        if (singleAttributeRequested) {
            String onlyAttributeId = requestedAttributes.get(0).getAttributeId();
            NewStudyViewFilterUtil.removeSelfFromFilter(onlyAttributeId, filter);
        }

        return filter;
    }

    /**
     * Builds a {@link ClinicalDataBin} for the given attribute from a generic {@link DataBin}.
     * The count is always copied; start, end and special value are copied only when non-null.
     */
    // TODO duplicate of StudyViewFilterUtil.dataBinToClinicalDataBin
    public static ClinicalDataBin dataBinToClinicalDataBin(ClinicalDataBinFilter attribute, DataBin dataBin) {
        ClinicalDataBin result = new ClinicalDataBin();
        result.setAttributeId(attribute.getAttributeId());
        result.setCount(dataBin.getCount());

        if (dataBin.getStart() != null) {
            result.setStart(dataBin.getStart());
        }
        if (dataBin.getEnd() != null) {
            result.setEnd(dataBin.getEnd());
        }
        if (dataBin.getSpecialValue() != null) {
            result.setSpecialValue(dataBin.getSpecialValue());
        }

        return result;
    }

    /**
     * Maps each attribute id to its clinical data type. Later writes win: sample ids are
     * written first as SAMPLE, patient ids then overwrite as PATIENT, and finally the
     * conflicting patient attribute ids are written as SAMPLE.
     */
    // TODO duplicate of ClinicalDataBinUtil.toAttributeDatatypeMap
    public static Map<String, ClinicalDataType> toAttributeDatatypeMap(
        List<String> sampleAttributeIds,
        List<String> patientAttributeIds,
        List<String> conflictingPatientAttributeIds
    ) {
        Map<String, ClinicalDataType> datatypeByAttributeId = new HashMap<>();

        for (String sampleAttributeId : sampleAttributeIds) {
            datatypeByAttributeId.put(sampleAttributeId, ClinicalDataType.SAMPLE);
        }
        for (String patientAttributeId : patientAttributeIds) {
            datatypeByAttributeId.put(patientAttributeId, ClinicalDataType.PATIENT);
        }
        for (String conflictingAttributeId : conflictingPatientAttributeIds) {
            datatypeByAttributeId.put(conflictingAttributeId, ClinicalDataType.SAMPLE);
        }

        return datatypeByAttributeId;
    }

    /**
     * Calculates bins for every requested attribute that has an entry in
     * {@code attributeDatatypeMap}, passing both the filtered and the unfiltered data and keys
     * to {@link DataBinner#calculateClinicalDataBins}. Attributes without an entry are skipped.
     * Patient keys are used for PATIENT attributes, sample keys otherwise.
     */
    // TODO duplicate of ClinicalDataBinUtil.calculateStaticDataBins
    public static List<ClinicalDataBin> calculateStaticDataBins(
        DataBinner dataBinner,
        List<ClinicalDataBinFilter> attributes,
        Map<String, ClinicalDataType> attributeDatatypeMap,
        Map<String, List<Binnable>> unfilteredClinicalDataByAttributeId,
        Map<String, List<Binnable>> filteredClinicalDataByAttributeId,
        List<String> unfilteredUniqueSampleKeys,
        List<String> unfilteredUniquePatientKeys,
        List<String> filteredUniqueSampleKeys,
        List<String> filteredUniquePatientKeys
    ) {
        List<ClinicalDataBin> result = new ArrayList<>();

        for (ClinicalDataBinFilter attribute : attributes) {
            String attributeId = attribute.getAttributeId();
            if (!attributeDatatypeMap.containsKey(attributeId)) {
                // no clinical data for this attribute
                continue;
            }

            ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attributeId);
            List<String> filteredIds =
                selectKeys(clinicalDataType, filteredUniquePatientKeys, filteredUniqueSampleKeys);
            List<String> unfilteredIds =
                selectKeys(clinicalDataType, unfilteredUniquePatientKeys, unfilteredUniqueSampleKeys);
            List<Binnable> filteredData =
                filteredClinicalDataByAttributeId.getOrDefault(attributeId, emptyList());
            List<Binnable> unfilteredData =
                unfilteredClinicalDataByAttributeId.getOrDefault(attributeId, emptyList());

            for (DataBin dataBin : dataBinner.calculateClinicalDataBins(
                attribute, clinicalDataType, filteredData, unfilteredData, filteredIds, unfilteredIds)) {
                result.add(dataBinToClinicalDataBin(attribute, dataBin));
            }
        }

        return result;
    }

    /**
     * Calculates bins for every requested attribute that has an entry in
     * {@code attributeDatatypeMap}, passing only the filtered data and keys to
     * {@link DataBinner#calculateDataBins}. Attributes without an entry are skipped.
     * Patient keys are used for PATIENT attributes, sample keys otherwise.
     */
    // TODO duplicate of ClinicalDataBinUtil.calculateDynamicDataBins
    public static List<ClinicalDataBin> calculateDynamicDataBins(
        DataBinner dataBinner,
        List<ClinicalDataBinFilter> attributes,
        Map<String, ClinicalDataType> attributeDatatypeMap,
        Map<String, List<Binnable>> filteredClinicalDataByAttributeId,
        List<String> filteredUniqueSampleKeys,
        List<String> filteredUniquePatientKeys
    ) {
        List<ClinicalDataBin> result = new ArrayList<>();

        for (ClinicalDataBinFilter attribute : attributes) {
            String attributeId = attribute.getAttributeId();
            if (!attributeDatatypeMap.containsKey(attributeId)) {
                // no clinical data for this attribute
                continue;
            }

            ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attributeId);
            List<String> filteredIds =
                selectKeys(clinicalDataType, filteredUniquePatientKeys, filteredUniqueSampleKeys);
            List<Binnable> filteredData =
                filteredClinicalDataByAttributeId.getOrDefault(attributeId, emptyList());

            for (DataBin dataBin : dataBinner.calculateDataBins(
                attribute, clinicalDataType, filteredData, filteredIds)) {
                result.add(dataBinToClinicalDataBin(attribute, dataBin));
            }
        }

        return result;
    }

    /** Picks the patient keys for PATIENT attributes and the sample keys for everything else. */
    private static List<String> selectKeys(
        ClinicalDataType clinicalDataType,
        List<String> patientKeys,
        List<String> sampleKeys
    ) {
        if (clinicalDataType == ClinicalDataType.PATIENT) {
            return patientKeys;
        }
        return sampleKeys;
    }
}

0 comments on commit 8704a6a

Please sign in to comment.