From f456bd5c0bbfd1e066fb5d8839ee8159ab6caba1 Mon Sep 17 00:00:00 2001 From: Onur Sumer Date: Tue, 27 Aug 2024 15:20:59 -0400 Subject: [PATCH] improve numerical clinical data filter (#10951) --- .../StudyViewFilterMapper.xml | 31 ++++++++++--- .../mybatisclickhouse/StudyViewMapper.xml | 23 ++++++++++ .../StudyViewMapperClinicalDataCountTest.java | 45 +++++++++++++++---- 3 files changed, 86 insertions(+), 13 deletions(-) diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index 4893f84608e..c0b2909cd53 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -283,18 +283,39 @@ - - AND match(attribute_value, '^[\d\.]+$') + + AND match(attribute_value, '^>?=?[-+]?[0-9]*[.,]?[0-9]+$') + + + AND match(attribute_value, '^<?=?[-+]?[0-9]*[.,]?[0-9]+$') + + + AND match(attribute_value, '^[-+]?[0-9]*[.,]?[0-9]+$') + + - AND abs(minus(cast(attribute_value as float), ${dataFilterValue.start})) < exp(-11) + AND abs( + minus( + + + , + ${dataFilterValue.start} + ) + ) < exp(-11) - AND cast(attribute_value as float) > ${dataFilterValue.start} + AND + + + > ${dataFilterValue.start} - AND cast(attribute_value as float) <= ${dataFilterValue.end} + AND + + + <= ${dataFilterValue.end} diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index d560addfa16..e94d466f6f0 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -618,6 +618,29 @@ + + + multiIf( + (startsWith(${attribute_value}, '<=') OR startsWith(${attribute_value}, '>=')), + 
cast(substr(${attribute_value}, 3) as float), + startsWith(${attribute_value}, '<'), + cast(substr(${attribute_value}, 2) as float) - exp(-10), + startsWith(${attribute_value}, '>'), + cast(substr(${attribute_value}, 2) as float) + exp(-10), + cast(${attribute_value} as float) + ) + + ${attribute_value}='' diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java index 3ab8d2dccce..90bc9b864d8 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperClinicalDataCountTest.java @@ -205,12 +205,12 @@ public void getMutationCountsFilteredByAge() { } @Test - public void getMutationCountsFilteredByAgeWithSpecialValues() { + public void getMutationCountsFilteredByAgeWithOpenStartValues() { StudyViewFilter studyViewFilter = new StudyViewFilter(); studyViewFilter.setStudyIds(List.of(STUDY_GENIE_PUB)); // filter patients with age less than 20 - // (there are 4 patients within this range, which are 301,302,303, and 306) + // (there are 4 patients within this range, which are 301, 302, 303, and 306) ClinicalDataFilter filter = buildClinicalDataFilter("age", null, 20); studyViewFilter.setClinicalDataFilters(List.of(filter)); @@ -227,17 +227,46 @@ public void getMutationCountsFilteredByAgeWithSpecialValues() { Collections.emptyList() ); - // TODO commented out tests below are failing due to a known issue - // (https://github.com/cBioPortal/rfc80-team/issues/32) - // assertEquals(4, mutationCountsFiltered.size()); - // assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "11")); // patient 301 - // assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "6")); // patient 302 - // assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "4")); // patient 303 + 
assertEquals(4, mutationCountsFiltered.size()); + assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "11")); // patient 301 + assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "6")); // patient 302 + assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "4")); // patient 303 assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "2")); // patient 306 // no patients/samples with NA assertEquals(0, findClinicaDataCount(mutationCountsFiltered, "NA")); } + + @Test + public void getMutationCountsFilteredByAgeWithOpenEndValues() { + StudyViewFilter studyViewFilter = new StudyViewFilter(); + studyViewFilter.setStudyIds(List.of(STUDY_GENIE_PUB)); + + // filter patients with age greater than 80 + // (there are 5 patients within this range, which are 317, 318, 319, 304, and 305) + ClinicalDataFilter filter = buildClinicalDataFilter("age", 80, null); + studyViewFilter.setClinicalDataFilters(List.of(filter)); + + Map<ClinicalAttributeDataSource, List<ClinicalAttribute>> clinicalAttributeDataSourceListMap = new HashMap<>(); + var clinicalAttr = new ClinicalAttribute(); + clinicalAttr.setAttrId("age"); + clinicalAttr.setPatientAttribute(true); + clinicalAttr.setDatatype("NUMBER"); + clinicalAttributeDataSourceListMap.put(ClinicalAttributeDataSource.PATIENT, List.of(clinicalAttr)); + clinicalAttributeDataSourceListMap.put(ClinicalAttributeDataSource.SAMPLE, List.of()); + var mutationCountsFiltered = studyViewMapper.getClinicalDataCounts( + StudyViewFilterHelper.build(studyViewFilter, clinicalAttributeDataSourceListMap, null), + List.of("mutation_count"), + Collections.emptyList() + ); + + assertEquals(3, mutationCountsFiltered.size()); + assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "4")); // patient 304 + assertEquals(1, findClinicaDataCount(mutationCountsFiltered, "2")); // patient 305 + + // patients/samples with NA data: 317, 318, and 319 + assertEquals(3, findClinicaDataCount(mutationCountsFiltered, "NA")); + } private ClinicalDataFilter buildClinicalDataFilter(String 
attributeId, Integer start, Integer end) { DataFilterValue value = new DataFilterValue();