Skip to content

Commit

Permalink
new stats calculator
Browse files Browse the repository at this point in the history
  • Loading branch information
nitin-ebi committed Aug 2, 2024
1 parent 36f5f07 commit 4ce5475
Show file tree
Hide file tree
Showing 15 changed files with 655 additions and 1 deletion.
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@
<filtering>true</filtering>
<includes>
<include>test-mongo.properties</include>
<include>test-stats.properties</include>
<include>opencga/conf/storage-mongodb.properties</include>
</includes>
</testResource>
Expand All @@ -208,6 +209,7 @@
<filtering>false</filtering>
<excludes>
<exclude>test-mongo.properties</exclude>
<exclude>test-stats.properties</exclude>
<exclude>opencga/conf/storage-mongodb.properties</exclude>
</excludes>
</testResource>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ private Genotype normalizeGenotypeAlleles(Genotype g) {
}
}

void setGenotypesCount(Map<Genotype, Integer> genotypesCount) {
/**
 * Replaces the genotype-count map held by this object.
 * <p>
 * The given map is stored by reference (no defensive copy is made), so later changes to it
 * by the caller are visible through this object.
 *
 * @param genotypesCount map from genotype to its count
 */
public void setGenotypesCount(Map<Genotype, Integer> genotypesCount) {
this.genotypesCount = genotypesCount;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,4 +266,8 @@ public Set<VariantStatsMongo> getVariantStatsMongo() {
public Set<VariantAnnotation> getAnnotations() {
return annotations;
}

/**
 * Replaces the set of statistics held in the {@code variantStatsMongo} field.
 * <p>
 * The given set is stored by reference; no copy is made.
 *
 * @param variantStats the statistics to store on this object
 */
public void setStats(Set<VariantStatsMongo> variantStats) {
this.variantStatsMongo = variantStats;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,16 @@ private BasicDBObject buildAttributes(Map<String, String> attributes) {
return attrs;
}

/**
 * Returns the {@code samp} field of this object.
 * <p>
 * The field itself is returned, not a copy, so mutations made by the caller affect this object.
 *
 * @return the object held in {@code samp}
 */
public BasicDBObject getSampleData() {
return samp;
}

/**
 * Returns the value of the {@code studyId} field.
 *
 * @return the study identifier stored on this object
 */
public String getStudyId() {
return studyId;
}

/**
 * Returns the value of the {@code fileId} field.
 *
 * @return the file identifier stored on this object
 */
public String getFileId() {
return fileId;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,20 @@ public class BeanNames {
public static final String VARIANT_ANNOTATION_READER = "variant-annotation-reader";
public static final String VARIANT_READER = "variant-reader";
public static final String ACCESSION_REPORT_READER = "accession-report-reader";
public static final String STATS_VARIANTS_READER = "stats-variants-reader";

public static final String VEP_ANNOTATION_PROCESSOR = "vep-annotation-processor";
public static final String ANNOTATION_PARSER_PROCESSOR = "annotation-parser-processor";
public static final String ANNOTATION_COMPOSITE_PROCESSOR = "annotation-composite-processor";
public static final String STATS_VARIANTS_PROCESSOR = "stats-variants-processor";

public static final String GENE_WRITER = "gene-writer";
public static final String ANNOTATION_WRITER = "annotation-writer";
public static final String ANNOTATION_IN_VARIANT_WRITER = "annotation-in-variant-writer";
public static final String COMPOSITE_ANNOTATION_VARIANT_WRITER = "composite-annotation-variant-writer";
public static final String VARIANT_WRITER = "variant-writer";
public static final String ACCESSION_IMPORTER = "accession-importer";
public static final String STATS_VARIANTS_WRITER = "stats-variants-writer";

public static final String ANNOTATION_SKIP_STEP_DECIDER = "annotation-skip-step-decider";
public static final String STATISTICS_SKIP_STEP_DECIDER = "statistics-skip-step-decider";
Expand All @@ -60,6 +63,7 @@ public class BeanNames {
public static final String DROP_FILES_BY_STUDY_STEP = "drop-files-by-study-step";
public static final String LOAD_ANNOTATION_METADATA_STEP = "annotation-metadata-step";
public static final String ACCESSION_IMPORT_STEP = "accession-import-step";
public static final String CALCULATE_STATISTICS_STEP_NEW = "calculate-statistics-step-new";

public static final String AGGREGATED_VCF_JOB = "aggregated-vcf-job";
public static final String ANNOTATE_VARIANTS_JOB = "annotate-variants-job";
Expand All @@ -69,4 +73,5 @@ public class BeanNames {
public static final String CALCULATE_STATISTICS_JOB = "calculate-statistics-job";
public static final String DROP_STUDY_JOB = "drop-study-job";
public static final String ACCESSION_IMPORT_JOB = "accession-import-job";
public static final String CALCULATE_STATISTICS_JOB_NEW = "calculate-statistics-job-new";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.io.readers;

import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.ItemStreamReader;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.mongodb.core.MongoTemplate;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;
import uk.ac.ebi.eva.pipeline.io.readers.StatsVariantReader;
import uk.ac.ebi.eva.pipeline.parameters.ChunkSizeParameters;
import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters;
import uk.ac.ebi.eva.pipeline.parameters.InputParameters;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.STATS_VARIANTS_READER;

/**
 * Spring configuration that registers the step-scoped reader bean named
 * {@code STATS_VARIANTS_READER}.
 */
@Configuration
public class StatsVariantsReaderConfiguration {

    /**
     * Creates a {@link StatsVariantReader} from the injected parameter beans.
     *
     * @param databaseParameters  passed through to the reader unchanged
     * @param mongoTemplate       passed through to the reader unchanged
     * @param inputParameters     supplies the study id handed to the reader
     * @param chunkSizeParameters supplies the chunk size handed to the reader
     * @return a new reader instance exposed as an {@link ItemStreamReader} of {@link VariantDocument}
     */
    @Bean(STATS_VARIANTS_READER)
    @StepScope
    public ItemStreamReader<VariantDocument> statsVariantsReader(
            DatabaseParameters databaseParameters,
            MongoTemplate mongoTemplate,
            InputParameters inputParameters,
            ChunkSizeParameters chunkSizeParameters) {
        return new StatsVariantReader(
                databaseParameters,
                mongoTemplate,
                inputParameters.getStudyId(),
                chunkSizeParameters.getChunkSize());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.io.writers;

import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.ItemWriter;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.mongodb.core.MongoTemplate;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;
import uk.ac.ebi.eva.pipeline.io.writers.StatsVariantWriter;
import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.STATS_VARIANTS_WRITER;

/**
 * Spring configuration that registers the step-scoped writer bean named
 * {@code STATS_VARIANTS_WRITER}.
 */
@Configuration
public class StatsVariantsWriterConfiguration {

    /**
     * Creates a {@link StatsVariantWriter} from the injected beans.
     *
     * @param databaseParameters passed through to the writer unchanged
     * @param mongoTemplate      passed through to the writer unchanged
     * @return a new writer instance exposed as an {@link ItemWriter} of {@link VariantDocument}
     */
    @Bean(STATS_VARIANTS_WRITER)
    @StepScope
    public ItemWriter<VariantDocument> statsVariantsWriter(
            DatabaseParameters databaseParameters,
            MongoTemplate mongoTemplate) {
        return new StatsVariantWriter(databaseParameters, mongoTemplate);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.jobs;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Scope;
import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.CalculateStatisticsStepConfigurationNew;
import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_JOB_NEW;
import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_STEP_NEW;

/**
 * Configuration of the new population statistics job, registered under the bean name
 * {@code CALCULATE_STATISTICS_JOB_NEW}.
 * <p>
 * The job is made of a single step: the one qualified as {@code CALCULATE_STATISTICS_STEP_NEW},
 * imported through {@link CalculateStatisticsStepConfigurationNew}.
 * <p>
 * TODO add a new PopulationStatisticsJobParametersValidator
 */
@Configuration
@EnableBatchProcessing
@Import({CalculateStatisticsStepConfigurationNew.class})
public class PopulationStatisticsJobConfigurationNew {

    private static final Logger logger = LoggerFactory.getLogger(PopulationStatisticsJobConfigurationNew.class);

    /** The only step run by the job; injected by bean name. */
    @Autowired
    @Qualifier(CALCULATE_STATISTICS_STEP_NEW)
    private Step calculateStatisticsStepNew;

    /**
     * Builds the statistics job.
     * <p>
     * The bean is prototype-scoped, so each lookup builds a new {@link Job}. A
     * {@link NewJobIncrementer} is set as the job's incrementer.
     *
     * @param jobBuilderFactory factory used to create the job builder
     * @return a job named {@code CALCULATE_STATISTICS_JOB_NEW} that starts with the injected step
     */
    @Bean(CALCULATE_STATISTICS_JOB_NEW)
    @Scope("prototype")
    public Job calculateStatisticsJob(JobBuilderFactory jobBuilderFactory) {
        // Parameterized form: the message is only assembled when debug logging is enabled.
        logger.debug("Building '{}'", CALCULATE_STATISTICS_JOB_NEW);

        return jobBuilderFactory
                .get(CALCULATE_STATISTICS_JOB_NEW)
                .incrementer(new NewJobIncrementer())
                .start(calculateStatisticsStepNew)
                .build();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.jobs.steps;

import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.step.tasklet.TaskletStep;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemStreamReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.repeat.policy.SimpleCompletionPolicy;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;
import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration;
import uk.ac.ebi.eva.pipeline.configuration.io.readers.StatsVariantsReaderConfiguration;
import uk.ac.ebi.eva.pipeline.configuration.io.writers.StatsVariantsWriterConfiguration;
import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.processors.StatsVariantsProcessorConfiguration;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_STEP_NEW;
import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.STATS_VARIANTS_PROCESSOR;
import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.STATS_VARIANTS_READER;
import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.STATS_VARIANTS_WRITER;


/**
 * Spring configuration that registers the chunk-oriented step named
 * {@code CALCULATE_STATISTICS_STEP_NEW}, wiring together the stats reader, processor and writer
 * beans imported below.
 */
@Configuration
@EnableBatchProcessing
@Import({
        StatsVariantsReaderConfiguration.class,
        StatsVariantsWriterConfiguration.class,
        StatsVariantsProcessorConfiguration.class,
        ChunkSizeCompletionPolicyConfiguration.class
})
public class CalculateStatisticsStepConfigurationNew {

    /**
     * Builds the step: each chunk is read by {@code variantReader}, passed through
     * {@code variantProcessor} and handed to {@code variantWriter}.
     *
     * @param variantReader             bean qualified as {@code STATS_VARIANTS_READER}
     * @param variantProcessor          bean qualified as {@code STATS_VARIANTS_PROCESSOR}
     * @param variantWriter             bean qualified as {@code STATS_VARIANTS_WRITER}
     * @param stepBuilderFactory        factory used to create the step builder
     * @param chunkSizeCompletionPolicy completion policy that delimits each chunk
     * @return the assembled step
     */
    @Bean(CALCULATE_STATISTICS_STEP_NEW)
    public Step calculateStatisticsStep(
            @Qualifier(STATS_VARIANTS_READER) ItemStreamReader<VariantDocument> variantReader,
            @Qualifier(STATS_VARIANTS_PROCESSOR) ItemProcessor<VariantDocument, VariantDocument> variantProcessor,
            @Qualifier(STATS_VARIANTS_WRITER) ItemWriter<VariantDocument> variantWriter,
            StepBuilderFactory stepBuilderFactory,
            SimpleCompletionPolicy chunkSizeCompletionPolicy) {
        return stepBuilderFactory
                .get(CALCULATE_STATISTICS_STEP_NEW)
                .<VariantDocument, VariantDocument>chunk(chunkSizeCompletionPolicy)
                .reader(variantReader)
                .processor(variantProcessor)
                .writer(variantWriter)
                .build();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.jobs.steps.processors;

import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;
import uk.ac.ebi.eva.pipeline.io.processors.StatsVariantProcessor;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.STATS_VARIANTS_PROCESSOR;

/**
 * Spring configuration that registers the step-scoped processor bean named
 * {@code STATS_VARIANTS_PROCESSOR}.
 */
@Configuration
public class StatsVariantsProcessorConfiguration {

    /**
     * Creates a new {@link StatsVariantProcessor}.
     * <p>
     * NOTE(review): the method is called {@code statsVariantsReader} although it builds a processor.
     * The bean name comes from the {@code @Bean} value, so the method name does not affect wiring;
     * consider renaming once callers have been checked.
     *
     * @return a new processor exposed as an {@link ItemProcessor} from {@link VariantDocument}
     *         to {@link VariantDocument}
     */
    @Bean(STATS_VARIANTS_PROCESSOR)
    @StepScope
    public ItemProcessor<VariantDocument, VariantDocument> statsVariantsReader() {
        final StatsVariantProcessor processor = new StatsVariantProcessor();
        return processor;
    }
}
Loading

0 comments on commit 4ce5475

Please sign in to comment.