Skip to content

Commit

Permalink
Write failed and successful annotations to additional files
Browse files Browse the repository at this point in the history
  • Loading branch information
ozguzMete committed Sep 21, 2022
1 parent fe506e6 commit 327e277
Show file tree
Hide file tree
Showing 14 changed files with 436 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,24 @@ public class AnnotationPipeline {
private static final Logger LOG = LoggerFactory.getLogger(AnnotationPipeline.class);

private static void annotateJob(String[] args, String filename, String outputFilename, String outputFormat, String isoformOverride,
String errorReportLocation, boolean replace, String postIntervalSize) throws Exception {
String errorReportLocation, boolean replace, String postIntervalSize, boolean splitOutput) throws Exception {
SpringApplication app = new SpringApplication(AnnotationPipeline.class);
app.setWebApplicationType(WebApplicationType.NONE);
app.setAllowBeanDefinitionOverriding(Boolean.TRUE);
ConfigurableApplicationContext ctx = app.run(args);
JobLauncher jobLauncher = ctx.getBean(JobLauncher.class);

String failedOutputFilename = "";
String successfulOutputFilename = "";
if(splitOutput) {
failedOutputFilename = outputFilename + ".FAILED";
successfulOutputFilename = outputFilename + ".SUCCESS";
}
Job annotationJob = ctx.getBean(BatchConfiguration.ANNOTATION_JOB, Job.class);
JobParameters jobParameters = new JobParametersBuilder()
.addString("filename", filename)
.addString("outputFilename", outputFilename)
.addString("failedOutputFilename", failedOutputFilename)
.addString("successfulOutputFilename", successfulOutputFilename)
.addString("outputFormat", outputFormat)
.addString("replace", String.valueOf(replace))
.addString("isoformOverride", isoformOverride)
Expand Down Expand Up @@ -217,7 +224,7 @@ private static void annotate(Subcommand subcommand, String[] args) throws Annota
try {
annotateJob(args, subcommand.getOptionValue("filename"), subcommand.getOptionValue("output-filename"), outputFormat, subcommand.getOptionValue("isoform-override"),
subcommand.getOptionValue("error-report-location", ""),
subcommand.hasOption("replace-symbol-entrez"), subcommand.getOptionValue("post-interval-size", "100"));
subcommand.hasOption("replace-symbol-entrez"), subcommand.getOptionValue("post-interval-size", "100"), subcommand.hasOption("split-output"));
// When you change the default value of post-interval-size, do not forget to update MutationRecordReader.postIntervalSize accordingly
} catch (Exception e) {
throw new AnnotationFailedException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ private static Options getOptions() {
.addOption("i", "isoform-override", true, "Isoform Overrides (mskcc or uniprot)")
.addOption("e", "error-report-location", true, "Error report filename (including path)")
.addOption("r", "replace-symbol-entrez", false, "Replace gene symbols and entrez id with what is provided by annotator")
.addOption("s", "split-output", false, "Output will be split based on Annotation_Status. 3 files will be created: <output-filename>, <output-filename>.SUCCESS, <output-filename>.FAILED" )
.addOption("p", "post-interval-size", true, "Number of records to make POST requests to Genome Nexus with at a time");
return gnuOptions;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,38 @@
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
*/

package org.cbioportal.annotation.pipeline;


import org.cbioportal.annotator.util.AnnotationUtil;
import org.cbioportal.models.AnnotatedRecord;

import org.springframework.batch.core.*;
import org.springframework.batch.item.*;
import org.springframework.batch.core.configuration.annotation.*;
import org.springframework.context.annotation.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.ItemStreamReader;
import org.springframework.batch.item.ItemStreamWriter;
import org.springframework.batch.item.support.CompositeItemWriter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;

import java.util.Arrays;


/**
* @author Zachary Heins
*/
@Configuration
@EnableBatchProcessing
@ComponentScan(basePackages="org.cbioportal.annotator")
public class BatchConfiguration
{
@ComponentScan(basePackages = "org.cbioportal.annotator")
public class BatchConfiguration {
public static final String ANNOTATION_JOB = "annotationJob";

@Autowired
Expand All @@ -63,11 +72,10 @@ public class BatchConfiguration
private String chunk;

@Bean
public Job annotationJob()
{
public Job annotationJob() {
return jobBuilderFactory.get(ANNOTATION_JOB)
.start(step())
.build();
.start(step())
.build();
}

@Bean
Expand All @@ -76,35 +84,48 @@ public AnnotationUtil annotationUtil() {
}

@Bean
public Step step()
{
public Step step() {
return stepBuilderFactory.get("step")
.<AnnotatedRecord, String> chunk(Integer.parseInt(chunk))
.reader(reader())
.processor(processor())
.writer(writer())
.build();
.<AnnotatedRecord, String>chunk(Integer.parseInt(chunk))
.reader(reader())
.processor(processor())
.writer(compositeItemWriter())
.build();
}

@Bean
@StepScope
public ItemStreamReader<AnnotatedRecord> reader()
{
public ItemStreamReader<AnnotatedRecord> reader() {
return new MutationRecordReader();
}

@Bean
@StepScope
public MutationRecordProcessor processor()
{
public MutationRecordProcessor processor() {
return new MutationRecordProcessor();
}

@Bean
@StepScope
public ItemStreamWriter<String> writer()
{
public ItemStreamWriter<String> mainWriter() {
return new MutationRecordWriter();
}

@Bean
@StepScope
public ItemStreamWriter<String> failedItemWriter() {
return new FailedMutationRecordWriter();
}

@Bean
@StepScope
public ItemStreamWriter<String> successfulItemWriter() {
return new SuccessfulMutationRecordWriter();
}

public CompositeItemWriter<String> compositeItemWriter() {
CompositeItemWriter writer = new CompositeItemWriter();
writer.setDelegates(Arrays.asList(mainWriter(), successfulItemWriter(), failedItemWriter()));
return writer;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
* FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder
* is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no
* obligations to provide maintenance, support, updates, enhancements or
* modifications. In no event shall Memorial Sloan-Kettering Cancer Center be
* liable to any party for direct, indirect, special, incidental or
* consequential damages, including lost profits, arising out of the use of this
* software and its documentation, even if Memorial Sloan-Kettering Cancer
* Center has been advised of the possibility of such damage.
*/

/*
* This file is part of cBioPortal CMO-Pipelines.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.cbioportal.annotation.pipeline;

import org.apache.commons.lang.StringUtils;
import org.springframework.batch.item.file.FlatFileHeaderCallback;

import java.io.IOException;
import java.io.Writer;
import java.util.List;

/**
* @author Mete Ozguz
*/
public class DefaultFlatFileHeaderCallback implements FlatFileHeaderCallback {
private final List<String> header;
private final List<String> commentLines;

public DefaultFlatFileHeaderCallback(List<String> header, List<String> commentLines) {
this.header = header;
this.commentLines = commentLines;
}

@Override
public void writeHeader(Writer writer) throws IOException {
// first write out the comment lines, then write the actual header
for (String comment : commentLines) {
writer.write(comment + "\n");
}
writer.write(StringUtils.join(header, "\t"));
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,35 @@
/*
* Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
* FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder
* is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no
* obligations to provide maintenance, support, updates, enhancements or
* modifications. In no event shall Memorial Sloan-Kettering Cancer Center be
* liable to any party for direct, indirect, special, incidental or
* consequential damages, including lost profits, arising out of the use of this
* software and its documentation, even if Memorial Sloan-Kettering Cancer
* Center has been advised of the possibility of such damage.
*/

/*
* This file is part of cBioPortal CMO-Pipelines.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.cbioportal.annotation.pipeline;

import org.springframework.batch.item.file.LineCallbackHandler;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
* FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder
* is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no
* obligations to provide maintenance, support, updates, enhancements or
* modifications. In no event shall Memorial Sloan-Kettering Cancer Center be
* liable to any party for direct, indirect, special, incidental or
* consequential damages, including lost profits, arising out of the use of this
* software and its documentation, even if Memorial Sloan-Kettering Cancer
* Center has been advised of the possibility of such damage.
*/

/*
* This file is part of cBioPortal CMO-Pipelines.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.cbioportal.annotation.pipeline;

import org.springframework.batch.item.ExecutionContext;
import org.springframework.batch.item.ItemStreamException;
import org.springframework.batch.item.ItemStreamWriter;
import org.springframework.batch.item.file.FlatFileItemWriter;
import org.springframework.batch.item.file.transform.PassThroughLineAggregator;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.FileSystemResource;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
* @author Mete Ozguz
*/
public class FailedMutationRecordWriter implements ItemStreamWriter<String> {

@Value("#{jobParameters[failedOutputFilename]}")
private String failedOutputFilename;

@Value("#{stepExecutionContext['commentLines']}")
private List<String> commentLines;

@Value("#{stepExecutionContext['mutation_header']}")
private List<String> header;

@Value("#{stepExecutionContext['failedRecordsCount']}")
private Integer failedRecordsCount;

private FlatFileItemWriter<String> flatFileItemWriter = new FlatFileItemWriter<>();

// Set up the writer and print the json from CVR to a file
@Override
public void open(ExecutionContext ec) throws ItemStreamException {
if (failedOutputFilename != null && !failedOutputFilename.isEmpty() && failedRecordsCount > 0) {
Path stagingFile = Paths.get(failedOutputFilename);
PassThroughLineAggregator aggr = new PassThroughLineAggregator();
flatFileItemWriter.setLineAggregator(aggr);
flatFileItemWriter.setResource(new FileSystemResource(stagingFile.toString()));
flatFileItemWriter.setHeaderCallback(new DefaultFlatFileHeaderCallback(header, commentLines));
flatFileItemWriter.open(ec);
}
}

@Override
public void update(ExecutionContext ec) throws ItemStreamException {
}

@Override
public void close() throws ItemStreamException {
if (failedOutputFilename != null && !failedOutputFilename.isEmpty() && failedRecordsCount > 0) {
flatFileItemWriter.close();
}
}

@Override
public void write(List<? extends String> items) throws Exception {
if (failedOutputFilename != null && !failedOutputFilename.isEmpty() && failedRecordsCount > 0) {
List<String> failedItems = new ArrayList<>();
for (String item : items) {
if (item.contains("\tFAILED")) {
failedItems.add(item);
}
}
flatFileItemWriter.write(failedItems);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,18 @@ public void open(ExecutionContext ec) throws ItemStreamException {
}
}
} else {
for (AnnotatedRecord ar : allAnnotatedRecords) {
int failedRecordCount = 0;
int successfulRecordCount = 0;
for (AnnotatedRecord ar : this.allAnnotatedRecords) {
header.addAll(ar.getHeaderWithAdditionalFields());
if(ar.getANNOTATION_STATUS().equals("SUCCESS")) {
successfulRecordCount++;
} else {
failedRecordCount++;
}
}
ec.put("failedRecordsCount", failedRecordCount);
ec.put("successfulRecordsCount", successfulRecordCount);
}
// add 'Annotation_Status' to header if not already present
if (!header.contains("Annotation_Status")) {
Expand Down
Loading

0 comments on commit 327e277

Please sign in to comment.