Skip to content

Commit

Permalink
Write failed annotations to an additional file
Browse files Browse the repository at this point in the history
  • Loading branch information
ozguzMete committed Aug 6, 2022
1 parent e2e8c65 commit edda1f1
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 78 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ private static Options getOptions(String[] args)
.addOption("i", "isoform-override", true, "Isoform Overrides (mskcc or uniprot)")
.addOption("e", "error-report-location", true, "Error report filename (including path)")
.addOption("r", "replace-symbol-entrez", false, "Replace gene symbols and entrez id with what is provided by annotator" )
.addOption("s", "dump-failed", false, "Write failed annotations into an additional file" )
.addOption("p", "post-interval-size", true, "Number of records to make POST requests to Genome Nexus with at a time");

return gnuOptions;
Expand All @@ -71,18 +72,22 @@ private static void help(Options gnuOptions, int exitStatus)
}

private static void launchJob(String[] args, String filename, String outputFilename, String isoformOverride,
String errorReportLocation, boolean replace, Integer postIntervalSize) throws Exception
String errorReportLocation, boolean replace, Integer postIntervalSize, boolean dumpFailed) throws Exception
{
SpringApplication app = new SpringApplication(AnnotationPipeline.class);
app.setWebApplicationType(WebApplicationType.NONE);
app.setAllowBeanDefinitionOverriding(Boolean.TRUE);
ConfigurableApplicationContext ctx = app.run(args);
JobLauncher jobLauncher = ctx.getBean(JobLauncher.class);

String failedOutputFilename = "";
if(dumpFailed) {
failedOutputFilename = outputFilename + ".FAILED";
}
Job annotationJob = ctx.getBean(BatchConfiguration.ANNOTATION_JOB, Job.class);
JobParameters jobParameters = new JobParametersBuilder()
.addString("filename", filename)
.addString("outputFilename", outputFilename)
.addString("failedOutputFilename", failedOutputFilename)
.addString("replace", String.valueOf(replace))
.addString("isoformOverride", isoformOverride)
.addString("errorReportLocation", errorReportLocation)
Expand All @@ -106,6 +111,6 @@ public static void main(String[] args) throws Exception
}
launchJob(args, commandLine.getOptionValue("filename"), commandLine.getOptionValue("output-filename"),commandLine.getOptionValue("isoform-override"),
commandLine.hasOption("error-report-location") ? commandLine.getOptionValue("error-report-location") : null,
commandLine.hasOption("replace-symbol-entrez"), commandLine.hasOption("post-interval-size") ? Integer.valueOf(commandLine.getOptionValue("post-interval-size")) : -1);
commandLine.hasOption("replace-symbol-entrez"), commandLine.hasOption("post-interval-size") ? Integer.valueOf(commandLine.getOptionValue("post-interval-size")) : -1, commandLine.hasOption("dump-failed"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@
package org.cbioportal.annotation.pipeline;

import java.net.MalformedURLException;
import java.util.Arrays;
import javax.sql.DataSource;
import org.cbioportal.annotator.util.AnnotationUtil;
import org.cbioportal.models.AnnotatedRecord;

import org.springframework.batch.core.*;
import org.springframework.batch.item.*;
import org.springframework.batch.core.configuration.annotation.*;
import org.springframework.batch.item.support.CompositeItemWriter;
import org.springframework.context.annotation.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.batch.core.configuration.annotation.StepScope;
Expand Down Expand Up @@ -94,7 +96,7 @@ public Step step()
.<AnnotatedRecord, String> chunk(Integer.parseInt(chunk))
.reader(reader())
.processor(processor())
.writer(writer())
.writer(compositeItemWriter())
.build();
}

Expand All @@ -114,11 +116,24 @@ public MutationRecordProcessor processor()

@Bean
@StepScope
public ItemStreamWriter<String> writer()
public ItemStreamWriter<String> mainWriter()
{
return new MutationRecordWriter();
}

@Bean
@StepScope
public ItemStreamWriter<String> failedItemWriter()
{
return new FailedMutationRecordWriter();
}

public CompositeItemWriter<String> compositeItemWriter(){
CompositeItemWriter writer = new CompositeItemWriter();
writer.setDelegates(Arrays.asList(mainWriter(), failedItemWriter()));
return writer;
}

// general spring batch configuration
@Value("org/springframework/batch/core/schema-drop-sqlite.sql")
private Resource dropRepositoryTables;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS
* FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder
* is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no
* obligations to provide maintenance, support, updates, enhancements or
* modifications. In no event shall Memorial Sloan-Kettering Cancer Center be
* liable to any party for direct, indirect, special, incidental or
* consequential damages, including lost profits, arising out of the use of this
* software and its documentation, even if Memorial Sloan-Kettering Cancer
* Center has been advised of the possibility of such damage.
*/

/*
* This file is part of cBioPortal CMO-Pipelines.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.cbioportal.annotation.pipeline;

import org.apache.commons.lang.StringUtils;
import org.cbioportal.models.AnnotatedRecord;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.batch.item.ItemStreamException;
import org.springframework.batch.item.ItemStreamWriter;
import org.springframework.batch.item.file.FlatFileHeaderCallback;
import org.springframework.batch.item.file.FlatFileItemWriter;
import org.springframework.batch.item.file.transform.PassThroughLineAggregator;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.FileSystemResource;

import java.io.IOException;
import java.io.Writer;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
* @author Mete Ozguz
*/
public class FailedMutationRecordWriter implements ItemStreamWriter<String> {

@Value("#{jobParameters[failedOutputFilename]}")
private String failedOutputFilename;

@Value("#{stepExecutionContext['commentLines']}")
private List<String> commentLines;

@Value("#{stepExecutionContext['mutation_header']}")
private List<String> header;

@Value("#{stepExecutionContext['failed_records_count']}")
private Integer failedRecordsCount;

private Path stagingFile;
private FlatFileItemWriter<String> flatFileItemWriter = new FlatFileItemWriter<>();

// Set up the writer and print the json from CVR to a file
@Override
public void open(ExecutionContext ec) throws ItemStreamException {
if (!failedOutputFilename.isEmpty() && failedRecordsCount > 0) {
stagingFile = Paths.get(failedOutputFilename);

PassThroughLineAggregator aggr = new PassThroughLineAggregator();
flatFileItemWriter.setLineAggregator(aggr);
flatFileItemWriter.setResource(new FileSystemResource(stagingFile.toString()));
flatFileItemWriter.setHeaderCallback(new FlatFileHeaderCallback() {
@Override
public void writeHeader(Writer writer) throws IOException {
AnnotatedRecord record = new AnnotatedRecord();

// first write out the comment lines, then write the actual header
for (String comment : commentLines) {
writer.write(comment + "\n");
}
writer.write(StringUtils.join(header, "\t"));
}
});
flatFileItemWriter.open(ec);
}
}

@Override
public void update(ExecutionContext ec) throws ItemStreamException {
}

@Override
public void close() throws ItemStreamException {
if (!failedOutputFilename.isEmpty() && failedRecordsCount > 0) {
flatFileItemWriter.close();
}
}

@Override
public void write(List<? extends String> items) throws Exception {
if (!failedOutputFilename.isEmpty() && failedRecordsCount > 0) {
List<String> failedItems = new ArrayList<>();
for (String item : items) {
if (!item.endsWith("SUCCESS")) {
failedItems.add(item);
}
}
flatFileItemWriter.write(failedItems);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,14 @@ public void open(ExecutionContext ec) throws ItemStreamException {
} else {
this.allAnnotatedRecords = annotator.annotateRecordsUsingGET(summaryStatistics, mutationRecords, isoformOverride, replace, true);
}
int failedRecordCount = 0;
for (AnnotatedRecord ar : this.allAnnotatedRecords) {
header.addAll(ar.getHeaderWithAdditionalFields());
if(!ar.getANNOTATION_STATUS().equals("SUCCESS")) {
failedRecordCount++;
}
}
ec.put("failed_records_count", failedRecordCount);
// add 'Annotation_Status' to header if not already present
if (!header.contains("Annotation_Status")) {
header.add("Annotation_Status");
Expand Down

This file was deleted.

0 comments on commit edda1f1

Please sign in to comment.