Skip to content

Commit

Permalink
Merge pull request #14 from nitin-ebi/store_validation_result
Browse files Browse the repository at this point in the history
EVA-3414 Store validation result and pass it to submit
  • Loading branch information
tcezard authored Oct 13, 2023
2 parents 301ebbb + 8d84d55 commit ddb9d9d
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 86 deletions.
2 changes: 2 additions & 0 deletions cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
LSRI_CLIENT_ID = "aa0fcc42-096a-4f9d-b871-aceb1a97d174"

# Read the package version from the VERSION file that sits next to the cli package.
# A context manager is used so the file handle is closed right after reading.
with open(os.path.join(os.path.dirname(os.path.abspath(cli.__file__)), 'VERSION')) as version_file:
    __version__ = version_file.read().strip()

SUB_CLI_CONFIG_FILE = ".eva_sub_cli_config.yml"
10 changes: 5 additions & 5 deletions cli/docker_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@

logger = logging_config.get_logger(__name__)

# Default docker executable and default container image name
# (used as the fallback values for DockerValidator).
docker_path = 'docker'
container_image = 'eva_sub_cli'
# Paths inside the container - presumably where validation inputs, outputs and
# configuration live; their use is outside this view (TODO confirm).
container_validation_dir = '/opt/vcf_validation'
container_validation_output_dir = '/opt/vcf_validation/vcf_validation_output'
container_etc_dir = '/opt/cli/etc'


def run_command_with_output(command_description, command, return_process_output=True,
log_error_stream_to_output=False):
process_output = ""
Expand Down Expand Up @@ -48,15 +48,15 @@ def run_command_with_output(command_description, command, return_process_output=

class DockerValidator(Reporter):

def __init__(self, mapping_file, output_dir, metadata_json=None, metadata_xlsx=None,
             container_name=container_image, docker_path='docker', submission_config=None):
    """Store the validation inputs and initialise the underlying Reporter.

    :param mapping_file: mapping file kept as ``self.mapping_file``
    :param output_dir: directory handed to the Reporter as its output directory
    :param metadata_json: optional metadata file in JSON form
    :param metadata_xlsx: optional metadata file in spreadsheet form
    :param container_name: docker container name, defaults to ``container_image``
    :param docker_path: docker executable to use, defaults to ``'docker'``
    :param submission_config: optional config object forwarded to the Reporter
    """
    self.docker_path = docker_path
    self.mapping_file = mapping_file
    self.metadata_json = metadata_json
    self.metadata_xlsx = metadata_xlsx
    self.container_name = container_name
    self.spreadsheet2json_conf = os.path.join(ETC_DIR, "spreadsheet2json_conf.yaml")
    # Called last: _find_vcf_file() runs on the instance once the attributes above are set.
    super().__init__(self._find_vcf_file(), output_dir, submission_config=submission_config)

def _validate(self):
    """Implement the Reporter validation hook by delegating to ``run_docker_validator``."""
    self.run_docker_validator()
Expand Down Expand Up @@ -289,4 +289,4 @@ def _copy(file_description, file_path):
validator = DockerValidator(args.vcf_files_mapping, args.output_dir, args.metadata_json, args.metadata_xlsx,
docker_container_name, docker_path)
validator.validate()
validator.create_reports()
validator.create_reports()
88 changes: 75 additions & 13 deletions cli/eva_sub_cli.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,82 @@
import csv
import os
from argparse import ArgumentParser

from ebi_eva_common_pyutils.config import WritableConfig
from ebi_eva_common_pyutils.logger import logging_config

from cli import SUB_CLI_CONFIG_FILE, __version__
from cli.docker_validator import DockerValidator, docker_path, container_image
from cli.reporter import READY_FOR_SUBMISSION_TO_EVA
from cli.submit import StudySubmitter

# Name of the sub-directory (joined to the output directory) that receives validation output
VALIDATION_OUTPUT_DIR = "validation_output"
# Accepted values of the --task command line option
VALIDATE = 'validate'
SUBMIT = 'submit'
RESUME_SUBMISSION = 'resume_submission'

# Register the stdout logging handler as soon as this module is loaded
logging_config.add_stdout_handler()


def get_docker_validator(vcf_files_mapping, output_dir, metadata_json, metadata_xlsx,
                         arg_container, arg_docker, sub_config):
    """Build a DockerValidator that writes under ``<output_dir>/validation_output``.

    Falsy ``arg_container`` / ``arg_docker`` values fall back to the module-level
    ``container_image`` / ``docker_path`` defaults.
    """
    return DockerValidator(
        vcf_files_mapping,
        os.path.join(output_dir, VALIDATION_OUTPUT_DIR),
        metadata_json,
        metadata_xlsx,
        arg_container if arg_container else container_image,
        arg_docker if arg_docker else docker_path,
        sub_config,
    )

def get_vcf_files(mapping_file):
    """Return the ``vcf`` column of a comma-separated mapping file.

    :param mapping_file: path to a CSV file whose header row names a ``vcf`` column
    :return: list with one entry per data row, in file order
    :raises KeyError: if a data row is read and the header has no ``vcf`` column
    """
    # newline='' leaves line-ending handling to the csv module, as its documentation requires.
    with open(mapping_file, newline='') as open_file:
        return [row['vcf'] for row in csv.DictReader(open_file, delimiter=',')]

if __name__ == "__main__":
    # Command line entry point: validate, submit, or resume a submission.
    # Only the --task based interface is kept; the earlier --submission-dir/--resume
    # parser called StudySubmitter() without the arguments it now requires.
    argparser = ArgumentParser(description='EVA Submission CLI - validate and submit data to EVA')
    argparser.add_argument('--task', required=True, choices=[VALIDATE, SUBMIT, RESUME_SUBMISSION],
                           help='Select a task to perform')
    argparser.add_argument('--submission_dir', required=True, type=str,
                           help='Full path to the directory where all processing will be done '
                                'and submission info is/will be stored')
    argparser.add_argument("--vcf_files_mapping", required=True,
                           help="csv file with the mappings for vcf files, fasta and assembly report")
    group = argparser.add_mutually_exclusive_group(required=True)
    group.add_argument("--metadata_json",
                       help="Json file that describe the project, analysis, samples and files")
    group.add_argument("--metadata_xlsx",
                       help="Excel spreadsheet that describe the project, analysis, samples and files")
    argparser.add_argument("--docker_path", required=False, help="Full path to the docker installation, "
                                                                 "not required if docker is available in the PATH environment variable")
    argparser.add_argument("--container_name", required=False, help="Name of the docker container")

    args = argparser.parse_args()

    # Load the submission config shared by the validator and the submitter.
    config_file_path = os.path.join(args.submission_dir, SUB_CLI_CONFIG_FILE)
    sub_config = WritableConfig(config_file_path, version=__version__)

    vcf_files = get_vcf_files(args.vcf_files_mapping)
    # Exactly one of the two metadata options is set (mutually exclusive, required group).
    metadata_file = args.metadata_json or args.metadata_xlsx

    if args.task == RESUME_SUBMISSION:
        if READY_FOR_SUBMISSION_TO_EVA not in sub_config or not sub_config[READY_FOR_SUBMISSION_TO_EVA]:
            # Validation has not passed yet: fall through to the full submit task (validate and submit).
            args.task = SUBMIT
        else:
            # Validation already passed: upload the files without validating again.
            with StudySubmitter(args.submission_dir, vcf_files, metadata_file,
                                submission_config=sub_config) as submitter:
                submitter.upload_submission()

    if args.task in (VALIDATE, SUBMIT):
        # Used as a context manager so the validation result stored in the config is
        # written out on exit; otherwise a plain `validate` run would never save it.
        with get_docker_validator(args.vcf_files_mapping, args.submission_dir, args.metadata_json,
                                  args.metadata_xlsx, args.container_name, args.docker_path,
                                  sub_config) as docker_validator:
            docker_validator.validate()
            docker_validator.create_reports()
            docker_validator.update_config_with_validation_result()

    if args.task == SUBMIT:
        with StudySubmitter(args.submission_dir, vcf_files, metadata_file,
                            submission_config=sub_config) as submitter:
            submitter.submit()
27 changes: 25 additions & 2 deletions cli/reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
import re

import yaml
from ebi_eva_common_pyutils.config import WritableConfig

from cli import ETC_DIR
from cli import ETC_DIR, SUB_CLI_CONFIG_FILE, __version__
from cli.report import generate_html_report
from ebi_eva_common_pyutils.logger import logging_config

# Keys under which the validation outcome is stored in the submission config
VALIDATION_RESULTS = 'validation_results'
READY_FOR_SUBMISSION_TO_EVA = 'ready_for_submission_to_eva'

logger = logging_config.get_logger(__name__)

Expand All @@ -23,12 +26,24 @@ def resolve_single_file_path(file_path):

class Reporter:

def __init__(self, vcf_files, output_dir, submission_config: WritableConfig = None):
    """Initialise the reporter state and attach a submission config.

    :param vcf_files: VCF files this reporter covers
    :param output_dir: directory associated with the validation output
    :param submission_config: config to store results in; when falsy, a new
        WritableConfig pointing at ``SUB_CLI_CONFIG_FILE`` inside ``output_dir`` is created
    """
    self.output_dir = output_dir
    self.vcf_files = vcf_files
    self.results = {}
    self.project_title = None  # TODO fill this from metadata?
    self.validation_date = datetime.datetime.now()
    if submission_config:
        self.sub_config = submission_config
    else:
        config_file = os.path.join(output_dir, SUB_CLI_CONFIG_FILE)
        self.sub_config = WritableConfig(config_file, version=__version__)

def __enter__(self):
    """Enter the ``with`` block; the reporter itself is the managed object."""
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    """Call ``backup()`` and then ``write()`` on the submission config when the ``with`` block ends.

    Runs whether or not the block raised; nothing is returned, so exceptions propagate.
    """
    config = self.sub_config
    config.backup()
    config.write()

def validate(self):
self._validate()
Expand All @@ -37,6 +52,14 @@ def validate(self):
def _validate(self):
raise NotImplementedError

def update_config_with_validation_result(self):
    """Store the validation results and the ready-for-submission flag in the submission config."""
    config = self.sub_config
    config.set(VALIDATION_RESULTS, value=self.results)
    ready = self.verify_ready_for_submission_to_eva()
    config.set(READY_FOR_SUBMISSION_TO_EVA, value=ready)

def verify_ready_for_submission_to_eva(self):
    """Report whether the validation results allow submission to EVA.

    Placeholder: the results are not inspected yet, so this always returns True.
    """
    # TODO: check validation results and confirm if they are good enough for submitting to EVA
    return True

def parse_assembly_check_log(self, assembly_check_log):
error_list = []
nb_error, nb_mismatch = 0, 0
Expand Down
68 changes: 40 additions & 28 deletions cli/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,48 +3,52 @@
from urllib.parse import urljoin

import requests
import yaml

from ebi_eva_common_pyutils.config import WritableConfig
from ebi_eva_common_pyutils.logger import AppLogger
from retry import retry

from cli import SUB_CLI_CONFIG_FILE, __version__
from cli.auth import get_auth
from cli.reporter import READY_FOR_SUBMISSION_TO_EVA

SUB_CLI_CONFIG_FILE = ".eva-sub-cli-config.yml"
# Keys under which the submission id and upload URL are stored in the submission config
SUB_CLI_CONFIG_KEY_SUBMISSION_ID = "submission_id"
SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL = "submission_upload_url"
# Default endpoint that StudySubmitter.submit() POSTs to in order to initiate a submission.
# NOTE(review): this is plain http and the request carries a bearer token - confirm whether https should be used.
SUBMISSION_INITIATE_URL = "http://www.ebi.ac.uk/eva/v1/submission/initiate"


class StudySubmitter(AppLogger):
def __init__(self, vcf_files, metadata_file, submission_initiate_url=SUBMISSION_INITIATE_URL):
def __init__(self, submission_dir, vcf_files, metadata_file, submission_initiate_url=SUBMISSION_INITIATE_URL,
submission_config: WritableConfig = None):
self.auth = get_auth()
self.submission_initiate_url = submission_initiate_url
self.submission_dir = submission_dir
self.vcf_files = vcf_files
self.metadata_file = metadata_file

def create_submission_config_file(self, submission_dir, submission_id, submission_upload_url):
submission_config_file = os.path.join(submission_dir, SUB_CLI_CONFIG_FILE)
config_data = {
SUB_CLI_CONFIG_KEY_SUBMISSION_ID: submission_id,
SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL: submission_upload_url
}
with open(submission_config_file, 'w') as open_file:
yaml.safe_dump(config_data, open_file)

def get_submission_id_and_upload_url(self, submission_dir):
submission_config_file = os.path.join(submission_dir, SUB_CLI_CONFIG_FILE)
if submission_config_file:
with (open(submission_config_file, 'r') as f):
submission_config_data = yaml.safe_load(f)
return submission_config_data[SUB_CLI_CONFIG_KEY_SUBMISSION_ID], submission_config_data[
SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL]
if submission_config:
self.sub_config = submission_config
else:
raise FileNotFoundError(f'Could not upload. No config file found for the submission in {submission_dir}.')
config_file = os.path.join(submission_dir, SUB_CLI_CONFIG_FILE)
self.sub_config = WritableConfig(config_file, version=__version__)

def __enter__(self):
    """Enter the ``with`` block; the submitter itself is the managed object."""
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    """Call ``backup()`` and then ``write()`` on the submission config when the ``with`` block ends.

    Runs whether or not the block raised; nothing is returned, so exceptions propagate.
    """
    config = self.sub_config
    config.backup()
    config.write()

def update_config_with_submission_id_and_upload_url(self, submission_id, upload_url):
    """Store the submission id and its upload URL in the submission config."""
    config = self.sub_config
    config.set(SUB_CLI_CONFIG_KEY_SUBMISSION_ID, value=submission_id)
    config.set(SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL, value=upload_url)

def upload_submission(self, submission_upload_url=None):
    """Upload every VCF file and then the metadata file.

    :param submission_upload_url: destination URL; when falsy, the URL stored in the
        submission config under ``SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL`` is used
    :raises Exception: if the config does not flag the submission as ready for EVA
    """
    if READY_FOR_SUBMISSION_TO_EVA not in self.sub_config or not self.sub_config[READY_FOR_SUBMISSION_TO_EVA]:
        raise Exception('There are still validation errors that need to be addressed. '
                        'Please review, address and re-validate before uploading.')

    if not submission_upload_url:
        submission_upload_url = self.sub_config[SUB_CLI_CONFIG_KEY_SUBMISSION_UPLOAD_URL]

    for f in self.vcf_files:
        self.upload_file(submission_upload_url, f)
    self.upload_file(submission_upload_url, self.metadata_file)
Expand All @@ -63,13 +67,21 @@ def verify_submission_dir(self, submission_dir):
if not os.access(submission_dir, os.W_OK):
raise Exception(f"The directory '{submission_dir}' does not have write permissions.")

def submit(self):
    """Initiate a submission and upload the files.

    POSTs to ``self.submission_initiate_url`` with the bearer token, stores the
    returned submission id and upload URL in the submission config, then uploads.

    :raises Exception: if the config does not flag the submission as ready for EVA,
        or if the submission directory check fails
    :raises requests.HTTPError: if the initiate request returns an error status
    """
    if READY_FOR_SUBMISSION_TO_EVA not in self.sub_config or not self.sub_config[READY_FOR_SUBMISSION_TO_EVA]:
        raise Exception('There are still validation errors that need to be addressed. '
                        'Please review, address and re-validate before submitting.')

    self.verify_submission_dir(self.submission_dir)
    response = requests.post(self.submission_initiate_url,
                             headers={'Accept': 'application/hal+json',
                                      'Authorization': 'Bearer ' + self.auth.token})
    response.raise_for_status()
    response_json = response.json()
    self.info("Submission ID {} received!!".format(response_json["submissionId"]))

    # Record the id and URL before uploading; they are stored in the submission config.
    self.update_config_with_submission_id_and_upload_url(response_json["submissionId"], response_json["uploadUrl"])

    # The URL is passed explicitly, so the upload does not read it back from the config.
    self.upload_submission(response_json["uploadUrl"])
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ minify_html
openpyxl
requests
jsonschema
ebi_eva_common_pyutils==0.5.6
ebi_eva_common_pyutils==0.6.1
Loading

0 comments on commit ddb9d9d

Please sign in to comment.