From 7c5f73add967144989cb51e6f90521306224b2bd Mon Sep 17 00:00:00 2001 From: tcezard Date: Wed, 21 Aug 2024 14:09:41 +0100 Subject: [PATCH 1/8] Ensure all relative path are made into full path Raise if metadata spreadsheet is not provided Remove requirement for VCF file type since it has been removed from the metadata Output logs for main nextflow steps to help debug Add debug option to main CLI Lower some docker validator log message to debug --- eva_sub_cli/executables/cli.py | 12 +++++++++--- eva_sub_cli/executables/xlsx2json.py | 2 +- eva_sub_cli/metadata_utils.py | 3 +-- eva_sub_cli/nextflow/validation.nf | 14 +++++++++----- eva_sub_cli/orchestrator.py | 22 +++++++++++++++++----- eva_sub_cli/submission_ws.py | 6 +++--- eva_sub_cli/validators/docker_validator.py | 16 ++++++++-------- 7 files changed, 48 insertions(+), 27 deletions(-) diff --git a/eva_sub_cli/executables/cli.py b/eva_sub_cli/executables/cli.py index d834d73..c760de5 100755 --- a/eva_sub_cli/executables/cli.py +++ b/eva_sub_cli/executables/cli.py @@ -40,7 +40,7 @@ def main(): argparser = ArgumentParser(prog='eva-sub-cli', description='EVA Submission CLI - validate and submit data to EVA') argparser.add_argument('--version', action='version', version=f'%(prog)s {eva_sub_cli.__version__}') argparser.add_argument('--submission_dir', required=True, type=str, - help='Full path to the directory where all processing will be done ' + help='Path to the directory where all processing will be done ' 'and submission info is/will be stored') vcf_group = argparser.add_argument_group( 'Input VCF and assembly', @@ -57,7 +57,7 @@ def main(): help="Json file that describe the project, analysis, samples and files") metadata_group.add_argument("--metadata_xlsx", help="Excel spreadsheet that describe the project, analysis, samples and files") - argparser.add_argument('--tasks', nargs='*', choices=[VALIDATE, SUBMIT], default=[SUBMIT], type=str.lower, + argparser.add_argument('--tasks', nargs='+', choices=[VALIDATE, SUBMIT], default=[SUBMIT], type=str.lower, help='Select a task to perform. Selecting VALIDATE will run the validation regardless of the' ' outcome of previous runs. Selecting SUBMIT will run validate only if the validation' ' was not performed successfully before and then run the submission.') @@ -67,12 +67,18 @@ def main(): 'upload to the EVA') credential_group.add_argument("--username", help="Username used for connecting to the ENA webin account") credential_group.add_argument("--password", help="Password used for connecting to the ENA webin account") + argparser.add_argument('--debug', action='store_true', default=False, help='Set the script to output debug messages') args = argparser.parse_args() validate_command_line_arguments(args, argparser) - logging_config.add_stdout_handler() + args.submission_dir = os.path.abspath(args.submission_dir) + + if args.debug: + logging_config.add_stdout_handler(logging.DEBUG) + else: + logging_config.add_stdout_handler() logging_config.add_file_handler(os.path.join(args.submission_dir, 'eva_submission.log'), logging.DEBUG) try: diff --git a/eva_sub_cli/executables/xlsx2json.py b/eva_sub_cli/executables/xlsx2json.py index d10960f..179c0d3 100644 --- a/eva_sub_cli/executables/xlsx2json.py +++ b/eva_sub_cli/executables/xlsx2json.py @@ -372,6 +372,6 @@ def main(): try: parser.json(args.metadata_json) except Exception as e: - parser.add_error(e) + parser.add_error(f'An Error was raised while converting the spreadsheet to JSON: {repr(e)}') finally: parser.save_errors(args.errors_yaml) diff --git a/eva_sub_cli/metadata_utils.py b/eva_sub_cli/metadata_utils.py index ce967ae..df7f436 100644 --- a/eva_sub_cli/metadata_utils.py +++ b/eva_sub_cli/metadata_utils.py @@ -22,8 +22,7 @@ def get_files_per_analysis(metadata): """Returns mapping of analysis alias to filenames, based on metadata.""" files_per_analysis = defaultdict(list) for file_info in metadata.get('files', []): - if file_info.get('fileType') == 'vcf': - files_per_analysis[file_info.get('analysisAlias')].append(file_info.get('fileName')) + files_per_analysis[file_info.get('analysisAlias')].append(file_info.get('fileName')) return { analysis_alias: set(filepaths) for analysis_alias, filepaths in files_per_analysis.items() diff --git a/eva_sub_cli/nextflow/validation.nf b/eva_sub_cli/nextflow/validation.nf index a6253a5..eade06f 100644 --- a/eva_sub_cli/nextflow/validation.nf +++ b/eva_sub_cli/nextflow/validation.nf @@ -195,12 +195,13 @@ process convert_xlsx_2_json { output: path "metadata.json", emit: metadata_json, optional: true path "metadata_conversion_errors.yml", emit: errors_yaml + path "xlsx2json.log", emit: xlsx2json_log script: metadata_json = metadata_xlsx.getBaseName() + '.json' """ - $params.python_scripts.xlsx2json --metadata_xlsx $metadata_xlsx --metadata_json metadata.json --errors_yaml metadata_conversion_errors.yml --conversion_configuration $conversion_configuration + $params.python_scripts.xlsx2json --metadata_xlsx $metadata_xlsx --metadata_json metadata.json --errors_yaml metadata_conversion_errors.yml --conversion_configuration $conversion_configuration > xlsx2json.log 2>&1 """ } @@ -217,7 +218,7 @@ process metadata_json_validation { script: """ - $params.executable.biovalidator --schema $schema_dir/eva_schema.json --ref $schema_dir/eva-biosamples.json --data $metadata_json > metadata_validation.txt + $params.executable.biovalidator --schema $schema_dir/eva_schema.json --ref $schema_dir/eva-biosamples.json --data $metadata_json > metadata_validation.txt 2>&1 """ } @@ -232,10 +233,11 @@ process sample_name_concordance { output: path "sample_checker.yml", emit: sample_checker_yml + path "sample_checker.log", emit: sample_checker_log script: """ - $params.python_scripts.samples_checker --metadata_json $metadata_json --vcf_files $vcf_files --output_yaml sample_checker.yml + $params.python_scripts.samples_checker --metadata_json $metadata_json --vcf_files $vcf_files --output_yaml sample_checker.yml > sample_checker.log """ } @@ -251,10 +253,11 @@ process insdc_checker { output: path "${fasta_file}_check.yml", emit: fasta_checker_yml + path "fasta_checker.log", emit: fasta_checker_log script: """ - $params.python_scripts.fasta_checker --metadata_json $metadata_json --vcf_files $vcf_files --input_fasta $fasta_file --output_yaml ${fasta_file}_check.yml + $params.python_scripts.fasta_checker --metadata_json $metadata_json --vcf_files $vcf_files --input_fasta $fasta_file --output_yaml ${fasta_file}_check.yml > fasta_checker.log 2>&1 """ } @@ -269,9 +272,10 @@ process metadata_semantic_check { output: path "metadata_semantic_check.yml", emit: metadata_semantic_check_yml + path "semantic_checker.log", emit: semantic_checker_log script: """ - $params.python_scripts.semantic_checker --metadata_json $metadata_json --output_yaml metadata_semantic_check.yml + $params.python_scripts.semantic_checker --metadata_json $metadata_json --output_yaml metadata_semantic_check.yml > semantic_checker.log """ } diff --git a/eva_sub_cli/orchestrator.py b/eva_sub_cli/orchestrator.py index 51d1de2..10f157d 100755 --- a/eva_sub_cli/orchestrator.py +++ b/eva_sub_cli/orchestrator.py @@ -54,7 +54,7 @@ def get_project_title_and_create_vcf_files_mapping(submission_dir, vcf_files, re project_title, vcf_files_mapping = get_project_and_vcf_fasta_mapping_from_metadata_xlsx(metadata_xlsx, True) for mapping in vcf_files_mapping: - writer.writerow(mapping); + writer.writerow(mapping) return project_title, mapping_file @@ -80,6 +80,7 @@ def get_project_and_vcf_fasta_mapping_from_metadata_json(metadata_json, mapping_ return project_title, vcf_fasta_report_mapping + def get_project_and_vcf_fasta_mapping_from_metadata_xlsx(metadata_xlsx, mapping_req=False): workbook = load_workbook(metadata_xlsx) @@ -111,12 +112,16 @@ def get_project_and_vcf_fasta_mapping_from_metadata_xlsx(metadata_xlsx, mapping_ file_name = row[files_headers['File Name']] analysis_alias = row[files_headers['Analysis Alias']] reference_fasta = analysis_alias_dict[analysis_alias] + if not (file_name and os.path.isfile(file_name)): + raise FileNotFoundError(f'The variant file {file_name} provided in spreadsheet {metadata_xlsx} does not exist') + if not (reference_fasta and os.path.isfile(reference_fasta)): + raise FileNotFoundError(f'The reference fasta {reference_fasta} in spreadsheet {metadata_xlsx} does not exist') vcf_fasta_report_mapping.append([os.path.abspath(file_name), os.path.abspath(reference_fasta), '']) return project_title, vcf_fasta_report_mapping -def check_validation_required(tasks, sub_config): +def check_validation_required(tasks, sub_config, username=None, password=None): # Validation is mandatory so if submit is requested then VALIDATE must have run before or be requested as well if SUBMIT in tasks: if not sub_config.get(READY_FOR_SUBMISSION_TO_EVA, False): @@ -124,7 +129,7 @@ def check_validation_required(tasks, sub_config): submission_id = sub_config.get(SUB_CLI_CONFIG_KEY_SUBMISSION_ID, None) if submission_id: try: - submission_status = SubmissionWSClient().get_submission_status(submission_id) + submission_status = SubmissionWSClient(username, password).get_submission_status(submission_id) if submission_status == 'FAILED': return True else: @@ -152,11 +157,18 @@ def orchestrate_process(submission_dir, vcf_files, reference_fasta, metadata_jso if not os.path.exists(os.path.abspath(metadata_file)): raise FileNotFoundError(f'The provided metadata file {metadata_file} does not exist') + if metadata_json: + metadata_json = os.path.abspath(metadata_json) + if metadata_xlsx: + metadata_xlsx = os.path.abspath(metadata_xlsx) + # Get the provided Project Title and VCF files mapping (VCF, Fasta and Report) - project_title, vcf_files_mapping = get_project_title_and_create_vcf_files_mapping(submission_dir, vcf_files, reference_fasta, metadata_json, metadata_xlsx) + project_title, vcf_files_mapping = get_project_title_and_create_vcf_files_mapping( + submission_dir, vcf_files, reference_fasta, metadata_json, metadata_xlsx + ) vcf_files = get_vcf_files(vcf_files_mapping) - if VALIDATE not in tasks and check_validation_required(tasks, sub_config): + if VALIDATE not in tasks and check_validation_required(tasks, sub_config, username, password): tasks.append(VALIDATE) if VALIDATE in tasks: diff --git a/eva_sub_cli/submission_ws.py b/eva_sub_cli/submission_ws.py index bb7cd02..92b6f0b 100644 --- a/eva_sub_cli/submission_ws.py +++ b/eva_sub_cli/submission_ws.py @@ -43,19 +43,19 @@ def _submission_status_url(self, submission_id): def mark_submission_uploaded(self, submission_id, metadata_json): response = requests.put(self._submission_uploaded_url(submission_id), - headers={'Accept': 'application/hal+json', 'Authorization': 'Bearer ' + self.auth.token}, + headers={'Accept': 'application/json', 'Authorization': 'Bearer ' + self.auth.token}, data=metadata_json) response.raise_for_status() return response.json() def initiate_submission(self): - response = requests.post(self._submission_initiate_url(), headers={'Accept': 'application/hal+json', + response = requests.post(self._submission_initiate_url(), headers={'Accept': 'application/json', 'Authorization': 'Bearer ' + self.auth.token}) response.raise_for_status() return response.json() @retry(exceptions=(HTTPError,), tries=3, delay=2, backoff=1.2, jitter=(1, 3)) def get_submission_status(self, submission_id): - response = requests.get(self.get_submission_status_url(submission_id)) + response = requests.get(self._submission_status_url(submission_id)) response.raise_for_status() return response.text diff --git a/eva_sub_cli/validators/docker_validator.py b/eva_sub_cli/validators/docker_validator.py index fcf4051..ea4a600 100644 --- a/eva_sub_cli/validators/docker_validator.py +++ b/eva_sub_cli/validators/docker_validator.py @@ -100,10 +100,10 @@ def verify_container_is_running(self): try: container_run_cmd_output = self._run_quiet_command("check if container is running", f"{self.docker_path} ps", return_process_output=True) if container_run_cmd_output is not None and self.container_name in container_run_cmd_output: - logger.info(f"Container ({self.container_name}) is running") + logger.debug(f"Container ({self.container_name}) is running") return True else: - logger.info(f"Container ({self.container_name}) is not running") + logger.debug(f"Container ({self.container_name}) is not running") return False except subprocess.CalledProcessError as ex: logger.error(ex) @@ -114,10 +114,10 @@ def verify_container_is_stopped(self): "check if container is stopped", f"{self.docker_path} ps -a", return_process_output=True ) if container_stop_cmd_output is not None and self.container_name in container_stop_cmd_output: - logger.info(f"Container ({self.container_name}) is in stop state") + logger.debug(f"Container ({self.container_name}) is in stop state") return True else: - logger.info(f"Container ({self.container_name}) is not in stop state") + logger.debug(f"Container ({self.container_name}) is not in stop state") return False def try_restarting_container(self): @@ -137,14 +137,14 @@ def verify_image_available_locally(self): return_process_output=True ) if container_images_cmd_ouptut is not None and re.search(container_image + r'\s+' + container_tag, container_images_cmd_ouptut): - logger.info(f"Container ({container_image}) image is available locally") + logger.debug(f"Container ({container_image}) image is available locally") return True else: - logger.info(f"Container ({container_image}) image is not available locally") + logger.debug(f"Container ({container_image}) image is not available locally") return False def run_container(self): - logger.info(f"Trying to run container {self.container_name}") + logger.debug(f"Trying to run container {self.container_name}") try: self._run_quiet_command( "Try running container", @@ -166,7 +166,7 @@ def stop_running_container(self): ) def download_container_image(self): - logger.info(f"Pulling container ({container_image}) image") + logger.debug(f"Pulling container ({container_image}) image") try: self._run_quiet_command("pull container image", f"{self.docker_path} pull {container_image}:{container_tag}") except subprocess.CalledProcessError as ex: From f345707ca08ff02005eb8bcf212b5fe6713afd76 Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 22 Aug 2024 13:50:01 +0100 Subject: [PATCH 2/8] Remove requirement for VCF file type since it has been removed from the metadata --- eva_sub_cli/submission_ws.py | 2 +- eva_sub_cli/validators/validator.py | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/eva_sub_cli/submission_ws.py b/eva_sub_cli/submission_ws.py index 92b6f0b..5512a51 100644 --- a/eva_sub_cli/submission_ws.py +++ b/eva_sub_cli/submission_ws.py @@ -44,7 +44,7 @@ def _submission_status_url(self, submission_id): def mark_submission_uploaded(self, submission_id, metadata_json): response = requests.put(self._submission_uploaded_url(submission_id), headers={'Accept': 'application/json', 'Authorization': 'Bearer ' + self.auth.token}, - data=metadata_json) + json=metadata_json) response.raise_for_status() return response.json() diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py index 5ffa833..c0c9681 100755 --- a/eva_sub_cli/validators/validator.py +++ b/eva_sub_cli/validators/validator.py @@ -545,6 +545,10 @@ def _collect_file_info_to_metadata(self): file_name_2_md5[os.path.basename(vcf_file)] = md5sum file_path_2_file_size[vcf_file] = file_size file_name_2_file_size[os.path.basename(vcf_file)] = file_size + else: + self.error( + f"Cannot locate file_info.txt at {os.path.join(self.output_dir, 'other_validations', 'file_info.txt')}" + ) if self.metadata_json_post_validation: with open(self.metadata_json_post_validation) as open_file: try: @@ -553,12 +557,11 @@ def _collect_file_info_to_metadata(self): files_from_metadata = json_data.get('files', []) if files_from_metadata: for file_dict in json_data.get('files', []): - if file_dict.get('fileType') == 'vcf': - file_path = self._validation_file_path_for(file_dict.get('fileName')) - file_dict['md5'] = file_path_2_md5.get(file_path) or \ - file_name_2_md5.get(file_dict.get('fileName')) or '' - file_dict['fileSize'] = file_path_2_file_size.get(file_path) or \ - file_name_2_file_size.get(file_dict.get('fileName')) or '' + file_path = self._validation_file_path_for(file_dict.get('fileName')) + file_dict['md5'] = file_path_2_md5.get(file_path) or \ + file_name_2_md5.get(file_dict.get('fileName')) or '' + file_dict['fileSize'] = file_path_2_file_size.get(file_path) or \ + file_name_2_file_size.get(file_dict.get('fileName')) or '' file_rows.append(file_dict) else: self.error('No file found in metadata and multiple analysis alias exist: ' @@ -570,6 +573,8 @@ def _collect_file_info_to_metadata(self): if json_data: with open(self.metadata_json_post_validation, 'w') as open_file: json.dump(json_data, open_file) + else: + self.error(f'Cannot locate the metadata in JSON format in {os.path.join(self.output_dir, "metadata.json")}') def get_vcf_fasta_analysis_mapping(self): vcf_fasta_analysis_mapping = [] From bafae455bbbdeecd46155cc184acab1ab97e5d67 Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 22 Aug 2024 14:36:52 +0100 Subject: [PATCH 3/8] Fix tests --- eva_sub_cli/orchestrator.py | 4 ++-- tests/test_orchestrator.py | 13 ++++++++++--- tests/test_submit.py | 6 +++--- tests/test_utils.py | 5 +++++ 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/eva_sub_cli/orchestrator.py b/eva_sub_cli/orchestrator.py index 10f157d..c59079a 100755 --- a/eva_sub_cli/orchestrator.py +++ b/eva_sub_cli/orchestrator.py @@ -109,9 +109,9 @@ def get_project_and_vcf_fasta_mapping_from_metadata_xlsx(metadata_xlsx, mapping_ files_headers[cell.value] = cell.column - 1 for row in files_sheet.iter_rows(min_row=2, values_only=True): - file_name = row[files_headers['File Name']] + file_name = os.path.abspath(row[files_headers['File Name']]) analysis_alias = row[files_headers['Analysis Alias']] - reference_fasta = analysis_alias_dict[analysis_alias] + reference_fasta = os.path.abspath(analysis_alias_dict[analysis_alias]) if not (file_name and os.path.isfile(file_name)): raise FileNotFoundError(f'The variant file {file_name} provided in spreadsheet {metadata_xlsx} does not exist') if not (reference_fasta and os.path.isfile(reference_fasta)): diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 1d39590..2af0edd 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -13,6 +13,7 @@ from eva_sub_cli.orchestrator import orchestrate_process, VALIDATE, SUBMIT, DOCKER, check_validation_required from eva_sub_cli.submit import SUB_CLI_CONFIG_KEY_SUBMISSION_ID from eva_sub_cli.validators.validator import READY_FOR_SUBMISSION_TO_EVA +from tests.test_utils import touch class TestOrchestrator(unittest.TestCase): @@ -32,9 +33,17 @@ def setUp(self) -> None: shutil.rmtree(self.test_sub_dir) os.makedirs(self.test_sub_dir) shutil.copy(os.path.join(self.resource_dir, 'EVA_Submission_test.json'), self.metadata_json) + shutil.copy(os.path.join(self.resource_dir, 'EVA_Submission_test.xlsx'), self.metadata_xlsx) + for file_name in ['example1.vcf.gz', 'example2.vcf', 'example3.vcf', 'GCA_000001405.27_fasta.fa']: + touch(os.path.join(self.test_sub_dir, file_name)) + self.curr_wd = os.curdir + os.chdir(self.test_sub_dir) + def tearDown(self) -> None: - shutil.rmtree(self.test_sub_dir) + os.chdir(self.curr_wd) + if os.path.exists(self.test_sub_dir): + shutil.rmtree(self.test_sub_dir) def test_check_validation_required(self): tasks = ['submit'] @@ -213,8 +222,6 @@ def test_orchestrate_vcf_files_takes_precedence_over_metadata(self): def test_orchestrate_with_metadata_xlsx(self): - shutil.copy(os.path.join(self.resource_dir, 'EVA_Submission_test.xlsx'), self.metadata_xlsx) - with patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator: orchestrate_process(self.test_sub_dir, None, None, None, self.metadata_xlsx, diff --git a/tests/test_submit.py b/tests/test_submit.py index f3b9ac6..21408d1 100644 --- a/tests/test_submit.py +++ b/tests/test_submit.py @@ -58,12 +58,12 @@ def test_submit(self): mock_post.assert_called_once_with( os.path.join(test_submission_ws_client.SUBMISSION_WS_URL, 'submission/initiate'), - headers={'Accept': 'application/hal+json', 'Authorization': 'Bearer a token'}) + headers={'Accept': 'application/json', 'Authorization': 'Bearer a token'}) mock_put.assert_called_once_with( os.path.join(test_submission_ws_client.SUBMISSION_WS_URL, 'submission/mock_submission_id/uploaded'), - headers={'Accept': 'application/hal+json', 'Authorization': 'Bearer a token'}, - data=self.metadata_json) + headers={'Accept': 'application/json', 'Authorization': 'Bearer a token'}, + json=self.metadata_json) print(mock_put.mock_calls) def test_submit_with_config(self): diff --git a/tests/test_utils.py b/tests/test_utils.py index 8ccf613..6cf26d2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -7,3 +7,8 @@ def create_mapping_file(mapping_file, vcf_files, fasta_files, assembly_reports): writer.writerow(['vcf', 'fasta', 'report']) for vcf_file, fasta_file, assembly_reports in zip(vcf_files, fasta_files, assembly_reports): writer.writerow([vcf_file, fasta_file, assembly_reports]) + + +def touch(file_path, content=''): + with open(file_path, 'w') as open_file: + open_file.write(content) \ No newline at end of file From c964fe48a5c60e41d55e7f5dd5a7700d87dd54bb Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 22 Aug 2024 14:37:22 +0100 Subject: [PATCH 4/8] remove print statement --- tests/test_submit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_submit.py b/tests/test_submit.py index 21408d1..8c697f3 100644 --- a/tests/test_submit.py +++ b/tests/test_submit.py @@ -64,7 +64,6 @@ def test_submit(self): os.path.join(test_submission_ws_client.SUBMISSION_WS_URL, 'submission/mock_submission_id/uploaded'), headers={'Accept': 'application/json', 'Authorization': 'Bearer a token'}, json=self.metadata_json) - print(mock_put.mock_calls) def test_submit_with_config(self): mock_initiate_response = MagicMock() From 1b78f2f9fb8b7191419b7b6b3bca438d642c882a Mon Sep 17 00:00:00 2001 From: tcezard Date: Fri, 23 Aug 2024 12:12:37 +0100 Subject: [PATCH 5/8] Couple of fixes --- eva_sub_cli/orchestrator.py | 2 +- eva_sub_cli/submit.py | 3 ++- eva_sub_cli/validators/docker_validator.py | 2 +- eva_sub_cli/validators/native_validator.py | 4 ++++ tests/test_orchestrator.py | 10 ++++++---- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/eva_sub_cli/orchestrator.py b/eva_sub_cli/orchestrator.py index c59079a..a14c31d 100755 --- a/eva_sub_cli/orchestrator.py +++ b/eva_sub_cli/orchestrator.py @@ -155,7 +155,7 @@ def orchestrate_process(submission_dir, vcf_files, reference_fasta, metadata_jso metadata_file = metadata_json or metadata_xlsx if not os.path.exists(os.path.abspath(metadata_file)): - raise FileNotFoundError(f'The provided metadata file {metadata_file} does not exist') + raise FileNotFoundError(f'The provided metadata file {os.path.abspath(metadata_file)} does not exist') if metadata_json: metadata_json = os.path.abspath(metadata_json) diff --git a/eva_sub_cli/submit.py b/eva_sub_cli/submit.py index 8f71128..62d1a94 100644 --- a/eva_sub_cli/submit.py +++ b/eva_sub_cli/submit.py @@ -64,7 +64,8 @@ def _upload_submission(self): def _upload_file(self, submission_upload_url, input_file): base_name = os.path.basename(input_file) self.debug(f'Transfer {base_name} to EVA FTP') - r = requests.put(os.path.join(submission_upload_url, base_name), data=open(input_file, 'rb')) + with open(input_file, 'rb') as f: + r = requests.put(os.path.join(submission_upload_url, base_name), data=open(input_file, 'rb')) r.raise_for_status() self.debug(f'Upload of {base_name} completed') diff --git a/eva_sub_cli/validators/docker_validator.py b/eva_sub_cli/validators/docker_validator.py index ea4a600..6e2ec0b 100644 --- a/eva_sub_cli/validators/docker_validator.py +++ b/eva_sub_cli/validators/docker_validator.py @@ -12,7 +12,7 @@ logger = logging_config.get_logger(__name__) container_image = 'ebivariation/eva-sub-cli' -container_tag = 'v0.0.1.dev14' +container_tag = 'v0.0.1.dev15' container_validation_dir = '/opt/vcf_validation' container_validation_output_dir = 'vcf_validation_output' diff --git a/eva_sub_cli/validators/native_validator.py b/eva_sub_cli/validators/native_validator.py index f125907..eb95939 100644 --- a/eva_sub_cli/validators/native_validator.py +++ b/eva_sub_cli/validators/native_validator.py @@ -24,11 +24,15 @@ def _validate(self): def run_validator(self): self.verify_executables_installed() + curr_wd = os.getcwd() try: command = self.get_validation_cmd() + os.chdir(self.submission_dir) self._run_quiet_command("Run Validation using Nextflow", command) except subprocess.CalledProcessError as ex: logger.error(ex) + finally: + os.chdir(curr_wd) def get_validation_cmd(self): if self.metadata_xlsx and not self.metadata_json: diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 2af0edd..5433ffe 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -239,11 +239,13 @@ def test_orchestrate_with_metadata_xlsx(self): ) m_docker_validator().validate_and_report.assert_called_once_with() - def test_metadata_file_does_not_exist_error(self): with self.assertRaises(Exception) as context: - orchestrate_process(self.test_sub_dir, None, None, None, self.metadata_xlsx, - tasks=[VALIDATE], executor=DOCKER) - self.assertRegex(str(context.exception),r"The provided metadata file .*/resources/test_sub_dir/sub_metadata.xlsx does not exist") + orchestrate_process(self.test_sub_dir, None, None, None, 'Non_existing_metadata.xlsx', + tasks=[VALIDATE], executor=DOCKER) + self.assertRegex( + str(context.exception), + r"The provided metadata file .*/resources/test_sub_dir/Non_existing_metadata.xlsx does not exist" + ) From 06e3b2346b0c6c0315cc9ae0fa86b127752ea2eb Mon Sep 17 00:00:00 2001 From: tcezard Date: Fri, 23 Aug 2024 12:32:22 +0100 Subject: [PATCH 6/8] Fix test --- tests/test_orchestrator.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 5433ffe..b11a856 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -36,10 +36,9 @@ def setUp(self) -> None: shutil.copy(os.path.join(self.resource_dir, 'EVA_Submission_test.xlsx'), self.metadata_xlsx) for file_name in ['example1.vcf.gz', 'example2.vcf', 'example3.vcf', 'GCA_000001405.27_fasta.fa']: touch(os.path.join(self.test_sub_dir, file_name)) - self.curr_wd = os.curdir + self.curr_wd = os.getcwd() os.chdir(self.test_sub_dir) - def tearDown(self) -> None: os.chdir(self.curr_wd) if os.path.exists(self.test_sub_dir): @@ -159,7 +158,6 @@ def test_orchestrate_with_vcf_files(self): ) m_docker_validator().validate_and_report.assert_called_once_with() - def test_orchestrate_with_metadata_json_without_asm_report(self): with patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator: @@ -198,7 +196,6 @@ def test_orchestrate_with_metadata_json_with_asm_report(self): ) m_docker_validator().validate_and_report.assert_called_once_with() - def test_orchestrate_vcf_files_takes_precedence_over_metadata(self): shutil.copy(os.path.join(self.resource_dir, 'EVA_Submission_test_with_asm_report.json'), self.metadata_json) From f76b155ea31aa23e6917bb659b9a0b9d59d4b141 Mon Sep 17 00:00:00 2001 From: Timothee Cezard Date: Fri, 30 Aug 2024 16:50:42 +0100 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: April Shen --- eva_sub_cli/nextflow/validation.nf | 2 +- eva_sub_cli/submit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/eva_sub_cli/nextflow/validation.nf b/eva_sub_cli/nextflow/validation.nf index eade06f..25ac050 100644 --- a/eva_sub_cli/nextflow/validation.nf +++ b/eva_sub_cli/nextflow/validation.nf @@ -237,7 +237,7 @@ process sample_name_concordance { script: """ - $params.python_scripts.samples_checker --metadata_json $metadata_json --vcf_files $vcf_files --output_yaml sample_checker.yml > sample_checker.log + $params.python_scripts.samples_checker --metadata_json $metadata_json --vcf_files $vcf_files --output_yaml sample_checker.yml > sample_checker.log 2>&1 """ } diff --git a/eva_sub_cli/submit.py b/eva_sub_cli/submit.py index 62d1a94..4b7842c 100644 --- a/eva_sub_cli/submit.py +++ b/eva_sub_cli/submit.py @@ -65,7 +65,7 @@ def _upload_file(self, submission_upload_url, input_file): base_name = os.path.basename(input_file) self.debug(f'Transfer {base_name} to EVA FTP') with open(input_file, 'rb') as f: - r = requests.put(os.path.join(submission_upload_url, base_name), data=open(input_file, 'rb')) + r = requests.put(os.path.join(submission_upload_url, base_name), data=f) r.raise_for_status() self.debug(f'Upload of {base_name} completed') From 4732580b1132ff64d462539a8c9474b36cdc101c Mon Sep 17 00:00:00 2001 From: tcezard Date: Fri, 30 Aug 2024 16:53:07 +0100 Subject: [PATCH 8/8] fix from review --- eva_sub_cli/nextflow/validation.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eva_sub_cli/nextflow/validation.nf b/eva_sub_cli/nextflow/validation.nf index 25ac050..1b28e06 100644 --- a/eva_sub_cli/nextflow/validation.nf +++ b/eva_sub_cli/nextflow/validation.nf @@ -276,6 +276,6 @@ process metadata_semantic_check { script: """ - $params.python_scripts.semantic_checker --metadata_json $metadata_json --output_yaml metadata_semantic_check.yml > semantic_checker.log + $params.python_scripts.semantic_checker --metadata_json $metadata_json --output_yaml metadata_semantic_check.yml > semantic_checker.log 2>&1 """ }