From 88fb3f4e9c47a8ffc21eed447cf955c9c9ed2cfd Mon Sep 17 00:00:00 2001 From: RotemZilberman Date: Sat, 22 Jun 2024 08:40:45 +0300 Subject: [PATCH 1/6] online running update bash files --- .../pose_to_signwriting/Online_bash_file.sh | 71 +++++++++++++++++++ .../pose_to_signwriting/bin.py | 2 +- .../pose_to_signwriting/data/config.py | 8 +-- .../pose_to_signwriting/start.sh | 8 +++ 4 files changed, 84 insertions(+), 5 deletions(-) create mode 100644 signwriting_transcription/pose_to_signwriting/Online_bash_file.sh create mode 100644 signwriting_transcription/pose_to_signwriting/start.sh diff --git a/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh b/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh new file mode 100644 index 0000000..b203919 --- /dev/null +++ b/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +# Clone the repository +git clone https://github.com/sign-language-processing/signwriting-transcription.git +cd signwriting-transcription + +# Install the required packages +pip install .[dev,pose_to_signwriting] +pip install ruamel.yaml + +# Download and unzip the sign-vq.zip file +wget 'https://drive.usercontent.google.com/download?id=1V_Af2oqY28QgkE1e8jZzEvuxf6N3bLkB&export=download&authuser=0&confirm=t&uuid=c134a3b0-59c6-4279-b3d9-e918523fd913&at=APZUnTXILGGtZIF-UVbFYXHddcFX%3A1718897741079' -O sign-vq.zip +unzip sign-vq.zip -d sign-vq +cd sign-vq +pip install . +cd .. + +# Download and unzip the transcription data set +wget -O transcription.zip "https://firebasestorage.googleapis.com/v0/b/sign-language-datasets/o/poses%2Fholistic%2Ftranscription.zip?alt=media" +unzip transcription.zip -d transcription_data_set + +# Run preprocessing script +python signwriting_transcription/pose_to_signwriting/data/preprocessing.py --src-dir transcription_data_set --trg-dir normalized_data_set --normalization True + +# Prepare segmentation data set +mkdir -p segment_data_set +cp data/data_segmentation.csv segment_data_set/target.csv +cp data/data.csv normalized_data_set/target.csv + +# Run prepare_poses script +python signwriting_transcription/pose_to_signwriting/data/prepare_poses.py \ + --dataset-root normalized_data_set \ + --data-root vectorized_data_set \ + --dataset-name poses \ + --tokenizer-type pose-vpf \ + --data-segment segment_data_set + +# Run config script +python signwriting_transcription/pose_to_signwriting/data/config.py --data-path vectorized_data_set/poses --experiment-dir experiment + +# Prepare experiment directory +mkdir -p experiment +cp vectorized_data_set/poses/config.yaml experiment/config.yaml + +# Run training script +python signwriting_transcription/pose_to_signwriting/joeynmt_pose/training.py vectorized_data_set/poses/config.yaml + +# Download token.json +wget 'https://drive.google.com/uc?export=download&id=1EwgVIAxa_VcPWMtaFXru19ZBqc8NPq8K' -O signwriting_transcription/pose_to_signwriting/joeynmt_pose/token.json + +# Modify the config.yaml file to set eval_all_metrics to True +python - < Date: Sat, 22 Jun 2024 08:47:27 +0300 Subject: [PATCH 2/6] fix pylint --- signwriting_transcription/pose_to_signwriting/bin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/signwriting_transcription/pose_to_signwriting/bin.py b/signwriting_transcription/pose_to_signwriting/bin.py index 8f758c1..58dc7f4 100644 --- a/signwriting_transcription/pose_to_signwriting/bin.py +++ b/signwriting_transcription/pose_to_signwriting/bin.py @@ -73,8 +73,9 @@ def preprocessing_signs(preprocessed_pose: Pose, sign_annotations: list, strateg else: # tight strategy - add padding(PADDING_PACTOR) to the tight segment # add padding to the segment by the distance between the segments np_pose, frame_rate = pose_to_matrix(preprocessed_pose) - np_pose = pose_ndarray_to_matrix(np_pose, sign_start - (sign_start - start_point) * PADDING_PACTOR, frame_rate, - sign_end + (end_point - sign_end) * PADDING_PACTOR).filled(fill_value=0) + np_pose = (pose_ndarray_to_matrix(np_pose, sign_start - (sign_start - start_point) * PADDING_PACTOR, + frame_rate, sign_end + (end_point - sign_end) * PADDING_PACTOR) + .filled(fill_value=0)) start_point = sign_end pose_path = temp_path / f'{index}.npy' np.save(pose_path, np_pose) From 63296d9ac8a4faeda55794a2fb2d2e4eb9131c66 Mon Sep 17 00:00:00 2001 From: RotemZilberman Date: Sat, 22 Jun 2024 11:37:50 +0300 Subject: [PATCH 3/6] simple modification for the code --- pyproject.toml | 1 + .../pose_to_signwriting/data/config.py | 10 ++++--- .../joeynmt_pose/prediction.py | 6 ++--- .../{Online_bash_file.sh => pipeline.sh} | 26 +------------------ .../pose_to_signwriting/start.sh | 2 +- 5 files changed, 12 insertions(+), 33 deletions(-) rename signwriting_transcription/pose_to_signwriting/{Online_bash_file.sh => pipeline.sh} (71%) diff --git a/pyproject.toml b/pyproject.toml index e942424..997f518 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ pose_to_signwriting = [ # Uploads results to Google Sheets "google-auth-oauthlib", "google-api-python-client", + "ruamel.yaml" ] pose_to_vq_to_signwriting = [ "sign-vq @ git+https://github.com/sign-language-processing/sign-vq.git" # Used for getting codes from poses diff --git a/signwriting_transcription/pose_to_signwriting/data/config.py b/signwriting_transcription/pose_to_signwriting/data/config.py index 9f37742..6a56534 100644 --- a/signwriting_transcription/pose_to_signwriting/data/config.py +++ b/signwriting_transcription/pose_to_signwriting/data/config.py @@ -3,7 +3,7 @@ from pathlib import Path -def create_config(data_path="/output/poses", experiment_dir='/model/poses'): +def create_config(data_path="/output/poses", experiment_dir='/model/poses', test_eval_matrices='False'): data_path = Path(data_path) experiment_dir = Path(experiment_dir) @@ -51,7 +51,7 @@ def create_config(data_path="/output/poses", experiment_dir='/model/poses'): pretokenize: "none" testing: - eval_all_metrics: False + eval_all_metrics: {test_eval_matrices} n_best: 1 beam_size: 5 beam_alpha: 1.0 @@ -128,7 +128,8 @@ def create_config(data_path="/output/poses", experiment_dir='/model/poses'): dropout: 0.1 layer_norm: "pre" """.format(data_dir=data_path.as_posix(), - experiment_dir=experiment_dir.as_posix()) + experiment_dir=experiment_dir.as_posix(), + test_eval_matrices=test_eval_matrices) (data_path / 'config.yaml').write_text(config) @@ -267,8 +268,9 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--data-path", "-d", required=True, type=str) parser.add_argument("--experiment-dir", "-e", required=True, type=str) + parser.add_argument("--test-eval-matrices", required=False, default='False') args = parser.parse_args() - create_config(args.data_path, args.experiment_dir) + create_config(args.data_path, args.experiment_dir, args.test_eval_matrices) if __name__ == '__main__': diff --git a/signwriting_transcription/pose_to_signwriting/joeynmt_pose/prediction.py b/signwriting_transcription/pose_to_signwriting/joeynmt_pose/prediction.py index 9eb8190..272adcb 100644 --- a/signwriting_transcription/pose_to_signwriting/joeynmt_pose/prediction.py +++ b/signwriting_transcription/pose_to_signwriting/joeynmt_pose/prediction.py @@ -537,9 +537,9 @@ def _translate_data(test_data, cfg): trg_cfg = cfg["data"]["trg"] task = cfg["data"].get("task", "MT").upper() - pkg_version = make_logger(model_dir, mode="translate") # version string returned - if "joeynmt_version" in cfg: - check_version(pkg_version, cfg["joeynmt_version"]) + # pkg_version = make_logger(model_dir, mode="translate") # version string returned + # if "joeynmt_version" in cfg: + # check_version(pkg_version, cfg["joeynmt_version"]) # when checkpoint is not specified, take latest (best) from model dir load_model = load_model if ckpt is None else Path(ckpt) diff --git a/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh b/signwriting_transcription/pose_to_signwriting/pipeline.sh similarity index 71% rename from signwriting_transcription/pose_to_signwriting/Online_bash_file.sh rename to signwriting_transcription/pose_to_signwriting/pipeline.sh index b203919..32d28ad 100644 --- a/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh +++ b/signwriting_transcription/pose_to_signwriting/pipeline.sh @@ -6,14 +6,6 @@ cd signwriting-transcription # Install the required packages pip install .[dev,pose_to_signwriting] -pip install ruamel.yaml - -# Download and unzip the sign-vq.zip file -wget 'https://drive.usercontent.google.com/download?id=1V_Af2oqY28QgkE1e8jZzEvuxf6N3bLkB&export=download&authuser=0&confirm=t&uuid=c134a3b0-59c6-4279-b3d9-e918523fd913&at=APZUnTXILGGtZIF-UVbFYXHddcFX%3A1718897741079' -O sign-vq.zip -unzip sign-vq.zip -d sign-vq -cd sign-vq -pip install . -cd .. # Download and unzip the transcription data set wget -O transcription.zip "https://firebasestorage.googleapis.com/v0/b/sign-language-datasets/o/poses%2Fholistic%2Ftranscription.zip?alt=media" @@ -49,23 +41,7 @@ python signwriting_transcription/pose_to_signwriting/joeynmt_pose/training.py ve wget 'https://drive.google.com/uc?export=download&id=1EwgVIAxa_VcPWMtaFXru19ZBqc8NPq8K' -O signwriting_transcription/pose_to_signwriting/joeynmt_pose/token.json # Modify the config.yaml file to set eval_all_metrics to True -python - < Date: Sat, 22 Jun 2024 11:42:45 +0300 Subject: [PATCH 4/6] Revert "simple modification for the code" This reverts commit 63296d9ac8a4faeda55794a2fb2d2e4eb9131c66. --- pyproject.toml | 1 - .../{pipeline.sh => Online_bash_file.sh} | 26 ++++++++++++++++++- .../pose_to_signwriting/data/config.py | 10 +++---- .../joeynmt_pose/prediction.py | 6 ++--- .../pose_to_signwriting/start.sh | 2 +- 5 files changed, 33 insertions(+), 12 deletions(-) rename signwriting_transcription/pose_to_signwriting/{pipeline.sh => Online_bash_file.sh} (71%) diff --git a/pyproject.toml b/pyproject.toml index 997f518..e942424 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ pose_to_signwriting = [ # Uploads results to Google Sheets "google-auth-oauthlib", "google-api-python-client", - "ruamel.yaml" ] pose_to_vq_to_signwriting = [ "sign-vq @ git+https://github.com/sign-language-processing/sign-vq.git" # Used for getting codes from poses diff --git a/signwriting_transcription/pose_to_signwriting/pipeline.sh b/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh similarity index 71% rename from signwriting_transcription/pose_to_signwriting/pipeline.sh rename to signwriting_transcription/pose_to_signwriting/Online_bash_file.sh index 32d28ad..b203919 100644 --- a/signwriting_transcription/pose_to_signwriting/pipeline.sh +++ b/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh @@ -6,6 +6,14 @@ cd signwriting-transcription # Install the required packages pip install .[dev,pose_to_signwriting] +pip install ruamel.yaml + +# Download and unzip the sign-vq.zip file +wget 'https://drive.usercontent.google.com/download?id=1V_Af2oqY28QgkE1e8jZzEvuxf6N3bLkB&export=download&authuser=0&confirm=t&uuid=c134a3b0-59c6-4279-b3d9-e918523fd913&at=APZUnTXILGGtZIF-UVbFYXHddcFX%3A1718897741079' -O sign-vq.zip +unzip sign-vq.zip -d sign-vq +cd sign-vq +pip install . +cd .. # Download and unzip the transcription data set wget -O transcription.zip "https://firebasestorage.googleapis.com/v0/b/sign-language-datasets/o/poses%2Fholistic%2Ftranscription.zip?alt=media" @@ -41,7 +49,23 @@ python signwriting_transcription/pose_to_signwriting/joeynmt_pose/training.py ve wget 'https://drive.google.com/uc?export=download&id=1EwgVIAxa_VcPWMtaFXru19ZBqc8NPq8K' -O signwriting_transcription/pose_to_signwriting/joeynmt_pose/token.json # Modify the config.yaml file to set eval_all_metrics to True -python signwriting_transcription/pose_to_signwriting/data/config.py --data-path experiment --experiment-dir experiment --test-eval-matrices True +python - < Date: Sat, 22 Jun 2024 11:48:59 +0300 Subject: [PATCH 5/6] simple modification for the code --- pyproject.toml | 1 + .../pose_to_signwriting/data/config.py | 10 ++++--- .../{Online_bash_file.sh => pipeline.sh} | 26 +------------------ 3 files changed, 8 insertions(+), 29 deletions(-) rename signwriting_transcription/pose_to_signwriting/{Online_bash_file.sh => pipeline.sh} (71%) diff --git a/pyproject.toml b/pyproject.toml index e942424..7c7f748 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ pose_to_signwriting = [ # Uploads results to Google Sheets "google-auth-oauthlib", "google-api-python-client", + "ruamel.yaml", ] pose_to_vq_to_signwriting = [ "sign-vq @ git+https://github.com/sign-language-processing/sign-vq.git" # Used for getting codes from poses diff --git a/signwriting_transcription/pose_to_signwriting/data/config.py b/signwriting_transcription/pose_to_signwriting/data/config.py index 9f37742..6a56534 100644 --- a/signwriting_transcription/pose_to_signwriting/data/config.py +++ b/signwriting_transcription/pose_to_signwriting/data/config.py @@ -3,7 +3,7 @@ from pathlib import Path -def create_config(data_path="/output/poses", experiment_dir='/model/poses'): +def create_config(data_path="/output/poses", experiment_dir='/model/poses', test_eval_matrices='False'): data_path = Path(data_path) experiment_dir = Path(experiment_dir) @@ -51,7 +51,7 @@ def create_config(data_path="/output/poses", experiment_dir='/model/poses'): pretokenize: "none" testing: - eval_all_metrics: False + eval_all_metrics: {test_eval_matrices} n_best: 1 beam_size: 5 beam_alpha: 1.0 @@ -128,7 +128,8 @@ def create_config(data_path="/output/poses", experiment_dir='/model/poses'): dropout: 0.1 layer_norm: "pre" """.format(data_dir=data_path.as_posix(), - experiment_dir=experiment_dir.as_posix()) + experiment_dir=experiment_dir.as_posix(), + test_eval_matrices=test_eval_matrices) (data_path / 'config.yaml').write_text(config) @@ -267,8 +268,9 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--data-path", "-d", required=True, type=str) parser.add_argument("--experiment-dir", "-e", required=True, type=str) + parser.add_argument("--test-eval-matrices", required=False, default='False') args = parser.parse_args() - create_config(args.data_path, args.experiment_dir) + create_config(args.data_path, args.experiment_dir, args.test_eval_matrices) if __name__ == '__main__': diff --git a/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh b/signwriting_transcription/pose_to_signwriting/pipeline.sh similarity index 71% rename from signwriting_transcription/pose_to_signwriting/Online_bash_file.sh rename to signwriting_transcription/pose_to_signwriting/pipeline.sh index b203919..32d28ad 100644 --- a/signwriting_transcription/pose_to_signwriting/Online_bash_file.sh +++ b/signwriting_transcription/pose_to_signwriting/pipeline.sh @@ -6,14 +6,6 @@ cd signwriting-transcription # Install the required packages pip install .[dev,pose_to_signwriting] -pip install ruamel.yaml - -# Download and unzip the sign-vq.zip file -wget 'https://drive.usercontent.google.com/download?id=1V_Af2oqY28QgkE1e8jZzEvuxf6N3bLkB&export=download&authuser=0&confirm=t&uuid=c134a3b0-59c6-4279-b3d9-e918523fd913&at=APZUnTXILGGtZIF-UVbFYXHddcFX%3A1718897741079' -O sign-vq.zip -unzip sign-vq.zip -d sign-vq -cd sign-vq -pip install . -cd .. # Download and unzip the transcription data set wget -O transcription.zip "https://firebasestorage.googleapis.com/v0/b/sign-language-datasets/o/poses%2Fholistic%2Ftranscription.zip?alt=media" @@ -49,23 +41,7 @@ python signwriting_transcription/pose_to_signwriting/joeynmt_pose/training.py ve wget 'https://drive.google.com/uc?export=download&id=1EwgVIAxa_VcPWMtaFXru19ZBqc8NPq8K' -O signwriting_transcription/pose_to_signwriting/joeynmt_pose/token.json # Modify the config.yaml file to set eval_all_metrics to True -python - < Date: Sat, 22 Jun 2024 11:49:41 +0300 Subject: [PATCH 6/6] update start.sh file --- signwriting_transcription/pose_to_signwriting/start.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signwriting_transcription/pose_to_signwriting/start.sh b/signwriting_transcription/pose_to_signwriting/start.sh index f0e8e2b..f96e0fd 100644 --- a/signwriting_transcription/pose_to_signwriting/start.sh +++ b/signwriting_transcription/pose_to_signwriting/start.sh @@ -5,4 +5,4 @@ OUTPUT_FILE="output.log" # Run the run_bash.sh script, capture both stdout and stderr # Display the output on the screen and write to the file -./run_bash.sh | tee -a $OUTPUT_FILE +./pipeline.sh | tee -a $OUTPUT_FILE