Skip to content

Commit

Permalink
Merge pull request #11 from sign-language-processing/pose_to_signwrit…
Browse files Browse the repository at this point in the history
…ing_static_pretraining

online running update bash files
  • Loading branch information
AmitMY authored Jun 22, 2024
2 parents 2c18ffa + ffc2548 commit 97d885c
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 10 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pose_to_signwriting = [
# Uploads results to Google Sheets
"google-auth-oauthlib",
"google-api-python-client",
"ruamel.yaml",
]
pose_to_vq_to_signwriting = [
"sign-vq @ git+https://github.com/sign-language-processing/sign-vq.git" # Used for getting codes from poses
Expand Down
5 changes: 3 additions & 2 deletions signwriting_transcription/pose_to_signwriting/bin.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ def preprocessing_signs(preprocessed_pose: Pose, sign_annotations: list, strateg
else: # tight strategy - add padding(PADDING_PACTOR) to the tight segment
# add padding to the segment by the distance between the segments
np_pose, frame_rate = pose_to_matrix(preprocessed_pose)
pose_ndarray_to_matrix(np_pose, sign_start - (sign_start - start_point) * PADDING_PACTOR, frame_rate,
sign_end + (end_point - sign_end) * PADDING_PACTOR).filled(fill_value=0)
np_pose = (pose_ndarray_to_matrix(np_pose, sign_start - (sign_start - start_point) * PADDING_PACTOR,
frame_rate, sign_end + (end_point - sign_end) * PADDING_PACTOR)
.filled(fill_value=0))
start_point = sign_end
pose_path = temp_path / f'{index}.npy'
np.save(pose_path, np_pose)
Expand Down
18 changes: 10 additions & 8 deletions signwriting_transcription/pose_to_signwriting/data/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path


def create_config(data_path="/output/poses", experiment_dir='/model/poses'):
def create_config(data_path="/output/poses", experiment_dir='/model/poses', test_eval_matrices='False'):
data_path = Path(data_path)
experiment_dir = Path(experiment_dir)

Expand Down Expand Up @@ -51,7 +51,7 @@ def create_config(data_path="/output/poses", experiment_dir='/model/poses'):
pretokenize: "none"
testing:
eval_all_metrics: False
eval_all_metrics: {test_eval_matrices}
n_best: 1
beam_size: 5
beam_alpha: 1.0
Expand All @@ -70,17 +70,17 @@ def create_config(data_path="/output/poses", experiment_dir='/model/poses'):
adam_betas: [0.9, 0.98]
scheduling: "plateau"
patience: 10
learning_rate: 0.0002
learning_rate_min: 0.00000001
learning_rate: 0.00015
learning_rate_min: 0.000000015
weight_decay: 0.0
label_smoothing: 0.1
loss: "crossentropy-ctc" # use CrossEntropyLoss + CTCLoss
ctc_weight: 0.3 # ctc weight in interpolation
batch_size: 4 # much bigger than text! your "tokens" are "frames" now.
batch_type: "sentence"
batch_multiplier: 1
# early_stopping_metric: # by default, early stopping uses "fsw_eval" metric
epochs: 15 # Decrease for when playing around and checking of working.
early_stopping_metric: chrf # by default, early stopping uses "fsw_eval" metric
epochs: 100 # Decrease for when playing around and checking of working.
validation_freq: 1000 # Set to at least once per epoch.
logging_freq: 100
model_dir: "{experiment_dir}"
Expand Down Expand Up @@ -128,7 +128,8 @@ def create_config(data_path="/output/poses", experiment_dir='/model/poses'):
dropout: 0.1
layer_norm: "pre"
""".format(data_dir=data_path.as_posix(),
experiment_dir=experiment_dir.as_posix())
experiment_dir=experiment_dir.as_posix(),
test_eval_matrices=test_eval_matrices)

(data_path / 'config.yaml').write_text(config)

Expand Down Expand Up @@ -267,8 +268,9 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument("--data-path", "-d", required=True, type=str)
parser.add_argument("--experiment-dir", "-e", required=True, type=str)
parser.add_argument("--test-eval-matrices", required=False, default='False')
args = parser.parse_args()
create_config(args.data_path, args.experiment_dir)
create_config(args.data_path, args.experiment_dir, args.test_eval_matrices)


if __name__ == '__main__':
Expand Down
47 changes: 47 additions & 0 deletions signwriting_transcription/pose_to_signwriting/pipeline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
#
# End-to-end pose-to-SignWriting pipeline: clone the repository, install it,
# download and preprocess the transcription data set, train the model, and
# run prediction on the test split.
#
# Every step depends on the previous one, so abort on the first failure
# (-e), on use of an unset variable (-u), and on a failure anywhere inside
# a pipe (pipefail) instead of continuing in a broken state.
set -euo pipefail

# Clone the repository (skipped when a checkout already exists, so the
# script can be re-run from the same directory)
if [ ! -d signwriting-transcription ]; then
    git clone https://github.com/sign-language-processing/signwriting-transcription.git
fi
cd signwriting-transcription

# Install the required packages
# (quoted so the shell never interprets [...] as a glob pattern)
pip install ".[dev,pose_to_signwriting]"

# Download and unzip the transcription data set
wget -O transcription.zip "https://firebasestorage.googleapis.com/v0/b/sign-language-datasets/o/poses%2Fholistic%2Ftranscription.zip?alt=media"
unzip transcription.zip -d transcription_data_set

# Run preprocessing script
python signwriting_transcription/pose_to_signwriting/data/preprocessing.py --src-dir transcription_data_set --trg-dir normalized_data_set --normalization True

# Prepare segmentation data set
mkdir -p segment_data_set
cp data/data_segmentation.csv segment_data_set/target.csv
cp data/data.csv normalized_data_set/target.csv

# Run prepare_poses script
python signwriting_transcription/pose_to_signwriting/data/prepare_poses.py \
    --dataset-root normalized_data_set \
    --data-root vectorized_data_set \
    --dataset-name poses \
    --tokenizer-type pose-vpf \
    --data-segment segment_data_set

# Run config script (writes vectorized_data_set/poses/config.yaml)
python signwriting_transcription/pose_to_signwriting/data/config.py --data-path vectorized_data_set/poses --experiment-dir experiment

# Prepare experiment directory
mkdir -p experiment
cp vectorized_data_set/poses/config.yaml experiment/config.yaml

# Run training script
python signwriting_transcription/pose_to_signwriting/joeynmt_pose/training.py vectorized_data_set/poses/config.yaml

# Download token.json
wget 'https://drive.google.com/uc?export=download&id=1EwgVIAxa_VcPWMtaFXru19ZBqc8NPq8K' -O signwriting_transcription/pose_to_signwriting/joeynmt_pose/token.json

# Regenerate experiment/config.yaml with eval_all_metrics set to True
# NOTE(review): config.py is also handed --data-path experiment here, so the
# regenerated config presumably refers to `experiment` as its data directory
# rather than vectorized_data_set/poses - confirm prediction.py still finds
# the prepared data.
python signwriting_transcription/pose_to_signwriting/data/config.py --data-path experiment --experiment-dir experiment --test-eval-matrices True

# Run prediction script
python signwriting_transcription/pose_to_signwriting/joeynmt_pose/prediction.py experiment/config.yaml test none
8 changes: 8 additions & 0 deletions signwriting_transcription/pose_to_signwriting/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Launch pipeline.sh and keep a persistent log of everything it prints.

# Report pipeline.sh's exit status rather than tee's, so a failed run is
# visible to whoever invoked this script
set -o pipefail

# File to store the output
OUTPUT_FILE="output.log"

# Run the pipeline.sh script, capturing both stdout and stderr (2>&1 merges
# stderr into the pipe; without it errors would bypass the log file).
# Display the output on the screen and append it to the file
./pipeline.sh 2>&1 | tee -a "$OUTPUT_FILE"

0 comments on commit 97d885c

Please sign in to comment.