basic adaptation model
RotemZilberman authored Dec 28, 2023
1 parent 483fbf9 commit 2b1aa92
Showing 33 changed files with 9,960 additions and 0 deletions.
125 changes: 125 additions & 0 deletions config.py
@@ -0,0 +1,125 @@
# Create the config
from pathlib import Path

data_path = Path('output/posses')
experiment_dir = Path('models/posses')

config = """
name: "posses"
joeynmt_version: 2.0.0
data:
task: "S2T" # "S2T" for speech-to-text, "MT" for (text) translation
train: "{data_dir}/train"
dev: "{data_dir}/dev"
test: "{data_dir}/test"
dataset_type: "speech" # SpeechDataset takes tsv as input
src:
lang: "en_ng"
num_freq: 534 # number of frequencies of audio inputs
max_length: 3000 # much longer than text sequence!
min_length: 10 # have to be specified so that 1d-conv works!
level: "frame" # Here we specify we're working on BPEs.
tokenizer_type: "speech"
tokenizer_cfg:
specaugment:
freq_mask_n: 1
freq_mask_f: 5
time_mask_n: 1
time_mask_t: 10
time_mask_p: 1.0
cmvn:
norm_means: True
norm_vars: True
before: True
trg:
lang: "en_ng"
max_length: 100
lowercase: False
level: "bpe" # Here we specify we're working on BPEs.
voc_file: "{data_dir}/spm_bpe40.vocab"
tokenizer_type: "sentencepiece"
tokenizer_cfg:
model_file: "{data_dir}/spm_bpe40.model"
pretokenize: "none"
testing:
n_best: 1
beam_size: 5
beam_alpha: 1.0
batch_size: 4
batch_type: "sentence"
max_output_length: 100 # Don't generate translations longer than this.
eval_metrics: ["wer"] # Use "wer" for ASR task, "bleu" for ST task
sacrebleu_cfg: # sacrebleu options
tokenize: "intl" # `tokenize` option in sacrebleu.corpus_bleu() function (options include: "none" (use for already tokenized test data), "13a" (default minimal tokenizer), "intl" which mostly does punctuation and unicode, etc)
training:
#load_model: "{experiment_dir}/1.ckpt" # if uncommented, load a pre-trained model from this checkpoint
random_seed: 42
optimizer: "adam"
normalization: "tokens"
adam_betas: [0.9, 0.98]
scheduling: "plateau"
patience: 5
learning_rate: 0.0002
learning_rate_min: 0.00000001
weight_decay: 0.0
label_smoothing: 0.1
loss: "crossentropy-ctc" # use CrossEntropyLoss + CTCLoss
ctc_weight: 0.3 # ctc weight in interpolation
batch_size: 4 # much bigger than text! your "tokens" are "frames" now.
batch_type: "sentence"
batch_multiplier: 1
early_stopping_metric: "wer"
epochs: 10 # Decrease for when playing around and checking of working.
validation_freq: 1000 # Set to at least once per epoch.
logging_freq: 100
model_dir: "{experiment_dir}"
overwrite: True
shuffle: True
use_cuda: True
print_valid_sents: [0, 1, 2, 3]
keep_best_ckpts: 2
model:
initializer: "xavier_uniform"
bias_initializer: "zeros"
init_gain: 1.0
embed_initializer: "xavier_uniform"
embed_init_gain: 1.0
tied_embeddings: False # DIsable embeddings sharing between enc(audio) and dec(text)
tied_softmax: False
encoder:
type: "transformer"
num_layers: 12 # Common to use doubly bigger encoder than decoder in S2T.
num_heads: 4
embeddings:
embedding_dim: 534 # Must be same as the frequency of the filterbank features!
# typically ff_size = 4 x hidden_size
hidden_size: 256
ff_size: 1024
dropout: 0.1
layer_norm: "pre"
# new for S2T:
subsample: True # enable 1d conv module
conv_kernel_sizes: [5, 5] # convolution kernel sizes (window width)
conv_channels: 512 # convolution channels
in_channels: 534 # Must be same as the embedding_dim
decoder:
type: "transformer"
num_layers: 6
num_heads: 4
embeddings:
embedding_dim: 256
scale: True
dropout: 0.0
# typically ff_size = 4 x hidden_size
hidden_size: 256
ff_size: 1024
dropout: 0.1
layer_norm: "pre"
""".format(data_dir=data_path.as_posix(),
experiment_dir=experiment_dir.as_posix())

(data_path / 'config.yaml').write_text(config)
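
For orientation, a minimal sketch of how the generated file could be consumed, assuming the joeynmt package added in this commit is importable and that the train/dev/test tsv files already exist under output/posses:

from joeynmt.training import train

# Launch training with the config written above; the path mirrors data_path in config.py.
train(cfg_file="output/posses/config.yaml", skip_test=False)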
32 changes: 32 additions & 0 deletions data_preprocessing.py
@@ -0,0 +1,32 @@
import argparse
from pathlib import Path

from pose_format import Pose
from pose_format.utils.generic import pose_normalization_info, correct_wrists, reduce_holistic


def preprocess(srcDir, trgDir):
    srcDir = Path(srcDir)
    trgDir = Path(trgDir)
    trgDir.mkdir(parents=True, exist_ok=True)
    for path in srcDir.iterdir():
        if path.is_file() and path.suffix == ".pose":
            with open(path, 'rb') as pose_file:
                pose = Pose.read(pose_file.read())
            # Reduce to the holistic landmark subset, fix the wrists, and normalize
            # the pose before writing it to the target directory.
            pose = reduce_holistic(pose)
            correct_wrists(pose)
            pose = pose.normalize(pose_normalization_info(pose.header))
            with open(trgDir / path.name, 'w+b') as pose_file:
                pose.write(pose_file)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--srcDir", required=True, type=str)
    parser.add_argument("--trgDir", required=True, type=str)
    args = parser.parse_args()
    preprocess(args.srcDir, args.trgDir)


if __name__ == "__main__":
    main()
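
A hedged usage sketch for this script; the directory names below are placeholders for illustration, not paths taken from the repository:

# Command line:  python data_preprocessing.py --srcDir raw_poses --trgDir normalized_poses
# Programmatic equivalent:
from data_preprocessing import preprocess

preprocess("raw_poses", "normalized_poses")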
47 changes: 47 additions & 0 deletions datasets_pose.py
@@ -0,0 +1,47 @@
import numpy as np
from pose_format import Pose
import pandas as pd
from swu_representation import swu2data

FrameRate = 29.97003


def ms2frame(ms) -> int:
    return int(ms / 1000 * FrameRate)


def pose_to_matrix(file_path, start_ms, end_ms):
    with open(file_path, "rb") as f:
        pose = Pose.read(f.read())
    pose = pose.body.data
    # Flatten (frames, people, keypoints, dims) into (frames, keypoints * dims);
    # this assumes a single person per recording.
    pose = pose.reshape(pose.shape[0], pose.shape[2] * pose.shape[3])
    pose = pose[ms2frame(start_ms):ms2frame(end_ms)]
    return pose


def load_dataset(folder_name):
    # Each row of target.csv provides the pose file name (column 0), the start and
    # end times in milliseconds (columns 2-3), and the SWU transcription (column 4).
    target = pd.read_csv(f'{folder_name}/target.csv')
    dataset = []
    for line in target.values:
        pose = pose_to_matrix(f'{folder_name}/{line[0]}', line[2], line[3])
        pose = pose.filled(fill_value=0)
        utt_id = line[0].split('.')[0]
        utt_id = f'{utt_id}({line[2]})'
        dataset.append((utt_id, pose, swu2data(line[4])))
    return dataset


def extract_to_fbank(pose_data, output_path, overwrite: bool = False):
    # Cache the pose matrix on disk like a filterbank feature file and reuse it if it already exists.
    if output_path is not None and output_path.is_file() and not overwrite:
        return np.load(output_path.as_posix())
    if output_path is not None:
        np.save(output_path.as_posix(), pose_data)
        assert output_path.is_file(), output_path
    return pose_data


if __name__ == "__main__":
    dataSet = load_dataset("Dataset")
    print(dataSet)
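
A sketch of how these helpers might be chained to cache pose matrices as .npy feature files; the "Dataset/fbank" output directory is an assumption made for illustration:

from pathlib import Path

from datasets_pose import extract_to_fbank, load_dataset

out_dir = Path("Dataset/fbank")
out_dir.mkdir(parents=True, exist_ok=True)
for utt_id, pose, target_tokens in load_dataset("Dataset"):
    # Save each pose matrix once, keyed by utterance id, and reuse it on later runs.
    extract_to_fbank(pose, out_dir / f"{utt_id}.npy", overwrite=False)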
Empty file added joeynmt/__init__.py
Empty file.
64 changes: 64 additions & 0 deletions joeynmt/__main__.py
@@ -0,0 +1,64 @@
import argparse

from joeynmt.prediction import test, translate
from joeynmt.training import train


def main():
    ap = argparse.ArgumentParser("Joey NMT")

    ap.add_argument(
        "mode",
        choices=["train", "test", "translate"],
        help="train a model or test or translate",
    )

    ap.add_argument("config_path", type=str, help="path to YAML config file")

    ap.add_argument("-c", "--ckpt", type=str, help="checkpoint for prediction")

    ap.add_argument("-o",
                    "--output_path",
                    type=str,
                    help="path for saving translation output")

    ap.add_argument(
        "-a",
        "--save_attention",
        action="store_true",
        help="save attention visualizations",
    )

    ap.add_argument("-s", "--save_scores", action="store_true", help="save scores")

    ap.add_argument(
        "-t",
        "--skip_test",
        action="store_true",
        help="skip test after training",
    )

    args = ap.parse_args()

    if args.mode == "train":
        train(cfg_file=args.config_path, skip_test=args.skip_test)
    elif args.mode == "test":
        test(
            cfg_file=args.config_path,
            ckpt=args.ckpt,
            output_path=args.output_path,
            save_attention=args.save_attention,
            save_scores=args.save_scores,
        )
    elif args.mode == "translate":
        translate(
            cfg_file=args.config_path,
            ckpt=args.ckpt,
            output_path=args.output_path,
        )
    else:
        raise ValueError("Unknown mode")


if __name__ == "__main__":
    main()
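
For reference, a hedged sketch of what a translate invocation dispatches to; the checkpoint and output paths below are illustrative assumptions:

from joeynmt.prediction import translate

# Equivalent to: python -m joeynmt translate output/posses/config.yaml --ckpt models/posses/best.ckpt --output_path models/posses/hyps
translate(
    cfg_file="output/posses/config.yaml",
    ckpt="models/posses/best.ckpt",
    output_path="models/posses/hyps",
)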
