validate.py
#!/usr/bin/env python2
'''
Author: Brandon Roberts <[email protected]>

Description:
  Sentence segmentation from unstructured, non-punctuated text.
  Relies on a dual-model system:

    1. For a given window of text, determine the probability
       of a sentence boundary lying inside of it.
       a. If no, shift the window forward.
       b. If yes, send the window to model 2.
    2. For a given text window, determine where the sentence
       boundary lies.

This expands on earlier work:

  Statistical Models for Text Segmentation
  BEEFERMAN, BERGER, LAFFERTY
  School of Computer Science, Carnegie Mellon University
'''
from __future__ import print_function
from keras.models import load_model
import numpy as np
import sys
import os
from models import binary_model, multiclass_model
# NOTE: precompute() and gen_training_data() are called below but are not
# imported in this file; they must be provided by another module in this
# repository (its location is not shown here).
# Window sizes are in characters. These settings should match the
# configuration used to train the model being validated.
multiclass = False   # False: binary boundary-detection model; True: multiclass boundary-position model
# multiclass = True
window_size = 56     # width of the text window, in chars
window_step = 4      # how far the window shifts each step, in chars
batch_size = 1
lstm_size = 5480       # unused in this script
embedding_size = 105   # unused in this script
epochs = 1             # unused in this script
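

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original script): how the dual-model
# scheme described in the module docstring could be applied at inference
# time. `encode_window` is a hypothetical helper standing in for whatever
# character-to-index encoding the repository's training pipeline uses, the
# model arguments are assumed to be loaded Keras models, and the 0.5
# threshold is an assumption rather than a value taken from this project.
def segment_text_sketch(text, boundary_model, position_model, encode_window):
    """Return character offsets of predicted sentence boundaries."""
    boundaries = []
    pos = 0
    while pos + window_size <= len(text):
        window = text[pos:pos + window_size]
        x = np.array([encode_window(window)])
        # Model 1: probability that a sentence boundary lies in this window.
        p_boundary = boundary_model.predict(x)[0][0]
        if p_boundary > 0.5:
            # Model 2: which offset inside the window is the boundary.
            offset = int(np.argmax(position_model.predict(x)[0]))
            boundaries.append(pos + offset)
        # Shift the window forward and keep scanning.
        pos += window_step
    return boundaries
# --------------------------------------------------------------------------
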
if __name__ == "__main__":
    try:
        model_file = sys.argv[1]
        text_file = sys.argv[2]
    except IndexError:
        print('USAGE: ./validate.py [model_file.h5] [text_file.txt]')
        sys.exit(1)
    else:
        print('Using model', model_file, 'and text corpus', text_file)

    # Class balancing is disabled for validation, so only N (the total
    # number of windows) is actually used from this precomputation.
    larger_class, remove_items, N = precompute(
        filename=text_file,
        multiclass=multiclass,
        balance=False,
        window_step=window_step,
        window_size=window_size
    )
    data_generator = gen_training_data(
        filename=text_file,
        multiclass=multiclass,
        balance=False,
        larger_class=None,
        remove_items=0,
        N=N,
        window_step=window_step,
        window_size=window_size,
        batch_size=batch_size
    )

    print('Loading model...')
    model = load_model(model_file)

    print('Running...')
    # Evaluate over the generator. The step count is passed positionally so
    # this works with both the Keras 1 and Keras 2 evaluate_generator APIs.
    score, acc = model.evaluate_generator(
        data_generator,
        N // batch_size
    )
    print('Validate score:', score)
    print('Validate accuracy:', acc)
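
# Example invocation (file names are placeholders):
#   ./validate.py trained_model.h5 corpus.txt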