forked from pstage-ocr-team6/ocr-teamcode
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcustom_augment.py
161 lines (137 loc) · 6.23 KB
/
custom_augment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import torchvision.transforms as transforms
import numpy as np
import cv2
from pre_processing import *
from PIL import Image
from exp.nb_SparseImageWarp import sparse_image_warp
import random
import torch
class to_binary(object):
    """Transform that converts a grayscale image into a thresholded PIL image."""

    def __call__(self, sample):
        """Binarise ``sample`` and return the result as a ``PIL.Image``.

        Args:
            sample: Anything ``np.array`` can turn into a grayscale pixel
                array (presumably a PIL image in mode "L" -- TODO confirm
                against the callers).

        Returns:
            PIL.Image.Image: The image built from the combined threshold maps.
        """
        pixels = np.array(sample)
        mean_level = pixels.mean()
        window_max, window_min = sliding_window1(pixels)

        # ``remove_brightness`` is used when the image is dark overall
        # (mean below 127) or when the values reported by ``sliding_window1``
        # stray more than 40 levels from the global mean; otherwise
        # ``global_threshold1`` is used.
        needs_brightness_removal = (
            mean_level < 127
            or (window_max - mean_level) > 40
            or (window_min - mean_level) < -40
        )
        binarise = remove_brightness if needs_brightness_removal else global_threshold1
        resized, canvas = binarise(pixels)

        # Locally adaptive threshold of a lightly blurred copy.
        smoothed = cv2.GaussianBlur(resized, (11, 11), 1)
        local_threshold = cv2.adaptiveThreshold(
            smoothed,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            15,
            2,
        )

        # Where the first map is dark (< 127), add in the adaptive result.
        # NOTE(review): if ``canvas`` is uint8 and not strictly 0/255 this
        # addition can wrap around -- confirm what the helpers return.
        canvas = np.where(canvas < 127, local_threshold, 0) + canvas
        return Image.fromarray(canvas)
class cutout(object):
    """Cutout augmentation: paint up to ``max_boxes`` square boxes on an image.

    Boxes are attempted one after another. Before each box a uniform random
    number is compared with ``p``; the first time that check fails no further
    boxes are drawn for this image.

    Args:
        mask_size (int): Side length, in pixels, of each square box.
        p (float): Probability (0~1) of drawing each successive box.
        cutout_inside (bool): If True, box centres are restricted so every box
            lies entirely inside the image. If False, centres may fall anywhere
            in the image and boxes are clipped at the border.
        max_boxes (int): Upper bound on the number of boxes per image.
        mask_color (optional): Value written into each box (a scalar, or a
            per-channel sequence for multi-channel images). ``None`` (default)
            keeps the historical behaviour of filling with the numeric value
            of ``mask_size``.
    """

    def __init__(self, mask_size, p, cutout_inside, max_boxes, mask_color=None):
        self.mask_size = mask_size
        self.p = p
        self.cutout_inside = cutout_inside
        self.max_boxes = max_boxes
        self.mask_color = mask_color

    def __call__(self, image):
        """Return a new ``PIL.Image`` with the boxes painted on a copy of ``image``.

        Raises:
            ValueError: From ``np.random.randint`` when ``cutout_inside`` is
                True and the box does not fit inside the image.
        """
        half = self.mask_size // 2
        # Even-sized boxes have no exact centre pixel; shifting the upper
        # bound by one keeps the sampled range symmetric.
        offset = 1 if self.mask_size % 2 == 0 else 0
        # Legacy default: earlier versions always filled with ``mask_size``.
        fill = self.mask_size if self.mask_color is None else self.mask_color

        pixels = np.array(image)  # np.array copies, so the input is untouched
        h, w = pixels.shape[:2]

        if self.cutout_inside:
            cx_low, cx_high = half, w + offset - half
            cy_low, cy_high = half, h + offset - half
        else:
            cx_low, cx_high = 0, w + offset
            cy_low, cy_high = 0, h + offset

        for _ in range(self.max_boxes):
            if np.random.random() > self.p:
                break  # first failed draw ends the augmentation

            cx = np.random.randint(cx_low, cx_high)
            cy = np.random.randint(cy_low, cy_high)

            # Clip the box to the image bounds.
            left = max(0, cx - half)
            top = max(0, cy - half)
            right = min(w, cx - half + self.mask_size)
            bottom = min(h, cy - half + self.mask_size)

            pixels[top:bottom, left:right] = fill

        return Image.fromarray(pixels)
class specAugment(object):
    """Row/column masking transform, after https://github.com/zcaceres/spec_augment

    Args:
        row_num_masks (int): Forwarded to ``row_mask`` as ``num_masks``.
        col_num_masks (int): Forwarded to ``col_mask`` as ``num_masks``.
        replace_with_zero (bool): Forwarded to both mask functions. If True the
            masked area is set to zero, otherwise to the image mean.
            Defaults to True.
    """

    def __init__(self, row_num_masks, col_num_masks, replace_with_zero=True):
        self.row_num_masks = row_num_masks
        self.col_num_masks = col_num_masks
        self.replace_with_zero = replace_with_zero

    def __call__(self, image):
        """Mask ``image`` and return it as a single-channel ("L") PIL image.

        Args:
            image: A ``torch.Tensor`` or anything ``transforms.ToTensor``
                accepts; non-tensors are converted first.
        """
        if not isinstance(image, torch.Tensor):
            image = transforms.ToTensor()(image)

        # Use the values configured on the instance; they were previously
        # ignored in favour of hard-coded 2 / 1 / True.
        masked = row_mask(
            image,
            num_masks=self.row_num_masks,
            replace_with_zero=self.replace_with_zero,
        )
        masked = col_mask(
            masked,
            num_masks=self.col_num_masks,
            replace_with_zero=self.replace_with_zero,
        )
        return transforms.ToPILImage()(masked).convert("L")
def row_mask(spec, F=10, num_masks=1, replace_with_zero=False):
    """Mask random horizontal bands of channel 0 on a copy of ``spec``.

    A number of masks is drawn uniformly from ``[0, num_masks]``. For each
    mask a band height ``f`` is drawn from ``[1, F)``, a start row ``f0`` from
    ``[0, rows - f)`` and an end row from ``[f0, f0 + f)``; rows
    ``[f0, end)`` are overwritten, so a mask may turn out empty.

    Args:
        spec (torch.Tensor): Image tensor indexed as (channel, row, column).
            Only channel 0 is modified.
        F (int): Exclusive upper bound on the band height. Defaults to 10.
        num_masks (int): Maximum number of masks to draw. Defaults to 1.
        replace_with_zero (bool): If True fill with 0, otherwise with the
            current mean of the tensor. Defaults to False.

    Returns:
        torch.Tensor: A masked clone; ``spec`` itself is never modified.
    """
    cloned = spec.clone()
    num_rows = cloned.shape[1]

    # No mask can be placed: return the untouched clone instead of letting
    # random.randrange raise ValueError on an empty range.
    if num_masks < 1 or F < 2 or num_rows < 2:
        return cloned

    for _ in range(random.randrange(0, num_masks + 1)):
        # Clamp so the start-row range below is never empty on short images.
        band = min(random.randrange(1, F), num_rows - 1)
        start = random.randrange(0, num_rows - band)
        stop = random.randrange(start, start + band)
        cloned[0][start:stop] = 0 if replace_with_zero else cloned.mean()
    return cloned
def col_mask(spec, T=10, num_masks=1, replace_with_zero=False):
    """Mask random vertical bands of channel 0 on a copy of ``spec``.

    A number of masks is drawn uniformly from ``[0, num_masks]``. For each
    mask a band width ``t`` is drawn from ``[0, T)``, a start column ``t0``
    from ``[0, columns - t)`` and an end column from ``[t0, t0 + t)``; columns
    ``[t0, end)`` are overwritten, so a mask may turn out empty.

    Args:
        spec (torch.Tensor): Image tensor indexed as (channel, row, column).
            Only channel 0 is modified.
        T (int): Exclusive upper bound on the band width. Defaults to 10.
        num_masks (int): Maximum number of masks to draw. Defaults to 1.
        replace_with_zero (bool): If True fill with 0, otherwise with the
            current mean of the tensor. Defaults to False.

    Returns:
        torch.Tensor: A masked clone; ``spec`` itself is never modified.
    """
    cloned = spec.clone()
    num_cols = cloned.shape[2]

    # No mask can be placed: return the untouched clone instead of letting
    # random.randrange raise ValueError on an empty range.
    if num_masks < 1 or T < 1 or num_cols < 2:
        return cloned

    # The loop previously started at 1 with a count of at most 1, so it never
    # executed and no column was ever masked.
    for _ in range(random.randrange(0, num_masks + 1)):
        # Clamp so the start-column range below is never empty on narrow images.
        band = min(random.randrange(0, T), num_cols - 1)
        if band == 0:
            # Zero-width draw: skip this mask but keep trying the others.
            continue
        start = random.randrange(0, num_cols - band)
        stop = random.randrange(start, start + band)
        cloned[0][:, start:stop] = 0 if replace_with_zero else cloned.mean()
    return cloned