-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
25 lines (25 loc) · 862 Bytes
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def preprocess(text):
    """Normalize a raw text string into lowercase, lightly punctuated text.

    The cleaning steps, in order: remove URLs, lowercase, drop single-line
    parenthesised spans, keep only ``a-z``, ``0-9`` and ``. , ? ! / & % $ '``
    (everything else becomes a space), turn ``!`` into a full stop, collapse
    runs of dots/whitespace into a single ``". "``, tidy the spacing around
    commas and apostrophes, and rewrite a standalone ``l`` as ``i``.

    Args:
        text: The string to clean. A ``float`` (including subclasses such as
            ``numpy.float64``) is treated as a missing value.

    Returns:
        The cleaned string (possibly empty), or ``numpy.nan`` when ``text``
        is a float.

    Raises:
        TypeError: If ``text`` is neither a float nor a string.
    """
    # Function-scope imports keep the block self-contained; no import block
    # is visible in this file.
    import re

    # isinstance (not an exact type comparison) so float subclasses are also
    # recognised as missing values instead of reaching re.sub.
    if isinstance(text, float):
        import numpy as np

        return np.nan

    # Strip URLs first, while ':' and other URL punctuation are still intact.
    # IGNORECASE so an upper-case scheme ("HTTP://") is removed as well.
    text = re.sub(
        r"https?://[\w!\?/\+\-_~=;\.,\*&@#\$%\(\)'\[\]]+",
        " ",
        text,
        flags=re.IGNORECASE,
    )
    text = text.lower()

    # Drop parenthesised asides; '.' does not match newlines, so only spans
    # that open and close on the same line are removed.
    text = re.sub(r"\(.*?\)", " ", text)

    # Normalise the typographic apostrophe before filtering, otherwise the
    # filter below would erase it.
    text = text.replace("’", "'")

    # Whitelist filter. The text is already lowercase, so 'a-z' suffices; a
    # range written as 'A-z' would also admit [ \ ] ^ _ and the backtick.
    # Whitespace is not whitelisted either, so it is mapped to a plain space.
    text = re.sub(r"[^a-z0-9.,?!/&%$']", " ", text)

    # Pad punctuation so it can be normalised after whitespace collapsing.
    text = text.replace(",", " , ")
    text = text.replace("!", ". ")
    text = text.replace(".", ". ")
    text = re.sub(r"\s+", " ", text)

    # Any run of dots and surrounding whitespace becomes exactly one ". ".
    text = re.sub(r"\s*\.[.\s]+", ". ", text)

    text = text.replace("' ", "'")
    # A standalone "l" is rewritten as "i" (presumably a mistyped pronoun
    # "I" -- confirm against the data this is used on).
    text = text.replace(" l ", " i ")
    text = text.replace(" ,", ",")

    # strip() instead of indexing text[0] / text[-1], which raised
    # IndexError whenever the cleaned text ended up empty.
    text = text.strip(" ")

    # Same "l" -> "i" rewrite for a standalone "l" at the very start.
    if text.startswith("l "):
        text = "i " + text[2:]
    return text