-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEvaluator.py
78 lines (63 loc) · 2.24 KB
/
Evaluator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import sys
import json
import os
from nltk.tokenize import sent_tokenize, word_tokenize
import pyrouge
from pyrouge import Rouge155
import logging
from logging import config
'''
@Author: Khyathi Raghavi Chandu
@Date: October 17 2017
This module contains a wrapper around the ROUGE evaluation toolkit that reports ROUGE-2 and ROUGE-SU4 scores.
'''
# Configure logging at import time from 'logging.ini'. The path is relative,
# so it is resolved against the current working directory of the process.
logging.config.fileConfig('logging.ini')
# Module-wide logger; presumably 'bioAsqLogger' is declared in logging.ini --
# TODO confirm against that file.
logger = logging.getLogger('bioAsqLogger')
class Evaluator:
    """Score a system summary against the gold ideal answer using ROUGE.

    The gold answer is looked up by question body in the JSON file at
    ``self.filePath``. Both the gold answer and the system summary are
    written one sentence per line to ``bioasq.1.txt`` inside
    ``self.goldDirectory`` and ``self.systemDirectory`` respectively, and
    pyrouge's ``Rouge155`` is then run over those two directories.
    """

    # Messages returned by fillSummaries when nothing can be evaluated.
    # The spelling is kept exactly as it has always been returned, because
    # callers may compare against these strings.
    NO_GOLD_MESSAGE = "No gold summary available"
    NO_SYSTEM_MESSAGE = "No system symmary available"

    def __init__(self, rougeHome="/home/khyathi/installations/RELEASE-1.5.5"):
        """Set the dataset path, the working directories and the ROUGE home.

        :param rougeHome: directory of the ROUGE-1.5.5 installation passed to
            ``Rouge155``; defaults to the previously hard-coded location.
        """
        self.filePath = "../input/BioASQ-trainingDataset5b.json"
        self.systemDirectory = "./system/"
        self.goldDirectory = "./gold/"
        self.rougeHome = rougeHome

    def extractIdealAnswer(self, questionBody):
        """Return the first ideal answer of the question matching questionBody.

        The JSON file is expected to hold a top-level ``questions`` list whose
        entries have ``body`` and ``ideal_answer`` keys. Bodies are compared
        after stripping surrounding whitespace.

        :param questionBody: text of the question to look up.
        :return: ``question['ideal_answer'][0]`` of the first match, or
            ``None`` when no question body matches.
        """
        wantedBody = questionBody.strip()
        # Context manager so the dataset file is closed even if parsing fails.
        with open(self.filePath, 'r') as infile:
            data = json.load(infile)
        for question in data['questions']:
            if question['body'].strip() == wantedBody:
                # NOTE(review): assumes 'ideal_answer' is a list of strings;
                # if it were a plain string this would yield its first
                # character -- confirm against the dataset.
                return question['ideal_answer'][0]
        return None

    def _writeSentences(self, path, text):
        """Write text to path, one sent_tokenize sentence per line."""
        # The file must be closed (and therefore flushed) before ROUGE reads
        # it, otherwise the evaluation may see an empty or truncated file.
        with open(path, 'w+') as outFile:
            for sentence in sent_tokenize(text):
                outFile.write(sentence + "\n")

    def fillSummaries(self, questionBody, finalSummary):
        """Write the gold and system summaries to the files ROUGE will read.

        :param questionBody: question text used to look up the gold answer.
        :param finalSummary: system-generated summary to evaluate.
        :return: the gold ideal answer on success; ``NO_GOLD_MESSAGE`` when no
            gold answer is found; ``NO_SYSTEM_MESSAGE`` when finalSummary is
            the empty string. No file is written in the two latter cases.
        """
        goldIdealAnswer = self.extractIdealAnswer(questionBody)
        if goldIdealAnswer is None:
            return self.NO_GOLD_MESSAGE
        if finalSummary == "":
            return self.NO_SYSTEM_MESSAGE
        self._writeSentences(str(self.goldDirectory) + "bioasq.1.txt",
                             goldIdealAnswer)
        self._writeSentences(str(self.systemDirectory) + "bioasq.1.txt",
                             finalSummary)
        return goldIdealAnswer

    def parseRougeOutput(self, output):
        """Extract two F-scores from the textual ROUGE report.

        :param output: report text returned by ``convert_and_evaluate``.
        :return: ``(r2, rsu)`` where r2 comes from the line containing
            ``"1 ROUGE-2 Average_F:"`` and rsu from the line containing
            ``"ROUGE-S* Average_F:"``. Each value is the raw substring between
            the first ``:`` and the following ``(`` (surrounding spaces are
            kept), or ``None`` when no such line exists. If several lines
            match, the last one wins.
        """
        r2 = None
        rsu = None
        for line in output.strip().split("\n"):
            # NOTE(review): the second marker matches the ROUGE-S* line, not
            # ROUGE-SU*; confirm which metric is actually intended.
            if "1 ROUGE-2 Average_F:" in line:
                r2 = line.strip().split(":")[1].split("(")[0]
            if "ROUGE-S* Average_F:" in line:
                rsu = line.strip().split(":")[1].split("(")[0]
        return r2, rsu

    def calculateRouge(self, questionBody, finalSummary):
        """Run ROUGE for one question and return the gold answer and scores.

        :param questionBody: question text used to look up the gold answer.
        :param finalSummary: system-generated summary to evaluate.
        :return: ``(goldIdealAnswer, r2, rsu)``. When fillSummaries reports
            that there is no gold or no system summary, its message is
            returned in place of the gold answer and both scores are ``None``.
        """
        goldIdealAnswer = self.fillSummaries(questionBody, finalSummary)
        if goldIdealAnswer in (self.NO_GOLD_MESSAGE, self.NO_SYSTEM_MESSAGE):
            # Nothing was written for this question, so running ROUGE would
            # only score whatever files a previous call left behind.
            return goldIdealAnswer, None, None
        r = Rouge155(self.rougeHome)
        r.system_dir = self.systemDirectory  # our summaries
        r.model_dir = self.goldDirectory  # gold summaries
        # Raw string: same pattern value as before, without the invalid
        # "\d" escape warning of a normal string literal.
        r.system_filename_pattern = r"bioasq.(\d+).txt"
        r.model_filename_pattern = "bioasq.#ID#.txt"
        output = r.convert_and_evaluate()
        r2, rsu = self.parseRougeOutput(output)
        return goldIdealAnswer, r2, rsu
#print type(output)