Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
juanmirocks committed Jun 23, 2016
1 parent 1f0b5c7 commit cc24c79
Showing 1 changed file with 38 additions and 3 deletions.
41 changes: 38 additions & 3 deletions nalaf/preprocessing/edges.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import abc
from nalaf.structures.data import Edge
from nltk.stem import PorterStemmer

class EdgeGenerator:
"""
Expand Down Expand Up @@ -37,14 +38,48 @@ def __init__(self, entity1_class, entity2_class, relation_type):
self.relation_type = relation_type

def generate(self, dataset):
    """
    Create an edge for every pair of entity annotations that lie in the
    same sentence.

    For each part yielded by ``dataset.parts()`` the ``edges`` list is
    reset, and then one ``Edge`` is appended for every combination of an
    annotation whose ``class_id`` equals ``self.entity1_class`` with an
    annotation whose ``class_id`` equals ``self.entity2_class``, provided
    both map to the same (non-``None``) sentence index. Both
    ``part.annotations`` and ``part.predicted_annotations`` are considered.

    :param dataset: object exposing ``parts()``; every part is modified
        in place
    """
    from itertools import chain, product

    for part in dataset.parts():
        # Start from a clean slate so repeated calls do not accumulate edges.
        part.edges = []

        candidates = list(chain(part.annotations, part.predicted_annotations))
        first_entities = [ann for ann in candidates
                          if ann.class_id == self.entity1_class]
        second_entities = [ann for ann in candidates
                           if ann.class_id == self.entity2_class]

        for ann_1, ann_2 in product(first_entities, second_entities):
            index_1 = part.get_sentence_index_for_annotation(ann_1)
            index_2 = part.get_sentence_index_for_annotation(ann_2)
            # A None index means the annotation could not be placed in a
            # sentence; such pairs never produce an edge.
            if index_1 is not None and index_1 == index_2:
                part.edges.append(
                    Edge(ann_1, ann_2, self.relation_type,
                         part.sentences[index_1], index_1, part))


class WordFilterEdgeGenerator(EdgeGenerator):
    """
    Generate edges between two entities that are contained in the same
    sentence, but only when that sentence also contains at least one of
    the given filter words.
    Implements the abstract class EdgeGenerator.

    :type entity1_class: str
    :type entity2_class: str
    :type relation_type: str
    :param words: collection that is tested with ``token.word in words``
        for every token of the shared sentence
    """
    def __init__(self, entity1_class, entity2_class, relation_type, words):
        self.entity1_class = entity1_class
        self.entity2_class = entity2_class
        self.relation_type = relation_type
        self.words = words

    def generate(self, dataset):
        """
        Append one edge per qualifying annotation pair to ``part.edges``.

        A pair qualifies when both annotations (taken from
        ``part.annotations``) map to the same non-``None`` sentence index
        and some token of that sentence has its ``word`` in ``self.words``.
        Existing entries of ``part.edges`` are kept; new edges are appended.

        :param dataset: object exposing ``parts()``; every part is modified
            in place
        """
        from itertools import product

        for part in dataset.parts():
            first_entities = [ann for ann in part.annotations
                              if ann.class_id == self.entity1_class]
            second_entities = [ann for ann in part.annotations
                               if ann.class_id == self.entity2_class]

            for ann_1, ann_2 in product(first_entities, second_entities):
                index_1 = part.get_sentence_index_for_annotation(ann_1)
                index_2 = part.get_sentence_index_for_annotation(ann_2)
                if index_1 is None or index_1 != index_2:
                    continue

                sentence = part.sentences[index_1]
                # any() stops at the first hit, so a sentence with several
                # filter words yields a single edge for the pair rather
                # than one duplicate edge per matching token.
                if any(token.word in self.words for token in sentence):
                    part.edges.append(
                        Edge(ann_1, ann_2, self.relation_type,
                             sentence, index_1, part))

0 comments on commit cc24c79

Please sign in to comment.