-
Notifications
You must be signed in to change notification settings - Fork 0
/
action.py
37 lines (35 loc) · 1.26 KB
/
action.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import regexp
class Actions:
    """Collect, for each person, the verb phrases that follow their name."""

    def find(self, tagged_words, tagged_sentences, people):
        """Fill each person's data with the actions found in the tagged text.

        For every person a set of lower-cased name tokens is built from the
        full name and all short names.  Each sentence is then scanned: once
        a name token has been seen, the words whose tag starts with ``'V'``
        are collected until the first non-verb, non-name word that follows
        a verb.  The collected words, joined by single spaces, form one
        action.

        Args:
            tagged_words: Accepted for interface compatibility; not read
                by this method.
            tagged_sentences: Iterable of sentences, each an iterable of
                ``(word, tag)`` pairs of strings (presumably POS tags such
                as the Penn Treebank set -- confirm against the tagger in
                use).  It is iterated once per person, so it must be
                re-iterable (e.g. a list, not a generator).
            people: Mapping of full name to a data dict that has a
                ``'shortnames'`` entry (an iterable of strings).

        Returns:
            The same ``people`` mapping.  Each data dict is mutated in
            place: its ``'actions'`` entry is replaced by a set of action
            strings (empty when nothing was found).
        """
        for fullname, data in people.items():
            names = self._name_tokens(fullname, data['shortnames'])
            data['actions'] = set()  # start from an empty action set
            for sentence in tagged_sentences:
                action = self._action_in_sentence(sentence, names)
                if action:  # don't include empty actions
                    data['actions'].add(action)
        return people

    @staticmethod
    def _name_tokens(fullname, shortnames):
        """Return the lower-cased names plus each of their space-split parts."""
        names = set()
        for name in tuple(shortnames) + (fullname,):
            lowered = name.lower()
            names.add(lowered)
            names.update(lowered.split(" "))
        return names

    @staticmethod
    def _action_in_sentence(sentence, names):
        """Return the verbs following a name in ``sentence``, or ``''``."""
        found_name = False
        verbs = []
        for word, tag in sentence:
            lowered = word.lower()
            if lowered in names:
                # Name tokens never end the scan, even between verbs.
                found_name = True
            elif not found_name:
                # Words before the first name mention are ignored.
                continue
            elif tag.startswith('V'):
                verbs.append(lowered)
                # TODO: check the action after regexp based chunker
            elif verbs:
                # All verbs collected and this word is a non-verb: done.
                break
        return " ".join(verbs)