-
Notifications
You must be signed in to change notification settings - Fork 0
/
evaluate.py
107 lines (78 loc) · 3.88 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import sys
import argparse
from clstk.utils import fs
from clstk.utils import nlp
from clstk.evaluation import RougeScore
from clstk.evaluation import ExternalRougeScore
from clstk import linBilmes
from clstk import coRank
from clstk import simFusion
def runSummarizer(inDir, outFile, summarizer, args):
    """Summarize one document set and store the summary as UTF-8.

    :param inDir: directory with the source documents; handed to the
        summarizer as ``args.source_directory``
    :param outFile: path of the file that receives the summary (overwritten)
    :param summarizer: callable invoked as ``summarizer(args, silent=True)``;
        the object it returns must provide ``getTargetSummary()``
    :param args: ``argparse.Namespace`` with the summarizer options; it is
        copied first, so the caller's namespace is never modified
    """
    # Work on a copy so that setting source_directory for this document set
    # does not leak into the namespace the caller reuses for the next one.
    args = argparse.Namespace(**vars(args))
    args.source_directory = inDir

    summary = summarizer(args, silent=True)

    # Encode before opening the file so a failure here does not leave a
    # truncated output file behind.
    encoded = summary.getTargetSummary().encode('utf8')

    # Binary mode: the payload is already encoded bytes. A text-mode handle
    # rejects bytes on Python 3 and may translate newlines on some platforms.
    with open(outFile, "wb") as f:
        f.write(encoded)
def summarizeAll(docNames, docsDir, outDir, summarizer, args):
    """Summarize every document set, showing a progress counter on stdout.

    :param docNames: names of the document sets; each one is used both as a
        sub-directory of ``docsDir`` and as the output file name in ``outDir``
    :param docsDir: directory containing one sub-directory per document set
    :param outDir: directory receiving the summaries; passed to
        ``fs.ensureDir`` before anything is written
    :param summarizer: summarizer callable forwarded to ``runSummarizer``
    :param args: parsed command-line options forwarded to ``runSummarizer``
    """
    fs.ensureDir(outDir)

    total = len(docNames)
    for position, name in enumerate(docNames, 1):
        # The trailing carriage return moves the cursor back to the start of
        # the line, so each update overwrites the previous one. Writing to
        # sys.stdout directly (instead of a print statement) keeps this
        # valid on both Python 2 and Python 3.
        sys.stdout.write("Summarizing: %d / %d \r" % (position, total))
        sys.stdout.flush()
        runSummarizer(os.path.join(docsDir, name),
                      os.path.join(outDir, name),
                      summarizer, args)

    # Terminate the progress line.
    sys.stdout.write("\n")
def getAvailableReferences(refsDir):
    """Return the names of the immediate sub-directories of ``refsDir``.

    :param refsDir: directory to inspect
    :returns: list of sub-directory names (not full paths), in the order
        reported by ``os.walk``
    :raises StopIteration: if ``os.walk`` yields nothing for ``refsDir``,
        e.g. because the directory does not exist
    """
    # The first tuple produced by os.walk describes refsDir itself; its
    # second element lists the sub-directory names. The builtin next() is
    # used because the generator's .next() method exists only on Python 2.
    _, dirNames, _ = next(os.walk(refsDir))
    return dirNames
def getRougeScore(summaryNames, summariesDir, refsDir):
    """Run both ROUGE scorers over the generated summaries.

    For every name, the summary is expected at ``summariesDir/<name>`` and
    its reference summaries are the files directly inside
    ``refsDir/<name>``.

    :param summaryNames: names of the summaries to evaluate
    :param summariesDir: directory holding the generated summaries
    :param refsDir: directory holding one sub-directory of references per
        summary name
    :raises StopIteration: if ``os.walk`` yields nothing for one of the
        reference directories, e.g. because it does not exist

    The values returned by the ``rouge`` calls are discarded; presumably the
    scorers report their results themselves -- confirm in clstk.evaluation.
    """
    summaryRefsList = []
    for summaryName in summaryNames:
        summaryPath = os.path.join(summariesDir, summaryName)
        summaryRefsDir = os.path.join(refsDir, summaryName)

        # Third element of the first os.walk tuple: the plain files that
        # sit directly inside the reference directory.
        refFiles = next(os.walk(summaryRefsDir))[2]

        # Build a real list: the same pairs are handed to two scorers, and a
        # lazy map() (Python 3) would be exhausted after the first one.
        refPaths = [os.path.join(summaryRefsDir, f) for f in refFiles]
        summaryRefsList.append((summaryPath, refPaths))

    ExternalRougeScore().rouge(summaryRefsList)
    # Separator line between the two scorer runs.
    sys.stdout.write("-\n")
    RougeScore(stemmer=nlp.getStemmer()).rouge(summaryRefsList)
if __name__ == '__main__':
    # Command-line entry point: generate a summary for every document set
    # that has reference summaries, then score the summaries with ROUGE.

    # Options shared by every summarization method. add_help=False because
    # this parser is only used as a parent of the per-method sub-parsers,
    # which provide their own -h/--help.
    common_parser = argparse.ArgumentParser(add_help=False)
    common_parser.add_argument('source_path',
                               help='Directory containing all the source '
                               'files to be summarized. Each set of '
                               'documents is expected to be in a different '
                               'directory inside this path.')
    common_parser.add_argument('models_path',
                               help='Directory containing all the model '
                               'summaries. Each set of summaries is '
                               'expected to be in a different directory '
                               'inside this path, having the same name as '
                               'the corresponding directory in the source '
                               'directory.')
    common_parser.add_argument('summaries_path',
                               help='Directory to store the generated '
                               'summaries. The directory will be created if '
                               'it does not already exist.')
    common_parser.add_argument('--only-rouge', action='store_true',
                               help='Do not run summarizer. '
                               'Only compute ROUGE score for existing '
                               'summaries in summaries_path')

    parser = argparse.ArgumentParser(
        description='Evaluate the summarizer',
        epilog='Set ROUGE_HOME environment variable for this to work')
    subparsers = parser.add_subparsers(title='methods',
                                       description='Summarization method')

    # One sub-command per summarization method; each module registers its
    # own options on the sub-parser it is given.
    linBilmes.setupArgparse(subparsers.add_parser('linBilmes',
                                                  parents=[common_parser]))
    coRank.setupArgparse(subparsers.add_parser('coRank',
                                               parents=[common_parser]))
    simFusion.setupArgparse(subparsers.add_parser('simFusion',
                                                  parents=[common_parser]))

    args = parser.parse_args()

    # The positional arguments live on the sub-parsers. Python 3's argparse
    # does not require a sub-command by default, so without this check a
    # bare invocation would fail below with an AttributeError instead of a
    # usage message.
    if not hasattr(args, 'models_path'):
        parser.error('a summarization method is required')

    # Only document sets that have reference summaries are evaluated.
    docNames = getAvailableReferences(args.models_path)
    if not args.only_rouge:
        summarizeAll(docNames, args.source_path, args.summaries_path,
                     args.func, args)
    getRougeScore(docNames, args.summaries_path, args.models_path)