Replace printing with logging #51

Open · wants to merge 1 commit into master
9 changes: 7 additions & 2 deletions data_processing/pair_data.py
@@ -7,6 +7,11 @@
import os
from tqdm import tqdm

from summ_eval import logger


logger = logger.getChild(__name__)


def parse_story_file(content):
"""
@@ -17,7 +22,7 @@ def parse_story_file(content):
return content

def annotation_pairing(args):
print("Processing file:", args.data_annotations)
logger.debug("Processing file: %s", args.data_annotations)
with open(args.data_annotations) as fd:
dataset = [json.loads(line) for line in fd]

@@ -46,7 +51,7 @@ def output_pairing(args):
if not (".jsonl" in filename and "aligned" in filename and os.path.isfile(unpaired_path)):
continue

print("Processing file:", unpaired_path)
logger.info("Processing file: %s", unpaired_path)
with open(unpaired_path) as fd:
dataset = [json.loads(line) for line in fd]

3 changes: 3 additions & 0 deletions evaluation/summ_eval/__init__.py
@@ -0,0 +1,3 @@
import logging

logger = logging.getLogger(__name__)
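
Note on the new `evaluation/summ_eval/__init__.py`: it defines a package-level logger, and each module below derives a child via `logger.getChild(__name__)`. With no handlers configured, Python's logging only surfaces WARNING and above through its last-resort handler, so the new `info`/`debug` messages stay quiet until the consuming application opts in. A minimal sketch of how a caller might do that (format string and levels are illustrative, not part of this PR):

```python
import logging

# Configure the root logger once; summ_eval's child loggers propagate
# their records to it, so this is enough to surface the new messages.
logging.basicConfig(
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
    level=logging.INFO,
)

# With the above in place, narrow just this package to DEBUG
# (e.g. to also see the debug-level hash_code lines added below).
logging.getLogger("summ_eval").setLevel(logging.DEBUG)
```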
8 changes: 6 additions & 2 deletions evaluation/summ_eval/bert_score_metric.py
@@ -1,9 +1,13 @@
import gin
import bert_score

from summ_eval import logger
from summ_eval.metric import Metric


logger = logger.getChild(__name__)


@gin.configurable
class BertScoreMetric(Metric):
def __init__(self, lang='en', model_type='bert-base-uncased', num_layers=8, verbose=False, idf=False,\
@@ -46,7 +50,7 @@ def evaluate_example(self, summary, reference):
verbose=self.verbose, idf=self.idf, batch_size=self.batch_size,
nthreads=self.nthreads, lang=self.lang, return_hash=True,
rescale_with_baseline=self.rescale_with_baseline)
print(f"hash_code: {hash_code}")
logger.debug(f"hash_code: {hash_code}")
score = {"bert_score_precision": all_preds[0].cpu().item(), "bert_score_recall": all_preds[1].cpu().item(), "bert_score_f1":
all_preds[2].cpu().item()}
return score
@@ -57,7 +61,7 @@ def evaluate_batch(self, summaries, references, aggregate=True):
verbose=self.verbose, idf=self.idf, batch_size=self.batch_size,
nthreads=self.nthreads, lang=self.lang, return_hash=True,
rescale_with_baseline=self.rescale_with_baseline)
print(f"hash_code: {hash_code}")
logger.debug(f"hash_code: {hash_code}")
if aggregate:
avg_scores = [s.mean(dim=0) for s in all_preds]
p_val = avg_scores[0].cpu().item()
4 changes: 3 additions & 1 deletion evaluation/summ_eval/bleu_metric.py
@@ -3,6 +3,8 @@
import gin
import sacrebleu
from summ_eval.metric import Metric
import warnings


@gin.configurable
class BleuMetric(Metric):
@@ -33,7 +35,7 @@ def __init__(self, sent_smooth_method='exp', sent_smooth_value=None, sent_use_ef
self.n_workers = n_workers

def evaluate_example(self, summary, reference):
#print("BLEU is intended as a corpus-level metric. Be careful!")
warnings.warn("BLEU is intended as a corpus-level metric. Be careful!")
if isinstance(reference, str):
reference = [reference]
score = sacrebleu.sentence_bleu(summary, reference, smooth_method=self.sent_smooth_method, \
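
With this change, `evaluate_example` emits a real `UserWarning` instead of a commented-out print, so callers who score single sentences on purpose can manage it with the standard `warnings` filters. A small sketch, assuming the message text added above:

```python
import warnings

# Suppress the corpus-level caution when sentence-level BLEU is intentional.
warnings.filterwarnings(
    "ignore",
    message="BLEU is intended as a corpus-level metric",
)

# Or, in a test suite, turn it into an error to catch accidental per-sentence use:
# warnings.filterwarnings("error", message="BLEU is intended as a corpus-level metric")
```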
29 changes: 19 additions & 10 deletions evaluation/summ_eval/calc_scores.py
@@ -9,6 +9,12 @@
from nltk.tokenize import RegexpTokenizer
import spacy

from summ_eval import logger


logger = logger.getChild(__name__)


def cli_main():
#parser = argparse.ArgumentParser(description=metrics_description, formatter_class=argparse.RawDescriptionHelpFormatter)
parser = argparse.ArgumentParser(description="predictor")
@@ -103,7 +109,7 @@ def cli_main():

# =====================================
# READ INPUT
print("Reading the input")
logger.info("Reading the input")
ids = []
articles = []
references = []
@@ -133,10 +139,13 @@ def cli_main():
except:
bad_lines += 1
except Exception as e:
print("Input did not match required format")
print(e)
logger.error("Input did not match required format")
logger.error(e)
sys.exit()
print(f"This many bad lines encountered during loading: {bad_lines}")
if bad_lines > 0:
logger.warning("This many bad lines encountered during loading: %s", bad_lines)
else:
logger.debug(f"No bad lines encountered during loading")

if args.summ_file is not None:
with open(args.summ_file) as inputf:
@@ -162,7 +171,7 @@ def cli_main():

# =====================================
# TOKENIZATION
print("Preparing the input")
logger.info("Preparing the input")
references_delimited = None
summaries_delimited = None
if len(references) > 0:
@@ -216,8 +225,8 @@ def cli_main():
try:
nlp = spacy.load('en_core_web_md')
except OSError:
print('Downloading the spacy en_core_web_md model\n'
"(don't worry, this will only happen once)", file=stderr)
logger.info('Downloading the spacy en_core_web_md model\n'
"(don't worry, this will only happen once)")
from spacy.cli import download
download('en_core_web_md')
nlp = spacy.load('en_core_web_md')
@@ -256,7 +265,7 @@ def cli_main():
final_output = defaultdict(lambda: defaultdict(int))
#import pdb;pdb.set_trace()
for metric, metric_cls in metrics_dict.items():
print(f"Calculating scores for the {metric} metric.")
logger.info(f"Calculating scores for the {metric} metric.")
try:
if metric == "rouge":
output = metric_cls.evaluate_batch(summaries_delimited, references_delimited, aggregate=args.aggregate)
Expand All @@ -283,8 +292,8 @@ def cli_main():
for cur_id, cur_output in zip(ids, output):
final_output[cur_id].update(cur_output)
except Exception as e:
print(e)
print(f"An error was encountered with the {metric} metric.")
logger.error(f"An error was encountered with the {metric} metric.")
logger.error(e)
# =====================================


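
The calc_scores changes mix lazy %-style arguments (`logger.warning("...: %s", bad_lines)`) and f-strings (`logger.info(f"...")`). Both print the same text here; the %-style form defers interpolation until the record is actually handled, which is the usual logging convention. A brief illustration (logger name and variables are just for the example):

```python
import logging

logger = logging.getLogger("summ_eval.calc_scores")
bad_lines = 3
metric = "rouge"

# Lazy: the template is only interpolated when the record is emitted,
# so a disabled level costs almost nothing.
logger.warning("This many bad lines encountered during loading: %s", bad_lines)

# Eager: the f-string is built even when INFO is filtered out.
logger.info(f"Calculating scores for the {metric} metric.")
```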
24 changes: 18 additions & 6 deletions evaluation/summ_eval/data_stats_metric.py
@@ -3,14 +3,19 @@
from multiprocessing import Pool
import gin
import spacy

from summ_eval import logger
from summ_eval.data_stats_utils import Fragments
from summ_eval.metric import Metric


logger = logger.getChild(__name__)

try:
_en = spacy.load('en_core_web_sm')
except OSError:
print('Downloading the spacy en_core_web_sm model\n'
"(don't worry, this will only happen once)", file=stderr)
logger.info('Downloading the spacy en_core_web_sm model\n'
"(don't worry, this will only happen once)")
from spacy.cli import download
download('en_core_web_sm')
_en = spacy.load('en_core_web_sm')
@@ -56,7 +61,7 @@ def evaluate_example(self, summary, input_text):
coverage = fragments.coverage()
density = fragments.density()
compression = fragments.compression()
score_dict = {"coverage": coverage, "density": density, "compression": compression}
score_dict = {
"coverage": coverage,
"density": density,
"compression": compression,
}
tokenized_summary = fragments._norm_summary
tokenized_text = fragments._norm_text
score_dict["summary_length"] = len(tokenized_summary)
Expand All @@ -67,12 +76,15 @@ def evaluate_example(self, summary, input_text):
summ_ngrams_set = set(summ_ngrams)
intersect = summ_ngrams_set.intersection(input_ngrams_set)
try:
score_dict[f"percentage_novel_{i}-gram"] = (len(summ_ngrams_set) \
- len(intersect))/float(len(summ_ngrams_set))
score_dict[f"percentage_novel_{i}-gram"] = (
len(summ_ngrams_set) - len(intersect)
) / float(len(summ_ngrams_set))
ngramCounter = Counter()
ngramCounter.update(summ_ngrams)
repeated = [key for key, val in ngramCounter.items() if val > 1]
score_dict[f"percentage_repeated_{i}-gram_in_summ"] = len(repeated)/float(len(summ_ngrams_set))
score_dict[f"percentage_repeated_{i}-gram_in_summ"] = len(
repeated
) / float(len(summ_ngrams_set))
except ZeroDivisionError:
continue
return score_dict
18 changes: 10 additions & 8 deletions evaluation/summ_eval/meteor_metric.py
@@ -9,16 +9,18 @@
import os
import re
import subprocess
import sys
import threading
import psutil
import requests

from summ_eval import logger
from summ_eval.metric import Metric

dirname = os.path.dirname(__file__)
logger = logger.getChild(__name__)

if not os.path.exists(os.path.join(dirname, "meteor-1.5.jar")):
print("Downloading the meteor jar")
logger.info("Downloading the meteor jar")
url = 'https://github.com/Maluuba/nlg-eval/blob/master/nlgeval/pycocoevalcap/meteor/meteor-1.5.jar?raw=true'
r = requests.get(url)
with open(os.path.join(dirname, "meteor-1.5.jar"), "wb") as outputf:
@@ -102,9 +104,9 @@ def evaluate_example(self, summary, reference):
try:
scores.append(float(dec(v.strip())))
except:
sys.stderr.write("Error handling value: {}\n".format(v))
sys.stderr.write("Decoded value: {}\n".format(dec(v.strip())))
sys.stderr.write("eval_line: {}\n".format(eval_line))
logger.error("Error handling value: {}\n".format(v))
logger.debug("Decoded value: {}\n".format(dec(v.strip())))
logger.debug("eval_line: {}\n".format(eval_line))
raise
score = float(dec(self.meteor_p.stdout.readline()).strip())
score_dict = {"meteor" : score}
@@ -126,9 +128,9 @@ def evaluate_batch(self, summaries, references, aggregate=True):
try:
scores.append(float(dec(v.strip())))
except:
sys.stderr.write("Error handling value: {}\n".format(v))
sys.stderr.write("Decoded value: {}\n".format(dec(v.strip())))
sys.stderr.write("eval_line: {}\n".format(eval_line))
logger.error("Error handling value: {}\n".format(v))
logger.debug("Decoded value: {}\n".format(dec(v.strip())))
logger.debug("eval_line: {}\n".format(eval_line))
score = float(dec(self.meteor_p.stdout.readline()).strip())
if aggregate:
score_dict = {"meteor" : score}
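
The converted `except` blocks above log the offending value and then re-raise. A possible variant, not part of this diff, is `Logger.exception`, which logs at ERROR level and appends the active traceback in a single call; sketched here with a hypothetical helper rather than the real Meteor parsing code:

```python
import logging

logger = logging.getLogger("summ_eval.meteor_metric")

def parse_meteor_value(raw: str) -> float:
    """Hypothetical stand-in for the value parsing in evaluate_example."""
    try:
        return float(raw.strip())
    except ValueError:
        # Logs "Error handling value: ..." plus the full traceback,
        # which plain logger.error(...) calls omit.
        logger.exception("Error handling value: %r", raw)
        raise
```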
11 changes: 6 additions & 5 deletions evaluation/summ_eval/rouge_metric.py
@@ -3,15 +3,16 @@
from pathlib import Path
import tempfile
import shutil
import logging
import gin
import logging
import subprocess

from summ_eval import logger
from summ_eval.metric import Metric
from summ_eval.test_util import rouge_empty
import subprocess
import logging


logger = logging.getLogger()
logger = logger.getChild(__name__)

ROUGE_HOME = os.environ['ROUGE_HOME'] or Path(__file__).parent / "ROUGE-1.5.5"
if "ROUGE_HOME" not in os.environ:
@@ -55,7 +56,7 @@ def __init__(self, rouge_dir=ROUGE_HOME, rouge_args=None, verbose=False):
try:
self.r = Rouge155(rouge_dir=rouge_dir, rouge_args=rouge_args, log_level=log_level)
except:
print(f'Please run this command: \n pip install -U git+https://github.com/bheinzerling/pyrouge.git')
logger.error(f'Please run this command: \n pip install -U git+https://github.com/bheinzerling/pyrouge.git')
exit()
self.rouge_args = rouge_args

7 changes: 6 additions & 1 deletion evaluation/summ_eval/rouge_we_metric.py
@@ -5,15 +5,20 @@
from collections import Counter
import gin
import bz2

from summ_eval import logger
from summ_eval.s3_utils import rouge_n_we, load_embeddings
from summ_eval.metric import Metric


dirname = os.path.dirname(__file__)
logger = logger.getChild(__name__)


if not os.path.exists(os.path.join(dirname, "embeddings")):
os.mkdir(os.path.join(dirname, "embeddings"))
if not os.path.exists(os.path.join(dirname, "embeddings/deps.words")):
print("Downloading the embeddings; this may take a while")
logger.info("Downloading the embeddings; this may take a while")
url = "http://u.cs.biu.ac.il/~yogo/data/syntemb/deps.words.bz2"
r = requests.get(url)
d = bz2.decompress(r.content)
7 changes: 6 additions & 1 deletion evaluation/summ_eval/s3_metric.py
@@ -5,15 +5,20 @@
from multiprocessing import Pool
import gin
import bz2

from summ_eval import logger
from summ_eval.metric import Metric
from summ_eval.s3_utils import S3, load_embeddings


dirname = os.path.dirname(__file__)
logger = logger.getChild(__name__)


if not os.path.exists(os.path.join(dirname, "embeddings")):
os.mkdir(os.path.join(dirname, "embeddings"))
if not os.path.exists(os.path.join(dirname, "embeddings/deps.words")):
print("Downloading the embeddings; this may take a while")
logger.info("Downloading the embeddings; this may take a while")
url = "http://u.cs.biu.ac.il/~yogo/data/syntemb/deps.words.bz2"
r = requests.get(url)
d = bz2.decompress(r.content)
16 changes: 9 additions & 7 deletions evaluation/summ_eval/sentence_movers_utils.py
@@ -11,15 +11,17 @@
from nltk.corpus import stopwords
#from allennlp.commands.elmo import ElmoEmbedder

stop_words = set(stopwords.words('english'))
from summ_eval import logger

print("loading spacy")
logger = logger.getChild(__name__)
stop_words = set(stopwords.words('english'))

try:
logger.info("Loading spacy")
nlp = spacy.load('en_core_web_sm')
except OSError:
print('Downloading the spacy en_core_web_sm model\n'
"(don't worry, this will only happen once)", file=stderr)
logger.info('Downloading the spacy en_core_web_sm model\n'
"(don't worry, this will only happen once)")
from spacy.cli import download
download('en_core_web_sm')
nlp = spacy.load('en_core_web_sm')
@@ -228,7 +230,7 @@ def calc_smd(input_f, output_f="", WORD_REP='elmo', METRIC='sms'):
inF = open(input_f, 'r')
inLines = inF.readlines()
inF.close()
#print("Found", len(inLines), "documents")
logger.debug("Found", len(inLines), "documents")
token_doc_list, text_doc_list = tokenize_texts(inLines, WORD_REP, tokenize=True)
count = 0
results_list = []
Expand All @@ -245,11 +247,11 @@ def calc_smd(input_f, output_f="", WORD_REP='elmo', METRIC='sms'):
try:
dist = calc.nearest_neighbors(str(0), k=1, early_stop=1)[0][1] # how far is hyp from ref?
except Exception as e:
print(e)
logger.error(e)
sim = math.exp(-dist) # switch to similarity
results_list.append(sim)
if doc_id == int((len(token_doc_list) / 10.) * count):
print(str(count * 10) + "% done with calculations")
logger.info(str(count * 10) + "% done with calculations")
count += 1
if output_f != "":
print_score(inLines, output_f, results_list)