From 5fa1a1cf701a96ec4d818b59ec16a08f294054f5 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 26 Nov 2021 14:33:17 +0900 Subject: [PATCH] non tc expressions are case sensitive --- grobid_superconductors/linking/linking_module.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/grobid_superconductors/linking/linking_module.py b/grobid_superconductors/linking/linking_module.py index 6c8142e..a351858 100644 --- a/grobid_superconductors/linking/linking_module.py +++ b/grobid_superconductors/linking/linking_module.py @@ -443,16 +443,17 @@ def process_doc(self, doc): tc_expressions = list(filter(lambda w: w.ent_type_ in ['', 'tc'], doc)) - tc_expressions_standard = ["superconductivity"] - + # This is case sensitive non_tc_expressions_before = ["T N", "TN", "t n", "tn", "Curie", "curie", "Neel", "neel", "at T ", "at T =", "at T=", "is suppressed at ", "ΔT c", "ΔTc", "Δ T c", "T =", "T=", "T = ", "T= "] - + # This is case insensitive tc_expressions_before = ["superconducts at", "superconductive at around", "superconducts around", "superconductivity at", "superconductivity around", "exibits superconductivity at", "T c =", "Tc ="] + + # This is case insensitive non_tc_expressions_after = ['higher', 'lower'] marked_as_tc = [] @@ -488,8 +489,8 @@ def process_doc(self, doc): continue for non_tc in non_tc_expressions_before: - if temp.i - len(non_tc.split(" ")) >= 0 and str.lower(doc[ - temp.i - len(non_tc.split(" ")):temp.i].text) == non_tc: + if temp.i - len(non_tc.split(" ")) >= 0 and doc[ + temp.i - len(non_tc.split(" ")):temp.i].text == non_tc: marked_as_non_tc.append(temp) break