Skip to content

Commit

Permalink
Merge pull request #174 from lfoppiano/fix-word-to-numbers
Browse files Browse the repository at this point in the history
Extend support to Word2Number #176 #91
  • Loading branch information
lfoppiano authored Mar 28, 2024
2 parents 769a213 + 81a910e commit 4b18b6d
Show file tree
Hide file tree
Showing 13 changed files with 494 additions and 151 deletions.
12 changes: 6 additions & 6 deletions doc/evaluation-scores.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ Task 8).
The scores in the following table are the micro average. MeasEval was annotated to allow approximated entities, which
are not supported in grobid-quantities.

| Type (Ref) | Matching method | Precision | Recall | F1-score | Support |
|---------------------------|------------------|-----------|--------|----------|---------|
| Quantities (QUANT) | strict | 54.09 | 54.47 | 54.28 | 1137 |
| Quantities (QUANT) | soft | 67.02 | 67.49 | 67.26 | 1137 |
| Quantified substance (ME) | strict | 13.82 | 9.67 | 11.38 | 615 |
| Quantified substance (ME) | soft | 21.63 | 15.13 | 17.80 | 615 |
| Type (Ref) | Matching method | Precision | Recall | F1-score | Support |
|----------------------|------------------|-----------|--------|----------|---------|
| Quantities | strict | 54.62 | 59.17 | 56.80 | 1223 |
| Quantities | soft | 66.72 | 72.28 | 69.39 | 1223 |
| Quantified substance | strict | 12.80 | 10.24 | 11.38 | 703 |
| Quantified substance | soft | 23.19 | 18.54 | 20.61 | 703 |

Note: the extraction of the quantified substance (ME, Measured Entity) is still experimental in grobid-quantities.

Expand Down
4 changes: 2 additions & 2 deletions scripts/measeval_e2e_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ def parse_measurements_output(result):
if quantity_type is not None or has_unit:
measurement_output_object['type'] = quantity_type

if parsed_value_type is None or parsed_value_type not in ['ALPHABETIC', 'TIME']:
measurements_output.append(measurement_output_object)
# if parsed_value_type is None or parsed_value_type not in ['ALPHABETIC', 'TIME']:
measurements_output.append(measurement_output_object)

return measurements_output

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/grobid/core/data/Quantity.java
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ public String toJson() {
if (!first) {
json.append(", ");
}
json.append("\"normalizedValue\":" + value.toString());
json.append("\"normalizedValue\":" + value.toPlainString());
}

json.append("}");
Expand All @@ -363,7 +363,7 @@ public String toJson() {
public String toString() {
final StringBuilder sb = new StringBuilder("Normalized{");
sb.append("rawValue='").append(rawValue).append('\'');
sb.append(", value=").append(value);
sb.append(", value=").append(value.toPlainString());
sb.append(", unit=").append(unit);
sb.append('}');
return sb.toString();
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/grobid/core/data/Value.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public String toJson() {
} else {
json.append(", ");
}
json.append("\"numeric\" : " + getNumeric());
json.append("\"numeric\" : " + getNumeric().toPlainString());
}

if (getStructure() != null) {
Expand Down
10 changes: 7 additions & 3 deletions src/main/java/org/grobid/core/engines/ValueParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

import static org.apache.commons.lang3.StringUtils.*;
import static org.grobid.core.engines.label.QuantitiesTaggingLabels.*;
import static org.grobid.core.engines.utilities.LabellingUtils.correctLabelling;

/**
* Parser for the value part of a recognized quantity. The goal of the present parser is
Expand Down Expand Up @@ -156,12 +157,13 @@ protected BigDecimal parseValueBlock(ValueBlock block, Locale locale) {
try {
return w2n.normalize(block.getAlphaAsString(), locale);
} catch (NormalizationException e) {
LOGGER.error("Cannot parse " + block.toString() + " with Locale " + locale, e);
LOGGER.error("Skipping parsing of a value. Cannot parse " + block + " with Locale " + locale, e);
}
break;

case TIME:
//we do not parse it for the moment
// We do not parse it for the moment.
// Time/Date should be parsed upstream as it requires some additional context.
break;

}
Expand Down Expand Up @@ -204,7 +206,9 @@ public ValueBlock tagValue(String text) {
} catch (Exception e) {
throw new GrobidException("CRF labeling for quantity parsing failed.", e);
}
parsedValue = resultExtraction(res, layoutTokens);
String fixedRes = correctLabelling(res);

parsedValue = resultExtraction(fixedRes, layoutTokens);
} catch (Exception e) {
throw new GrobidException("An exception occurred while running Grobid.", e);
}
Expand Down
Loading

0 comments on commit 4b18b6d

Please sign in to comment.