diff --git a/Dockerfile b/Dockerfile
index 9f8e10ca1..fcdd093c3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@
 # First build the app on a maven open jdk 11 container
 FROM maven:3-eclipse-temurin-11-focal as dev-builder
 ARG JHOVE_VERSION
-ENV JHOVE_VERSION=${JHOVE_VERSION:-1.27.0-SNAPSHOT}
+ENV JHOVE_VERSION=${JHOVE_VERSION:-1.32.0-RC1}
 
 # Copy the current dev source branch to a local build dir
 COPY . /build/jhove/
@@ -31,7 +31,7 @@ ARG JAVA_OPTS
 ENV JAVA_OPTS=$JAVA_OPTS
 # Specify the veraPDF REST version if you want to (to be used in build automation)
 ARG JHOVE_VERSION
-ENV JHOVE_VERSION=${JHOVE_VERSION:-1.27.0-SNAPSHOT}
+ENV JHOVE_VERSION=${JHOVE_VERSION:-1.32.0-RC1}
 
 # Copy the JRE from the previous stage
 ENV JAVA_HOME=/opt/java/openjdk
diff --git a/jhove-apps/pom.xml b/jhove-apps/pom.xml
index fb5c427dc..bd1098bb7 100644
--- a/jhove-apps/pom.xml
+++ b/jhove-apps/pom.xml
@@ -5,12 +5,12 @@
 org.openpreservation.jhove
 jhove
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
 jhove-apps
 jar
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
 JHOVE Applications
@@ -60,7 +60,7 @@
 org.openpreservation.jhove
 jhove-core
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
diff --git a/jhove-bbt/scripts/create-1.32-target.sh b/jhove-bbt/scripts/create-1.32-target.sh
new file mode 100755
index 000000000..6857fa33d
--- /dev/null
+++ b/jhove-bbt/scripts/create-1.32-target.sh
@@ -0,0 +1,183 @@
+#!/usr/bin/env bash
+#
+# Create the 1.32 black box test target from an existing baseline: copy the
+# baseline results, patch the release details bumped for 1.32 and copy the
+# results that are known to change across from the candidate.
+
+testRoot="test-root"
+paramCandidateVersion=""
+paramBaselineVersion=""
+baselineRoot="${testRoot}/baselines"
+candidateRoot="${testRoot}/candidates"
+targetRoot="${testRoot}/targets"
+
+# Check the passed params to avoid disappointment.
+# Appends the versions to baselineRoot, candidateRoot and targetRoot.
+# Exits 0 after showing the help for -h|-?, exits 1 if -b or -c is missing
+# so that a caller can tell that no target was created.
+checkParams () {
+  local opt
+  OPTIND=1 # Reset in case getopts previously used
+
+  while getopts "h?b:c:" opt; do # Grab the options
+    case "$opt" in
+      h|\?)
+        showHelp
+        exit 0
+        ;;
+      b) paramBaselineVersion=$OPTARG
+        ;;
+      c) paramCandidateVersion=$OPTARG
+        ;;
+    esac
+  done
+
+  if [ -z "$paramBaselineVersion" ] || [ -z "$paramCandidateVersion" ]
+  then
+    showHelp >&2
+    exit 1
+  fi
+
+  baselineRoot="${baselineRoot}/${paramBaselineVersion}"
+  candidateRoot="${candidateRoot}/${paramCandidateVersion}"
+  targetRoot="${targetRoot}/${paramCandidateVersion}"
+}
+
+# Show usage message
+showHelp() {
+  echo "usage: create-target [-b <baselineVersion>] [-c <candidateVersion>] [-h|?]"
+  echo ""
+  echo "  baselineVersion  : The version number id for the baseline data."
+  echo "  candidateVersion : The version number id for the candidate data."
+  echo ""
+  echo "  -h|?             : This message."
+}
+
+# Patch the release details of one module throughout the target.
+#   $1 module name          $2 old release   $3 new release
+#   $4 old release date     $5 new release date
+#   $6... -name patterns of the result files reported by the module
+# NOTE(review): the <reportingModule>, <module> and <release> opening tags
+# are assumed from JHOVE's XML output, confirm against a baseline file.
+patchModuleRelease () {
+  local module=$1 oldRelease=$2 newRelease=$3 oldDate=$4 newDate=$5
+  shift 5
+  local oldReleaseRe=${oldRelease//./\\.} # dots must match literally
+  local pattern
+
+  for pattern in "$@"; do
+    find "${targetRoot}" -type f -name "${pattern}" -exec sed -i \
+      "s/<reportingModule release=\"${oldReleaseRe}\" date=\"${oldDate}\">${module}<\/reportingModule>/<reportingModule release=\"${newRelease}\" date=\"${newDate}\">${module}<\/reportingModule>/" {} +
+  done
+  find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i \
+    "s/<module release=\"${oldReleaseRe}\">${module}<\/module>/<module release=\"${newRelease}\">${module}<\/module>/" {} +
+  find "${targetRoot}" -type f -name "audit-${module}.jhove.xml" -exec sed -i \
+    -e "s/<release>${oldReleaseRe}<\/release>/<release>${newRelease}<\/release>/" \
+    -e "s/${oldDate}/${newDate}/" {} +
+}
+
+# Copy the given result files (paths relative to the roots) from the
+# candidate to the target, skipping any the candidate does not have.
+copyCandidateFiles () {
+  local filename
+  for filename in "$@"; do
+    if [[ -f "${candidateRoot}/${filename}" ]]; then
+      cp "${candidateRoot}/${filename}" "${targetRoot}/${filename}"
+    fi
+  done
+}
+
+# Execution starts here
+checkParams "$@"
+if [[ ! -d "${baselineRoot}" ]]; then
+  echo " - baseline ${baselineRoot} not found." >&2
+  exit 1
+fi
+if [[ -d "${targetRoot}" ]]; then
+  echo " - removing existing baseline at ${targetRoot}."
+  rm -rf "${targetRoot:?}"
+fi
+
+echo "TEST BASELINE: Creating baseline"
+# Start from a copy of the baseline, the 1.32 changes are applied below
+echo " - copying ${baselineRoot} baseline to ${targetRoot}"
+mkdir -p "${targetRoot%/*}"
+cp -R "${baselineRoot}" "${targetRoot}" || exit 1
+
+# Patch release details of the output handlers in the audit file
+find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i \
+  -e 's/outputHandler release="1\.11">XML/outputHandler release="1.12">XML/' \
+  -e 's/outputHandler release="1\.2">JSON/outputHandler release="1.3">JSON/' \
+  -e 's/outputHandler release="1\.6">TEXT/outputHandler release="1.7">TEXT/' {} +
+
+# Update release details for the HTML, PDF, TIFF and XML modules
+patchModuleRelease "HTML-hul" "1.4.3" "1.4.4" "2023-03-16" "2024-08-22" \
+  "*.html.jhove.xml"
+patchModuleRelease "PDF-hul" "1.12.6" "1.12.7" "2024-07-31" "2024-08-22" \
+  "*.pdf.jhove.xml"
+patchModuleRelease "TIFF-hul" "1.9.4" "1.9.5" "2023-03-16" "2024-08-22" \
+  "*.tiff.jhove.xml" "*.tif.jhove.xml" "*.g3.jhove.xml"
+patchModuleRelease "XML-hul" "1.5.4" "1.5.5" "2024-03-05" "2024-08-22" \
+  "*.xml.jhove.xml"
+
+# Copy the TIFF Module results changed by https://github.com/openpreserve/jhove/pull/915
+declare -a tiff_pr915_affected=("examples/modules/TIFF-hul/AA_Banner.tif.jhove.xml"
+  "examples/modules/TIFF-hul/strike.tif.jhove.xml"
+  "examples/modules/TIFF-hul/testpage-large.tif.jhove.xml"
+  "examples/modules/TIFF-hul/testpage-medium.tif.jhove.xml"
+  "examples/modules/TIFF-hul/oxford.tif.jhove.xml"
+  "examples/modules/TIFF-hul/jim___gg.tif.jhove.xml"
+  "examples/modules/TIFF-hul/bathy1.tif.jhove.xml"
+  "examples/modules/TIFF-hul/jim___cg.tif.jhove.xml"
+  "examples/modules/TIFF-hul/quad-tile.tif.jhove.xml"
+  "examples/modules/TIFF-hul/compos.tif.jhove.xml"
+  "examples/modules/TIFF-hul/pagemaker.tif.jhove.xml"
+  "examples/modules/TIFF-hul/jello.tif.jhove.xml"
+  "examples/modules/TIFF-hul/little-endian.tif.jhove.xml"
+  "examples/modules/TIFF-hul/cramps-tile.tif.jhove.xml"
+  "examples/modules/TIFF-hul/jim___ah.tif.jhove.xml"
+  "examples/modules/TIFF-hul/g3test.tif.jhove.xml"
+  "examples/modules/TIFF-hul/6mp_soft.tif.jhove.xml"
+  "examples/modules/TIFF-hul/ycbcr-cat.tif.jhove.xml"
+  "examples/modules/TIFF-hul/quad-lzw.tif.jhove.xml"
+  "examples/modules/TIFF-hul/jim___dg.tif.jhove.xml"
+  "examples/modules/TIFF-hul/fax2d.tif.jhove.xml"
+  "examples/modules/TIFF-hul/peppers.tif.jhove.xml")
+copyCandidateFiles "${tiff_pr915_affected[@]}"
+
+# Copy the TIFF fix affected files from the candidate to the target
+declare -a tiff_affected=("examples/modules/TIFF-hul/cramps.tif.jhove.xml"
+  "examples/modules/TIFF-hul/text.tif.jhove.xml"
+  "examples/modules/TIFF-hul/testpage-small.tif.jhove.xml")
+copyCandidateFiles "${tiff_affected[@]}"
+
+# Copy the XHTML fix affected files from the candidate to the target
+declare -a xhtml_affected=("errors/modules/HTML-hul/xhtml-trans-no-xml-dec.html.jhove.xml"
+  "errors/modules/HTML-hul/xhtml-strict-no-xml-dec.html.jhove.xml"
+  "errors/modules/HTML-hul/xhtml-frames-no-xml-dec.html.jhove.xml"
+  "errors/modules/HTML-hul/xhtml-1-1-no-xml-dec.html.jhove.xml")
+copyCandidateFiles "${xhtml_affected[@]}"
+
+# Copy the XML fix affected files from the candidate to the target
+declare -a xml_affected=("errors/modules/HTML-hul/xhtml-trans-xml-dec.html.jhove.xml"
+  "errors/modules/HTML-hul/xhtml-strict-xml-dec.html.jhove.xml"
+  "errors/modules/HTML-hul/xhtml-frames-xml-dec.html.jhove.xml"
+  "errors/modules/HTML-hul/xhtml-1-1-xml-dec.html.jhove.xml"
+  "examples/modules/XML-hul/valid-external.dtd.jhove.xml"
+  "examples/modules/XML-hul/external-unparsed-entity.ent.jhove.xml"
+  "examples/modules/XML-hul/external-parsed-entity.ent.jhove.xml")
+copyCandidateFiles "${xml_affected[@]}"
+
+# Copy all of the AIF and WAV results as these are changed by the AES schema changes
+cp -rf "${candidateRoot}/examples/modules/AIFF-hul" "${targetRoot}/examples/modules/"
+cp -rf "${candidateRoot}/examples/modules/WAVE-hul" "${targetRoot}/examples/modules/"
+cp -rf "${candidateRoot}/errors/modules/WAVE-hul" "${targetRoot}/errors/modules/"
+
+# Copy the results of the new XML fixes for multiple redirect lookups and to ensure no regression for repeat XML warnings
+cp -rf "${candidateRoot}/errors/modules/XML-hul" "${targetRoot}/errors/modules/"
+
+# Copy the results of the PDF offset message fix
+declare -a pdf_offset_affected=("errors/modules/PDF-hul/pdf-hul-5-govdocs-659152.pdf.jhove.xml"
+  "errors/modules/PDF-hul/pdf-hul-10-govdocs-803945.pdf.jhove.xml"
+  "regression/modules/PDF-hul/issue_306.pdf.jhove.xml")
+copyCandidateFiles "${pdf_offset_affected[@]}"
diff --git a/jhove-core/pom.xml b/jhove-core/pom.xml
index e09a033a4..4ac656bdb 100644
--- a/jhove-core/pom.xml
+++ b/jhove-core/pom.xml
@@ -5,7 +5,7 @@
 org.openpreservation.jhove
 jhove
- 1.31.0-SNAPSHOT
+
1.32.0-RC1
 jhove-core
diff --git a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/JsonHandler.java b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/JsonHandler.java
index c6d4a23fe..e6cfa2cc6 100644
--- a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/JsonHandler.java
+++ b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/JsonHandler.java
@@ -68,13 +68,13 @@ public class JsonHandler extends HandlerBase {
     private static final String NAME = "JSON";
 
     /** Handler release identifier. */
-    private static final String RELEASE = "1.2";
+    private static final String RELEASE = "1.3";
 
     /** String release. */
     private static final String RELEASE_CONSTANT = "release";
 
     /** Handler release date. */
-    private static final int[] DATE = { 2024, 03, 05 };
+    private static final int[] DATE = { 2024, 8, 22 };
 
     private static final String DATE_CONSTANT = "date";
 
diff --git a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/TextHandler.java b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/TextHandler.java
index 979c2d8f7..7e5c6030e 100644
--- a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/TextHandler.java
+++ b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/TextHandler.java
@@ -59,8 +59,8 @@ public class TextHandler extends HandlerBase {
      ******************************************************************/
 
     private static final String NAME = "TEXT";
-    private static final String RELEASE = "1.6";
-    private static final int[] DATE = { 2018, 03, 29 };
+    private static final String RELEASE = "1.7";
+    private static final int[] DATE = { 2024, 8, 22 };
     private static final String NOTE = "This is the default JHOVE output "
             + "handler";
     private static final String RIGHTS = "Derived from software Copyright 2004-2011 "
diff --git a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java
index cce73f5ae..e84b4fee3
100644 --- a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java +++ b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java @@ -83,10 +83,10 @@ protected NumberFormat initialValue() { private static final String NAME = "XML"; /** Handler release identifier. */ - private static final String RELEASE = "1.11"; + private static final String RELEASE = "1.12"; /** Handler release date. */ - private static final int[] DATE = { 2024, 03, 05 }; + private static final int[] DATE = { 2024, 8, 22 }; /** Handler informative note. */ private static final String NOTE = "This output handler is defined by the XML Schema " @@ -3786,7 +3786,7 @@ protected void showNisoImageCaptureMetadata20(NisoImageMetadata niso, n = niso.getOrientation(); if (n != NisoImageMetadata.NULL) { - // Values defined in the MIX 2.0 schema + // Values defined in the MIX 2.0 schema final String[] orient = { "", "normal*", "normal, image flipped", "normal, rotated 180\u00B0", "normal, image flipped, rotated 180\u00B0", @@ -4383,10 +4383,10 @@ protected void showAESAudioMetadata(AESAudioMetadata aes) { sampleRate != AESAudioMetadata.NILL || wordSize != AESAudioMetadata.NULL) { _writer.println(margn2 + elementStart("aes:formatList")); - String[][] frAttr = { { "ID", formatRegionID }, - {"xsi:type", "aes:formatRegionType"}, - {"ownerRef", faceRegionID}, - {"label", "JHOVE"}}; + String[][] frAttr = { { "ID", formatRegionID }, + { "xsi:type", "aes:formatRegionType" }, + { "ownerRef", faceRegionID }, + { "label", "JHOVE" } }; _writer.println(margn3 + elementStart("aes:formatRegion", frAttr)); if (bitDepth != AESAudioMetadata.NULL) { _writer.println(margn4 + element("aes:bitDepth", @@ -4446,10 +4446,10 @@ private void writeAESTimeRangePart(String indent, String elementName, AESAudioMe } String[][] attributes = { - {"editRate", formatters.get().format(sampleRate)}, - {"factorNumerator", "1"}, - {"factorDenominator", "1"} - }; + { "editRate", 
formatters.get().format(sampleRate) }, + { "factorNumerator", "1" }, + { "factorDenominator", "1" } + }; _writer.println(indent + element(elementName, attributes, String.valueOf(timeDesc.getSamples()))); diff --git a/jhove-ext-modules/pom.xml b/jhove-ext-modules/pom.xml index d52517052..662f7edbd 100644 --- a/jhove-ext-modules/pom.xml +++ b/jhove-ext-modules/pom.xml @@ -3,7 +3,7 @@ org.openpreservation.jhove jhove - 1.31.0-SNAPSHOT + 1.32.0-RC1 jhove-ext-modules diff --git a/jhove-installer/pom.xml b/jhove-installer/pom.xml index 76dc74c70..875c5bace 100644 --- a/jhove-installer/pom.xml +++ b/jhove-installer/pom.xml @@ -5,11 +5,11 @@ org.openpreservation.jhove jhove - 1.31.0-SNAPSHOT + 1.32.0-RC1 jhove-installer - 1.31.0-SNAPSHOT + 1.32.0-RC1 JHOVE Installer Maven-built IzPack installer for JHOVE. @@ -22,14 +22,14 @@ 1.6.2 1.4.2 1.4.3 - 1.4.3 + 1.4.4 1.4.4 1.5.4 - 1.12.6 - 1.9.4 + 1.12.7 + 1.9.5 1.7.3 1.8.3 - 1.5.4 + 1.5.5 @@ -175,7 +175,7 @@ org.openpreservation.jhove jhove-ext-modules - 1.31.0-SNAPSHOT + ${project.version} org.openpreservation.jhove.modules diff --git a/jhove-modules/aiff-hul/pom.xml b/jhove-modules/aiff-hul/pom.xml index 72180500d..62c8b5191 100644 --- a/jhove-modules/aiff-hul/pom.xml +++ b/jhove-modules/aiff-hul/pom.xml @@ -3,7 +3,7 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 aiff-hul 1.6.2 diff --git a/jhove-modules/ascii-hul/pom.xml b/jhove-modules/ascii-hul/pom.xml index 250678d7b..45e59316b 100644 --- a/jhove-modules/ascii-hul/pom.xml +++ b/jhove-modules/ascii-hul/pom.xml @@ -3,7 +3,7 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 ascii-hul 1.4.2 diff --git a/jhove-modules/gif-hul/pom.xml b/jhove-modules/gif-hul/pom.xml index ad7998d18..519953a30 100644 --- a/jhove-modules/gif-hul/pom.xml +++ b/jhove-modules/gif-hul/pom.xml @@ -3,7 +3,7 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 gif-hul 1.4.3 diff --git 
a/jhove-modules/html-hul/pom.xml b/jhove-modules/html-hul/pom.xml index a8af3637b..31e43b42a 100644 --- a/jhove-modules/html-hul/pom.xml +++ b/jhove-modules/html-hul/pom.xml @@ -3,10 +3,10 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 html-hul - 1.4.3 + 1.4.4 JHOVE HTML Module HUL HTML module developed by Harvard University Library @@ -14,7 +14,7 @@ org.openpreservation.jhove.modules xml-hul - 1.5.4 + 1.5.5 diff --git a/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java b/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java index 56b8cdd3a..9d3f8d727 100644 --- a/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java +++ b/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java @@ -93,589 +93,589 @@ */ public class HtmlModule extends ModuleBase { - /****************************************************************** - * PRIVATE CLASS FIELDS. 
- ******************************************************************/ - private static final String TRANSITIONAL = "Transitional"; - private static final String STRICT = "Strict"; - private static final String FRAMESET = "Frameset"; - private static final String HTML_4_0 = "HTML 4.0"; - private static final String HTML_4_01 = "HTML 4.01"; - private static final String XHTML_1_0 = "XHTML 1.0"; - private static final String XHTML_1_1_STR = "XHTML 1.1"; - - private static final String NAME = "HTML-hul"; - private static final String RELEASE = "1.4.3"; - private static final int[] DATE = { 2023, 03, 16 }; - private static final String[] FORMAT = { "HTML" }; - private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict," - + "HTML 4.0 Transitional, HTML 4.0 Frameset, " - + "HTML 4.01 Strict, HTML 4.01 Transitional, HTML 4.01 Frameset" - + "XHTML 1.0 Strict, XHTML 1.0 Transitional, XHTML 1.0 Frameset" - + "XHTML 1.1"; - - private static final String[] MIMETYPE = { "text/html" }; - private static final String WELLFORMED = "An HTML file is well-formed " - + "if it meets the criteria defined in the HTML 3.2 specification " - + "(W3C Recommendation, 14-Jan-1997), " - + "the HTML 4.0 specification (W3C Recommendation, 24-Apr-1998, " - + "the HTML 4.01 specification (W3C Recommendation, 24-Dec-1999, " - + "the XHTML 1.0 specification (W3C Recommendation, 26-Jan-2000, " - + "revised 1-Aug-2002, " - + "or the XHTML 1.1 specification (W3C Recommendation, 31-May-2001"; - private static final String VALIDITY = "An HTML file is valid if it is " - + "well-formed and has a valid DOCTYPE declaration."; - private static final String REPINFO = "Languages, title, META tags, " - + "frames, links, scripts, images, citations, defined terms, " - + "abbreviations, entities, Unicode entity blocks"; - private static final String NOTE = ""; - private static final String RIGHTS = "Copyright 2004-2007 by JSTOR and " - + "the President and Fellows of Harvard College. 
" - + "Released under the GNU Lesser General Public License."; - - /****************************************************************** - * PRIVATE INSTANCE FIELDS. - ******************************************************************/ - - /* Doctype extracted from document */ - protected String _doctype; - - /* Constants for the recognized flavors of HTML */ - public static final int HTML_3_2 = 1, HTML_4_0_STRICT = 2, - HTML_4_0_FRAMESET = 3, HTML_4_0_TRANSITIONAL = 4, - HTML_4_01_STRICT = 5, HTML_4_01_FRAMESET = 6, - HTML_4_01_TRANSITIONAL = 7, XHTML_1_0_STRICT = 8, - XHTML_1_0_TRANSITIONAL = 9, XHTML_1_0_FRAMESET = 10, XHTML_1_1 = 11; - - /* Profile names, matching the above indices */ - private static final String[] PROFILENAMES = { null, null, // there are no - // profiles for - // HTML 3.2 - STRICT, FRAMESET, TRANSITIONAL, STRICT, FRAMESET, TRANSITIONAL, - STRICT, FRAMESET, TRANSITIONAL, null // there - // are no - // profiles - // for - // XHTML - // 1.1 - }; - - /* Version names, matching the above indices */ - private static final String[] VERSIONNAMES = { null, "HTML 3.2", HTML_4_0, - HTML_4_0, HTML_4_0, HTML_4_01, HTML_4_01, HTML_4_01, XHTML_1_0, - XHTML_1_0, XHTML_1_0, XHTML_1_1_STR }; - - /* Flag to know if the property TextMDMetadata is to be added */ - protected boolean _withTextMD = false; - /* Hold the information needed to generate a textMD metadata fragment */ - protected TextMDMetadata _textMD; - - /****************************************************************** - * CLASS CONSTRUCTOR. - ******************************************************************/ - /** - * Instantiate an HtmlModule object. 
- */ - public HtmlModule() { - super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, - VALIDITY, REPINFO, NOTE, RIGHTS, false); - - _vendor = Agent.harvardInstance(); - - /* HTML 3.2 spec */ - Document doc = new Document("HTML 3.2 Reference Specification", - DocumentType.REPORT); - Agent w3cAgent = Agent.newW3CInstance(); - doc.setPublisher(w3cAgent); - - Agent dRaggett = new Agent.Builder("Dave Raggett", AgentType.OTHER) - .build(); - doc.setAuthor(dRaggett); - - doc.setDate("1997-01-14"); - doc.setIdentifier( - new Identifier("http://www.w3c.org/TR/REC-html32-19970114", - IdentifierType.URL)); - _specification.add(doc); - - /* HTML 4.0 spec */ - doc = new Document("HTML 4.0 Specification", DocumentType.REPORT); - doc.setPublisher(w3cAgent); - doc.setAuthor(dRaggett); - Agent leHors = new Agent.Builder("Arnaud Le Hors", AgentType.OTHER) - .build(); - doc.setAuthor(leHors); - Agent jacobs = new Agent.Builder("Ian Jacobs", AgentType.OTHER).build(); - doc.setAuthor(jacobs); - doc.setDate("1998-04-24"); - doc.setIdentifier( - new Identifier("http://www.w3.org/TR/1998/REC-html40-19980424/", - IdentifierType.URL)); - _specification.add(doc); - - /* HTML 4.01 spec */ - doc = new Document("HTML 4.01 Specification", DocumentType.REPORT); - doc.setPublisher(w3cAgent); - doc.setAuthor(dRaggett); - doc.setAuthor(leHors); - doc.setAuthor(jacobs); - doc.setDate("1999-12-24"); - doc.setIdentifier(new Identifier( - "http://www.w3.org/TR/1999/REC-html401-19991224/", - IdentifierType.URL)); - _specification.add(doc); - - /* XHTML 1.0 spec */ - doc = new Document( - "XHTML(TM) 1.0 The Extensible HyperText Markup Language " - + "(Second Edition)", - DocumentType.REPORT); - doc.setPublisher(w3cAgent); - doc.setDate("01-08-2002"); - doc.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/", - IdentifierType.URL)); - _specification.add(doc); - - /* XHTML 1.1 spec */ - doc = new Document(" XHTML(TM) 1.1 - Module-based XHTML", - DocumentType.REPORT); - 
doc.setPublisher(w3cAgent); - doc.setDate("31-05-2001"); - doc.setIdentifier(new Identifier( - "http://www.w3.org/TR/2001/REC-xhtml11-20010531/", - IdentifierType.URL)); - _specification.add(doc); - - /* - * XHTML 2.0 spec -- NOT included yet; this is presented in - * "conditionalized-out" form just as a note for future expansion. - * if (false) { - * doc = new Document("XHTML 2.0, W3C Working Draft", - * DocumentType.OTHER); - * doc.setPublisher(w3cAgent); - * doc.setDate("22-07-2004"); - * doc.setIdentifier(new Identifier( - * "http://www.w3.org/TR/2004/WD-xhtml2-20040722/", - * IdentifierType.URL)); - * _specification.add(doc); - * } - */ - - Signature sig = new ExternalSignature(".html", SignatureType.EXTENSION, - SignatureUseType.OPTIONAL); - _signature.add(sig); - sig = new ExternalSignature(".htm", SignatureType.EXTENSION, - SignatureUseType.OPTIONAL); - _signature.add(sig); - } - - /** - * Parse the content of a purported HTML stream digital object and store the - * results in RepInfo. - * - * - * @param stream - * An InputStream, positioned at its beginning, which is - * generated from the object to be parsed. If multiple calls - * to - * parse are made on the basis of a nonzero value - * being returned, a new InputStream must be provided each - * time. - * - * @param info - * A fresh (on the first call) RepInfo object which will be - * modified to reflect the results of the parsing If multiple - * calls to parse are made on the basis of a - * nonzero - * value being returned, the same RepInfo object should be - * passed - * with each call. - * - * @param parseIndex - * Must be 0 in first call to parse. If - * parse returns a nonzero value, it must be - * called - * again with parseIndex equal to that return - * value. 
- * - * @return parseInt - */ - @Override - public int parse(InputStream stream, RepInfo info, int parseIndex) { - if (parseIndex != 0) { - // Coming in with parseIndex = 1 indicates that we've determined - // this is XHTML; so we invoke the XML module to parse it. - // If parseIndex is 100, this is the first invocation of the - // XML module, so we call it with 0; otherwise we call it with - // the value of parseIndex. - if (isXmlAvailable()) { - edu.harvard.hul.ois.jhove.module.XmlModule xmlMod = new edu.harvard.hul.ois.jhove.module.XmlModule(); - if (parseIndex == 100) { - parseIndex = 0; - } - xmlMod.setApp(_app); - xmlMod.setBase(_je); - xmlMod.setDefaultParams(_defaultParams); - try { - xmlMod.applyDefaultParams(); - } catch (Exception e) { - // really shouldn't happen - } - xmlMod.setXhtmlDoctype(_doctype); - return xmlMod.parse(stream, info, parseIndex); - } - // The XML module shouldn't be missing from any installation, - // but someone who really wanted to could remove it. In - // that case, you deserve what you get. - info.setMessage(new ErrorMessage( - MessageConstants.JHOVE_1)); - info.setWellFormed(false); // Treat it as completely wrong - return 0; - } - /* parseIndex = 0, first call only */ - _doctype = null; - // Test if textMD is to be generated - if (_defaultParams != null) { - Iterator iter = _defaultParams.iterator(); - while (iter.hasNext()) { - String param = (String) iter.next(); - if ("withtextmd=true".equalsIgnoreCase(param)) { - _withTextMD = true; - } - } - } - - initParse(); - info.setFormat(_format[0]); - info.setMimeType(_mimeType[0]); - info.setModule(this); - - if (_textMD == null || parseIndex == 0) { - _textMD = new TextMDMetadata(); - } - /* - * We may have already done the checksums while converting a temporary - * file. 
- */ - setupDataStream(stream, info); - - ParseHtml parser; - HtmlMetadata metadata = null; - HtmlCharStream cstream; - try { - cstream = new HtmlCharStream(_dstream, "ISO-8859-1"); - parser = new ParseHtml(this, cstream); - } catch (UnsupportedEncodingException e) { - info.setMessage(new ErrorMessage( - MessageConstants.JHOVE_2, e.getMessage())); - info.setWellFormed(false); - return 0; // shouldn't happen! - } - int type = 0; - try { - List elements = parser.HtmlDoc(); - if (elements.isEmpty()) { - // Consider an empty document bad - info.setWellFormed(false); - info.setMessage(new ErrorMessage( - MessageConstants.JHOVE_3)); - return 0; - } - type = checkDoctype(elements); - if (type < 0) { - info.setWellFormed(false); - info.setMessage(new ErrorMessage( - MessageConstants.HTML_HUL_15)); - return 0; - } - /* - * Check if there is at least one html, head, body or title tag. A - * plain text document might be interpreted as a single PCDATA, - * which is in some ethereal sense well-formed HTML, but it's - * pointless to consider it such. It might also use angle brackets - * as a text delimiter, and that shouldn't count as HTML either. - */ - boolean hasElements = false; - Iterator iter = elements.iterator(); - while (iter.hasNext()) { - Object o = iter.next(); - if (o instanceof JHOpenTag) { - String name = ((JHOpenTag) o).getName(); - if ("html".equals(name) || "head".equals(name) - || "body".equals(name) || "title".equals(name)) { - hasElements = true; - } - break; - } - } - if (!hasElements) { - info.setMessage(new ErrorMessage( - MessageConstants.HTML_HUL_17)); - info.setWellFormed(false); - return 0; - } - - // CRLF from HtmlCharStream ... 
- String lineEnd = cstream.getKindOfLineEnd(); - if (lineEnd == null) { - info.setMessage( - new InfoMessage(MessageConstants.HTML_HUL_23)); - _textMD.setLinebreak(TextMDMetadata.NILL); - } else if ("CR".equalsIgnoreCase(lineEnd)) { - _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CR); - } else if ("LF".equalsIgnoreCase(lineEnd)) { - _textMD.setLinebreak(TextMDMetadata.LINEBREAK_LF); - } else if ("CRLF".equalsIgnoreCase(lineEnd)) { - _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CRLF); - } - - if (type == 0) { - /* - * If we can't find a doctype, it still might be XHTML if the - * elements start with an XML declaration and the root element - * is "html" - */ - switch (seemsToBeXHTML(elements)) { - case 0: // Not XML - break; // fall through - case 1: // XML but not HTML - info.setMessage(new ErrorMessage( - MessageConstants.HTML_HUL_14)); - info.setWellFormed(false); - return 0; - case 2: // probably XHTML - return 100; - default: - break; - } - info.setMessage(new ErrorMessage( - MessageConstants.HTML_HUL_16)); - info.setValid(false); - // But keep going - } - - HtmlDocDesc docDesc = null; - switch (type) { - case HTML_3_2: - - case HTML_4_0_FRAMESET: - docDesc = new Html4_0FrameDocDesc(); - _textMD.setMarkup_basis("HTML"); - _textMD.setMarkup_basis_version("4.0"); - break; - case HTML_4_0_TRANSITIONAL: - docDesc = new Html4_0TransDocDesc(); - _textMD.setMarkup_basis("HTML"); - _textMD.setMarkup_basis_version("4.0"); - break; - case HTML_4_0_STRICT: - docDesc = new Html4_0StrictDocDesc(); - _textMD.setMarkup_basis("HTML"); - _textMD.setMarkup_basis_version("4.0"); - break; - case HTML_4_01_FRAMESET: - docDesc = new Html4_01FrameDocDesc(); - _textMD.setMarkup_basis("HTML"); - _textMD.setMarkup_basis_version("4.01"); - break; - case HTML_4_01_TRANSITIONAL: - docDesc = new Html4_01TransDocDesc(); - _textMD.setMarkup_basis("HTML"); - _textMD.setMarkup_basis_version("4.01"); - break; - case HTML_4_01_STRICT: - docDesc = new Html4_01StrictDocDesc(); - 
_textMD.setMarkup_basis("HTML"); - _textMD.setMarkup_basis_version("4.01"); - break; - case XHTML_1_0_STRICT: - case XHTML_1_0_TRANSITIONAL: - case XHTML_1_0_FRAMESET: - case XHTML_1_1: - // Force a second call to parse as XML. 100 is a - // magic code for the first XML call. - return 100; - } - _textMD.setMarkup_language(_doctype); - if (docDesc == null) { - info.setMessage(new InfoMessage( - MessageConstants.HTML_HUL_22)); - docDesc = new Html3_2DocDesc(); - } - docDesc.validate(elements, info); - metadata = docDesc.getMetadata(); - - // Try to get the charset from the meta Content - if (metadata.getCharset() != null) { - _textMD.setCharset(metadata.getCharset()); - } else { - _textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1); - } - String textMDEncoding = _textMD.getCharset(); - if (textMDEncoding.contains("UTF")) { - _textMD.setByte_order(_bigEndian ? TextMDMetadata.BYTE_ORDER_BIG - : TextMDMetadata.BYTE_ORDER_LITTLE); - _textMD.setByte_size("8"); - _textMD.setCharacter_size("variable"); - } else { - _textMD.setByte_order(_bigEndian ? TextMDMetadata.BYTE_ORDER_BIG - : TextMDMetadata.BYTE_ORDER_LITTLE); - _textMD.setByte_size("8"); - _textMD.setCharacter_size("1"); - } - } catch (ParseException e) { - Token t = e.currentToken; - info.setMessage(new ErrorMessage( - MessageConstants.HTML_HUL_18, - "Line = " + t.beginLine + ", column = " + t.beginColumn)); - info.setWellFormed(false); - } catch (TokenMgrError f) { - info.setMessage(new ErrorMessage( - MessageConstants.HTML_HUL_19, - f.getLocalizedMessage())); - info.setWellFormed(false); - } - - if (info.getWellFormed() == RepInfo.FALSE) { - return 0; - } - - if (type != 0) { - if (PROFILENAMES[type] != null) { - info.setProfile(PROFILENAMES[type]); - } - info.setVersion(VERSIONNAMES[type]); - } - - if (metadata != null) { - Property property = metadata - .toProperty(_withTextMD ? 
_textMD : null); - if (property != null) { - info.setProperty(property); - } - } - - // Set the checksums in the report if they're calculated - setChecksums(this._ckSummer, info); - - return 0; - } - - /** - * Check if the digital object conforms to this Module's internal signature - * information. - * - * HTML is one of the most ill-defined of any open formats, so checking a - * "signature" really means using some heuristics. The only required tag is - * TITLE, but that could occur well into the file. So we look for any of - * three strings -- taking into account case-independence and white space -- - * within the first sigBytes bytes, and call that a signature check. - * - * @param file - * A File object for the object being parsed - * @param stream - * An InputStream, positioned at its beginning, which is - * generated from the object to be parsed - * @param info - * A fresh RepInfo object which will be modified to reflect the - * results of the test - * - * @throws IOException - */ - @Override - public void checkSignatures(File file, InputStream stream, RepInfo info) - throws IOException { - info.setFormat(_format[0]); - info.setMimeType(_mimeType[0]); - info.setModule(this); - char[][] sigtext = new char[3][]; - sigtext[0] = "= 2) { - firstElem = (JHElement) elements.get(1); - } - if (!(firstElem instanceof JHDoctype)) { - return 0; // no DOCTYPE found - } - List dt = ((JHDoctype) firstElem).getDoctypeElements(); - if (dt.size() < 3) { - return 0; - } - try { - // Is DOCTYPE case sensitive? Assume not. 
- String str = ((String) dt.get(0)).toUpperCase(); - if (!"HTML".equals(str)) { - // It's not HTML - return -1; - } - str = ((String) dt.get(1)).toUpperCase(); - if (!"PUBLIC".equals(str)) { - return 0; - } - str = stripQuotes(((String) dt.get(2)).toUpperCase()); - _doctype = str; - if (null != str) - switch (str) { - case "-//W3C//DTD HTML 3.2 FINAL//EN": - case "-//W3C//DTD HTML 3.2//EN": - return HTML_3_2; - case "-//W3C//DTD HTML 4.0//EN": - return HTML_4_0_STRICT; - case "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN": - return HTML_4_0_TRANSITIONAL; - case "-//W3C//DTD HTML 4.0 FRAMESET//EN": - return HTML_4_0_FRAMESET; - case "-//W3C//DTD HTML 4.01//EN": - return HTML_4_01_STRICT; - case "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN": - return HTML_4_01_TRANSITIONAL; - case "-//W3C//DTD HTML 4.01 FRAMESET//EN": - return HTML_4_01_FRAMESET; + /****************************************************************** + * PRIVATE CLASS FIELDS. + ******************************************************************/ + private static final String TRANSITIONAL = "Transitional"; + private static final String STRICT = "Strict"; + private static final String FRAMESET = "Frameset"; + private static final String HTML_4_0 = "HTML 4.0"; + private static final String HTML_4_01 = "HTML 4.01"; + private static final String XHTML_1_0 = "XHTML 1.0"; + private static final String XHTML_1_1_STR = "XHTML 1.1"; + + private static final String NAME = "HTML-hul"; + private static final String RELEASE = "1.4.4"; + private static final int[] DATE = { 2024, 8, 22 }; + private static final String[] FORMAT = { "HTML" }; + private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict," + + "HTML 4.0 Transitional, HTML 4.0 Frameset, " + + "HTML 4.01 Strict, HTML 4.01 Transitional, HTML 4.01 Frameset" + + "XHTML 1.0 Strict, XHTML 1.0 Transitional, XHTML 1.0 Frameset" + + "XHTML 1.1"; + + private static final String[] MIMETYPE = { "text/html" }; + private static final String WELLFORMED = "An HTML file is 
well-formed " + + "if it meets the criteria defined in the HTML 3.2 specification " + + "(W3C Recommendation, 14-Jan-1997), " + + "the HTML 4.0 specification (W3C Recommendation, 24-Apr-1998, " + + "the HTML 4.01 specification (W3C Recommendation, 24-Dec-1999, " + + "the XHTML 1.0 specification (W3C Recommendation, 26-Jan-2000, " + + "revised 1-Aug-2002, " + + "or the XHTML 1.1 specification (W3C Recommendation, 31-May-2001"; + private static final String VALIDITY = "An HTML file is valid if it is " + + "well-formed and has a valid DOCTYPE declaration."; + private static final String REPINFO = "Languages, title, META tags, " + + "frames, links, scripts, images, citations, defined terms, " + + "abbreviations, entities, Unicode entity blocks"; + private static final String NOTE = ""; + private static final String RIGHTS = "Copyright 2004-2007 by JSTOR and " + + "the President and Fellows of Harvard College. " + + "Released under the GNU Lesser General Public License."; + + /****************************************************************** + * PRIVATE INSTANCE FIELDS. 
+ ******************************************************************/ + + /* Doctype extracted from document */ + protected String _doctype; + + /* Constants for the recognized flavors of HTML */ + public static final int HTML_3_2 = 1, HTML_4_0_STRICT = 2, + HTML_4_0_FRAMESET = 3, HTML_4_0_TRANSITIONAL = 4, + HTML_4_01_STRICT = 5, HTML_4_01_FRAMESET = 6, + HTML_4_01_TRANSITIONAL = 7, XHTML_1_0_STRICT = 8, + XHTML_1_0_TRANSITIONAL = 9, XHTML_1_0_FRAMESET = 10, XHTML_1_1 = 11; + + /* Profile names, matching the above indices */ + private static final String[] PROFILENAMES = { null, null, // there are no + // profiles for + // HTML 3.2 + STRICT, FRAMESET, TRANSITIONAL, STRICT, FRAMESET, TRANSITIONAL, + STRICT, FRAMESET, TRANSITIONAL, null // there + // are no + // profiles + // for + // XHTML + // 1.1 + }; + + /* Version names, matching the above indices */ + private static final String[] VERSIONNAMES = { null, "HTML 3.2", HTML_4_0, + HTML_4_0, HTML_4_0, HTML_4_01, HTML_4_01, HTML_4_01, XHTML_1_0, + XHTML_1_0, XHTML_1_0, XHTML_1_1_STR }; + + /* Flag to know if the property TextMDMetadata is to be added */ + protected boolean _withTextMD = false; + /* Hold the information needed to generate a textMD metadata fragment */ + protected TextMDMetadata _textMD; + + /****************************************************************** + * CLASS CONSTRUCTOR. + ******************************************************************/ + /** + * Instantiate an HtmlModule object. 
+ */ + public HtmlModule() { + super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, + VALIDITY, REPINFO, NOTE, RIGHTS, false); + + _vendor = Agent.harvardInstance(); + + /* HTML 3.2 spec */ + Document doc = new Document("HTML 3.2 Reference Specification", + DocumentType.REPORT); + Agent w3cAgent = Agent.newW3CInstance(); + doc.setPublisher(w3cAgent); + + Agent dRaggett = new Agent.Builder("Dave Raggett", AgentType.OTHER) + .build(); + doc.setAuthor(dRaggett); + + doc.setDate("1997-01-14"); + doc.setIdentifier( + new Identifier("http://www.w3c.org/TR/REC-html32-19970114", + IdentifierType.URL)); + _specification.add(doc); + + /* HTML 4.0 spec */ + doc = new Document("HTML 4.0 Specification", DocumentType.REPORT); + doc.setPublisher(w3cAgent); + doc.setAuthor(dRaggett); + Agent leHors = new Agent.Builder("Arnaud Le Hors", AgentType.OTHER) + .build(); + doc.setAuthor(leHors); + Agent jacobs = new Agent.Builder("Ian Jacobs", AgentType.OTHER).build(); + doc.setAuthor(jacobs); + doc.setDate("1998-04-24"); + doc.setIdentifier( + new Identifier("http://www.w3.org/TR/1998/REC-html40-19980424/", + IdentifierType.URL)); + _specification.add(doc); + + /* HTML 4.01 spec */ + doc = new Document("HTML 4.01 Specification", DocumentType.REPORT); + doc.setPublisher(w3cAgent); + doc.setAuthor(dRaggett); + doc.setAuthor(leHors); + doc.setAuthor(jacobs); + doc.setDate("1999-12-24"); + doc.setIdentifier(new Identifier( + "http://www.w3.org/TR/1999/REC-html401-19991224/", + IdentifierType.URL)); + _specification.add(doc); + + /* XHTML 1.0 spec */ + doc = new Document( + "XHTML(TM) 1.0 The Extensible HyperText Markup Language " + + "(Second Edition)", + DocumentType.REPORT); + doc.setPublisher(w3cAgent); + doc.setDate("01-08-2002"); + doc.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/", + IdentifierType.URL)); + _specification.add(doc); + + /* XHTML 1.1 spec */ + doc = new Document(" XHTML(TM) 1.1 - Module-based XHTML", + DocumentType.REPORT); + 
doc.setPublisher(w3cAgent); + doc.setDate("31-05-2001"); + doc.setIdentifier(new Identifier( + "http://www.w3.org/TR/2001/REC-xhtml11-20010531/", + IdentifierType.URL)); + _specification.add(doc); + + /* + * XHTML 2.0 spec -- NOT included yet; this is presented in + * "conditionalized-out" form just as a note for future expansion. + * if (false) { + * doc = new Document("XHTML 2.0, W3C Working Draft", + * DocumentType.OTHER); + * doc.setPublisher(w3cAgent); + * doc.setDate("22-07-2004"); + * doc.setIdentifier(new Identifier( + * "http://www.w3.org/TR/2004/WD-xhtml2-20040722/", + * IdentifierType.URL)); + * _specification.add(doc); + * } + */ + + Signature sig = new ExternalSignature(".html", SignatureType.EXTENSION, + SignatureUseType.OPTIONAL); + _signature.add(sig); + sig = new ExternalSignature(".htm", SignatureType.EXTENSION, + SignatureUseType.OPTIONAL); + _signature.add(sig); + } + + /** + * Parse the content of a purported HTML stream digital object and store the + * results in RepInfo. + * + * + * @param stream + * An InputStream, positioned at its beginning, which is + * generated from the object to be parsed. If multiple calls + * to + * parse are made on the basis of a nonzero value + * being returned, a new InputStream must be provided each + * time. + * + * @param info + * A fresh (on the first call) RepInfo object which will be + * modified to reflect the results of the parsing If multiple + * calls to parse are made on the basis of a + * nonzero + * value being returned, the same RepInfo object should be + * passed + * with each call. + * + * @param parseIndex + * Must be 0 in first call to parse. If + * parse returns a nonzero value, it must be + * called + * again with parseIndex equal to that return + * value. 
+ * + * @return parseInt + */ + @Override + public int parse(InputStream stream, RepInfo info, int parseIndex) { + if (parseIndex != 0) { + // Coming in with parseIndex = 1 indicates that we've determined + // this is XHTML; so we invoke the XML module to parse it. + // If parseIndex is 100, this is the first invocation of the + // XML module, so we call it with 0; otherwise we call it with + // the value of parseIndex. + if (isXmlAvailable()) { + edu.harvard.hul.ois.jhove.module.XmlModule xmlMod = new edu.harvard.hul.ois.jhove.module.XmlModule(); + if (parseIndex == 100) { + parseIndex = 0; + } + xmlMod.setApp(_app); + xmlMod.setBase(_je); + xmlMod.setDefaultParams(_defaultParams); + try { + xmlMod.applyDefaultParams(); + } catch (Exception e) { + // really shouldn't happen + } + xmlMod.setXhtmlDoctype(_doctype); + return xmlMod.parse(stream, info, parseIndex); + } + // The XML module shouldn't be missing from any installation, + // but someone who really wanted to could remove it. In + // that case, you deserve what you get. + info.setMessage(new ErrorMessage( + MessageConstants.JHOVE_1)); + info.setWellFormed(false); // Treat it as completely wrong + return 0; + } + /* parseIndex = 0, first call only */ + _doctype = null; + // Test if textMD is to be generated + if (_defaultParams != null) { + Iterator iter = _defaultParams.iterator(); + while (iter.hasNext()) { + String param = (String) iter.next(); + if ("withtextmd=true".equalsIgnoreCase(param)) { + _withTextMD = true; + } + } + } + + initParse(); + info.setFormat(_format[0]); + info.setMimeType(_mimeType[0]); + info.setModule(this); + + if (_textMD == null || parseIndex == 0) { + _textMD = new TextMDMetadata(); + } + /* + * We may have already done the checksums while converting a temporary + * file. 
+ */ + setupDataStream(stream, info); + + ParseHtml parser; + HtmlMetadata metadata = null; + HtmlCharStream cstream; + try { + cstream = new HtmlCharStream(_dstream, "ISO-8859-1"); + parser = new ParseHtml(this, cstream); + } catch (UnsupportedEncodingException e) { + info.setMessage(new ErrorMessage( + MessageConstants.JHOVE_2, e.getMessage())); + info.setWellFormed(false); + return 0; // shouldn't happen! + } + int type = 0; + try { + List elements = parser.HtmlDoc(); + if (elements.isEmpty()) { + // Consider an empty document bad + info.setWellFormed(false); + info.setMessage(new ErrorMessage( + MessageConstants.JHOVE_3)); + return 0; + } + type = checkDoctype(elements); + if (type < 0) { + info.setWellFormed(false); + info.setMessage(new ErrorMessage( + MessageConstants.HTML_HUL_15)); + return 0; + } + /* + * Check if there is at least one html, head, body or title tag. A + * plain text document might be interpreted as a single PCDATA, + * which is in some ethereal sense well-formed HTML, but it's + * pointless to consider it such. It might also use angle brackets + * as a text delimiter, and that shouldn't count as HTML either. + */ + boolean hasElements = false; + Iterator iter = elements.iterator(); + while (iter.hasNext()) { + Object o = iter.next(); + if (o instanceof JHOpenTag) { + String name = ((JHOpenTag) o).getName(); + if ("html".equals(name) || "head".equals(name) + || "body".equals(name) || "title".equals(name)) { + hasElements = true; + } + break; + } + } + if (!hasElements) { + info.setMessage(new ErrorMessage( + MessageConstants.HTML_HUL_17)); + info.setWellFormed(false); + return 0; + } + + // CRLF from HtmlCharStream ... 
+ String lineEnd = cstream.getKindOfLineEnd(); + if (lineEnd == null) { + info.setMessage( + new InfoMessage(MessageConstants.HTML_HUL_23)); + _textMD.setLinebreak(TextMDMetadata.NILL); + } else if ("CR".equalsIgnoreCase(lineEnd)) { + _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CR); + } else if ("LF".equalsIgnoreCase(lineEnd)) { + _textMD.setLinebreak(TextMDMetadata.LINEBREAK_LF); + } else if ("CRLF".equalsIgnoreCase(lineEnd)) { + _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CRLF); + } + + if (type == 0) { + /* + * If we can't find a doctype, it still might be XHTML if the + * elements start with an XML declaration and the root element + * is "html" + */ + switch (seemsToBeXHTML(elements)) { + case 0: // Not XML + break; // fall through + case 1: // XML but not HTML + info.setMessage(new ErrorMessage( + MessageConstants.HTML_HUL_14)); + info.setWellFormed(false); + return 0; + case 2: // probably XHTML + return 100; + default: + break; + } + info.setMessage(new ErrorMessage( + MessageConstants.HTML_HUL_16)); + info.setValid(false); + // But keep going + } + + HtmlDocDesc docDesc = null; + switch (type) { + case HTML_3_2: + + case HTML_4_0_FRAMESET: + docDesc = new Html4_0FrameDocDesc(); + _textMD.setMarkup_basis("HTML"); + _textMD.setMarkup_basis_version("4.0"); + break; + case HTML_4_0_TRANSITIONAL: + docDesc = new Html4_0TransDocDesc(); + _textMD.setMarkup_basis("HTML"); + _textMD.setMarkup_basis_version("4.0"); + break; + case HTML_4_0_STRICT: + docDesc = new Html4_0StrictDocDesc(); + _textMD.setMarkup_basis("HTML"); + _textMD.setMarkup_basis_version("4.0"); + break; + case HTML_4_01_FRAMESET: + docDesc = new Html4_01FrameDocDesc(); + _textMD.setMarkup_basis("HTML"); + _textMD.setMarkup_basis_version("4.01"); + break; + case HTML_4_01_TRANSITIONAL: + docDesc = new Html4_01TransDocDesc(); + _textMD.setMarkup_basis("HTML"); + _textMD.setMarkup_basis_version("4.01"); + break; + case HTML_4_01_STRICT: + docDesc = new Html4_01StrictDocDesc(); + 
_textMD.setMarkup_basis("HTML"); + _textMD.setMarkup_basis_version("4.01"); + break; + case XHTML_1_0_STRICT: + case XHTML_1_0_TRANSITIONAL: + case XHTML_1_0_FRAMESET: + case XHTML_1_1: + // Force a second call to parse as XML. 100 is a + // magic code for the first XML call. + return 100; + } + _textMD.setMarkup_language(_doctype); + if (docDesc == null) { + info.setMessage(new InfoMessage( + MessageConstants.HTML_HUL_22)); + docDesc = new Html3_2DocDesc(); + } + docDesc.validate(elements, info); + metadata = docDesc.getMetadata(); + + // Try to get the charset from the meta Content + if (metadata.getCharset() != null) { + _textMD.setCharset(metadata.getCharset()); + } else { + _textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1); + } + String textMDEncoding = _textMD.getCharset(); + if (textMDEncoding.contains("UTF")) { + _textMD.setByte_order(_bigEndian ? TextMDMetadata.BYTE_ORDER_BIG + : TextMDMetadata.BYTE_ORDER_LITTLE); + _textMD.setByte_size("8"); + _textMD.setCharacter_size("variable"); + } else { + _textMD.setByte_order(_bigEndian ? TextMDMetadata.BYTE_ORDER_BIG + : TextMDMetadata.BYTE_ORDER_LITTLE); + _textMD.setByte_size("8"); + _textMD.setCharacter_size("1"); + } + } catch (ParseException e) { + Token t = e.currentToken; + info.setMessage(new ErrorMessage( + MessageConstants.HTML_HUL_18, + "Line = " + t.beginLine + ", column = " + t.beginColumn)); + info.setWellFormed(false); + } catch (TokenMgrError f) { + info.setMessage(new ErrorMessage( + MessageConstants.HTML_HUL_19, + f.getLocalizedMessage())); + info.setWellFormed(false); + } + + if (info.getWellFormed() == RepInfo.FALSE) { + return 0; + } + + if (type != 0) { + if (PROFILENAMES[type] != null) { + info.setProfile(PROFILENAMES[type]); + } + info.setVersion(VERSIONNAMES[type]); + } + + if (metadata != null) { + Property property = metadata + .toProperty(_withTextMD ? 
_textMD : null); + if (property != null) { + info.setProperty(property); + } + } + + // Set the checksums in the report if they're calculated + setChecksums(this._ckSummer, info); + + return 0; + } + + /** + * Check if the digital object conforms to this Module's internal signature + * information. + * + * HTML is one of the most ill-defined of any open formats, so checking a + * "signature" really means using some heuristics. The only required tag is + * TITLE, but that could occur well into the file. So we look for any of + * three strings -- taking into account case-independence and white space -- + * within the first sigBytes bytes, and call that a signature check. + * + * @param file + * A File object for the object being parsed + * @param stream + * An InputStream, positioned at its beginning, which is + * generated from the object to be parsed + * @param info + * A fresh RepInfo object which will be modified to reflect the + * results of the test + * + * @throws IOException + */ + @Override + public void checkSignatures(File file, InputStream stream, RepInfo info) + throws IOException { + info.setFormat(_format[0]); + info.setMimeType(_mimeType[0]); + info.setModule(this); + char[][] sigtext = new char[3][]; + sigtext[0] = "= 2) { + firstElem = (JHElement) elements.get(1); + } + if (!(firstElem instanceof JHDoctype)) { + return 0; // no DOCTYPE found + } + List dt = ((JHDoctype) firstElem).getDoctypeElements(); + if (dt.size() < 3) { + return 0; + } + try { + // Is DOCTYPE case sensitive? Assume not. 
+ String str = ((String) dt.get(0)).toUpperCase(); + if (!"HTML".equals(str)) { + // It's not HTML + return -1; + } + str = ((String) dt.get(1)).toUpperCase(); + if (!"PUBLIC".equals(str)) { + return 0; + } + str = stripQuotes(((String) dt.get(2)).toUpperCase()); + _doctype = str; + if (null != str) + switch (str) { + case "-//W3C//DTD HTML 3.2 FINAL//EN": + case "-//W3C//DTD HTML 3.2//EN": + return HTML_3_2; + case "-//W3C//DTD HTML 4.0//EN": + return HTML_4_0_STRICT; + case "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN": + return HTML_4_0_TRANSITIONAL; + case "-//W3C//DTD HTML 4.0 FRAMESET//EN": + return HTML_4_0_FRAMESET; + case "-//W3C//DTD HTML 4.01//EN": + return HTML_4_01_STRICT; + case "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN": + return HTML_4_01_TRANSITIONAL; + case "-//W3C//DTD HTML 4.01 FRAMESET//EN": + return HTML_4_01_FRAMESET; case "-//W3C//DTD XHTML 1.0 STRICT//EN": return XHTML_1_0_STRICT; case "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN": @@ -685,68 +685,68 @@ protected int checkDoctype(List elements) { case "-//W3C//DTD XHTML 1.1//EN": return XHTML_1_1; default: - break; - } - } catch (Exception e) { - // Really shouldn't happen, but if it does we've got - // a bad doctype - return 0; - } - return 0; - } - - /* - * See if this document, even if it lacks a doctype, is most likely XHTML. - * The test is that the document starts with an XML declaration and has - * "html" for its first tag. 
- * - * Returns: 0 if there's no XML declaration 1 if there's an XML declaration - * but no html tag; in this case it's probably some other kind of XML 2 if - * there's an XML declaration and an html tag - */ - protected int seemsToBeXHTML(List elements) { - JHElement elem; - try { - elem = (JHElement) elements.get(0); - if (!(elem instanceof JHXmlDecl)) { - return 0; - } - Iterator iter = elements.iterator(); - while (iter.hasNext()) { - elem = (JHElement) iter.next(); - if (elem instanceof JHOpenTag) { - JHOpenTag tag = (JHOpenTag) elem; - return ("html".equals(tag.getName()) ? 2 : 1); - } - } - } catch (Exception e) { - return 0; // document must be really empty - } - return 1; - } - - /* - * Remove quotes from the beginning and end of a string. If it doesn't have - * quotes in both places, leave it alone. - */ - protected String stripQuotes(String str) { - int len = str.length(); - if (str.charAt(0) == '"' && str.charAt(len - 1) == '"') { - return str.substring(1, len - 1); - } - return str; - } - - /* - * Checks if the XML module is available. - */ - protected static boolean isXmlAvailable() { - try { - Class.forName("edu.harvard.hul.ois.jhove.module.XmlModule"); - return true; - } catch (Exception e) { - return false; - } - } + break; + } + } catch (Exception e) { + // Really shouldn't happen, but if it does we've got + // a bad doctype + return 0; + } + return 0; + } + + /* + * See if this document, even if it lacks a doctype, is most likely XHTML. + * The test is that the document starts with an XML declaration and has + * "html" for its first tag. 
+ * + * Returns: 0 if there's no XML declaration 1 if there's an XML declaration + * but no html tag; in this case it's probably some other kind of XML 2 if + * there's an XML declaration and an html tag + */ + protected int seemsToBeXHTML(List elements) { + JHElement elem; + try { + elem = (JHElement) elements.get(0); + if (!(elem instanceof JHXmlDecl)) { + return 0; + } + Iterator iter = elements.iterator(); + while (iter.hasNext()) { + elem = (JHElement) iter.next(); + if (elem instanceof JHOpenTag) { + JHOpenTag tag = (JHOpenTag) elem; + return ("html".equals(tag.getName()) ? 2 : 1); + } + } + } catch (Exception e) { + return 0; // document must be really empty + } + return 1; + } + + /* + * Remove quotes from the beginning and end of a string. If it doesn't have + * quotes in both places, leave it alone. + */ + protected String stripQuotes(String str) { + int len = str.length(); + if (str.charAt(0) == '"' && str.charAt(len - 1) == '"') { + return str.substring(1, len - 1); + } + return str; + } + + /* + * Checks if the XML module is available. 
+ */ + protected static boolean isXmlAvailable() { + try { + Class.forName("edu.harvard.hul.ois.jhove.module.XmlModule"); + return true; + } catch (Exception e) { + return false; + } + } } diff --git a/jhove-modules/jpeg-hul/pom.xml b/jhove-modules/jpeg-hul/pom.xml index ab0eebcaf..3dd165646 100644 --- a/jhove-modules/jpeg-hul/pom.xml +++ b/jhove-modules/jpeg-hul/pom.xml @@ -3,7 +3,7 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 jpeg-hul 1.5.4 @@ -14,7 +14,7 @@ org.openpreservation.jhove.modules tiff-hul - 1.9.4 + 1.9.5 diff --git a/jhove-modules/jpeg2000-hul/pom.xml b/jhove-modules/jpeg2000-hul/pom.xml index 77ed54d56..5bac0118c 100644 --- a/jhove-modules/jpeg2000-hul/pom.xml +++ b/jhove-modules/jpeg2000-hul/pom.xml @@ -3,7 +3,7 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 jpeg2000-hul 1.4.4 diff --git a/jhove-modules/pdf-hul/pom.xml b/jhove-modules/pdf-hul/pom.xml index 3e5a435ec..9b9893727 100644 --- a/jhove-modules/pdf-hul/pom.xml +++ b/jhove-modules/pdf-hul/pom.xml @@ -3,10 +3,10 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 pdf-hul - 1.12.6 + 1.12.7 JHOVE PDF Module HUL PDF module developed by Harvard University Library diff --git a/jhove-modules/pdf-hul/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java b/jhove-modules/pdf-hul/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java index 012cc9428..c3014d307 100644 --- a/jhove-modules/pdf-hul/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java +++ b/jhove-modules/pdf-hul/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java @@ -380,8 +380,8 @@ public class PdfModule extends ModuleBase { ******************************************************************/ private static final String NAME = "PDF-hul"; - private static final String RELEASE = "1.12.6"; - private static final int[] DATE = { 2024, 07, 31 }; + private static final String RELEASE = "1.12.7"; + private 
static final int[] DATE = { 2024, 8, 22 }; private static final String[] FORMAT = { "PDF", "Portable Document Format" }; private static final String COVERAGE = "PDF 1.0-1.6; " diff --git a/jhove-modules/pdf-hul/src/test/java/edu/harvard/hul/ois/jhove/module/pdf/LiteralTests.java b/jhove-modules/pdf-hul/src/test/java/edu/harvard/hul/ois/jhove/module/pdf/LiteralTests.java index 820ac0b19..930665db2 100644 --- a/jhove-modules/pdf-hul/src/test/java/edu/harvard/hul/ois/jhove/module/pdf/LiteralTests.java +++ b/jhove-modules/pdf-hul/src/test/java/edu/harvard/hul/ois/jhove/module/pdf/LiteralTests.java @@ -2,10 +2,6 @@ import static org.junit.Assert.assertNotNull; -import java.util.Calendar; -import java.util.Date; -import java.util.TimeZone; - import org.junit.Test; /** diff --git a/jhove-modules/pom.xml b/jhove-modules/pom.xml index cbdf843cd..af3133987 100644 --- a/jhove-modules/pom.xml +++ b/jhove-modules/pom.xml @@ -5,13 +5,13 @@ org.openpreservation.jhove jhove - 1.31.0-SNAPSHOT + 1.32.0-RC1 org.openpreservation.jhove.modules jhove-modules pom - 1.31.0-SNAPSHOT + 1.32.0-RC1 JHOVE Validation Modules The JHOVE HUL validation modules. 
@@ -19,7 +19,7 @@ org.openpreservation.jhove jhove-core - 1.31.0-SNAPSHOT + 1.32.0-RC1 org.junit.vintage diff --git a/jhove-modules/tiff-hul/pom.xml b/jhove-modules/tiff-hul/pom.xml index 34ae9f03d..dc4661644 100644 --- a/jhove-modules/tiff-hul/pom.xml +++ b/jhove-modules/tiff-hul/pom.xml @@ -3,10 +3,10 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 tiff-hul - 1.9.4 + 1.9.5 JHOVE TIFF Module HUL TIFF module developed by Harvard University Library diff --git a/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java b/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java index 7d0d6318f..26c61c894 100644 --- a/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java +++ b/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java @@ -121,8 +121,8 @@ public class TiffModule extends ModuleBase { protected Logger _logger; private static final String NAME = "TIFF-hul"; - private static final String RELEASE = "1.9.4"; - private static final int[] DATE = { 2023, 03, 16 }; + private static final String RELEASE = "1.9.5"; + private static final int[] DATE = { 2024, 8, 22 }; private static final String[] FORMAT = { "TIFF", "Tagged Image File Format" }; private static final String COVERAGE = "TIFF 4.0, 5.0, and 6.0; " + "TIFF/IT (ISO/DIS 12639:2003), including file types CT, LW, HC, MP, " @@ -1228,7 +1228,7 @@ protected IFD parseIFDChain(long next, RepInfo info, int type, ifd.setThumbnail(true); } list.add(ifd); - + if (list.size() > 50) { throw new TiffException(MessageConstants.TIFF_HUL_60); } diff --git a/jhove-modules/utf8-hul/pom.xml b/jhove-modules/utf8-hul/pom.xml index 3798fb6fd..568545ac3 100644 --- a/jhove-modules/utf8-hul/pom.xml +++ b/jhove-modules/utf8-hul/pom.xml @@ -3,7 +3,7 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 utf8-hul 1.7.3 @@ -19,7 +19,7 @@ 
org.openpreservation.jhove.modules pdf-hul - 1.12.1 + 1.12.7 test diff --git a/jhove-modules/wave-hul/pom.xml b/jhove-modules/wave-hul/pom.xml index b03bf26cb..7dae61bda 100644 --- a/jhove-modules/wave-hul/pom.xml +++ b/jhove-modules/wave-hul/pom.xml @@ -3,7 +3,7 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 wave-hul 1.8.3 diff --git a/jhove-modules/xml-hul/pom.xml b/jhove-modules/xml-hul/pom.xml index cf18c4cca..c074ddec0 100644 --- a/jhove-modules/xml-hul/pom.xml +++ b/jhove-modules/xml-hul/pom.xml @@ -3,10 +3,10 @@ org.openpreservation.jhove.modules jhove-modules - 1.31.0-SNAPSHOT + 1.32.0-RC1 xml-hul - 1.5.4 + 1.5.5 JHOVE XML Module HUL XML module developed by Harvard University Library diff --git a/jhove-modules/xml-hul/src/main/java/edu/harvard/hul/ois/jhove/module/XmlModule.java b/jhove-modules/xml-hul/src/main/java/edu/harvard/hul/ois/jhove/module/XmlModule.java index 374243814..1abf44cff 100644 --- a/jhove-modules/xml-hul/src/main/java/edu/harvard/hul/ois/jhove/module/XmlModule.java +++ b/jhove-modules/xml-hul/src/main/java/edu/harvard/hul/ois/jhove/module/XmlModule.java @@ -49,8 +49,8 @@ public class XmlModule extends ModuleBase { private static final String NAME = "XML-hul"; - private static final String RELEASE = "1.5.4"; - private static final int[] DATE = { 2024, 03, 05 }; + private static final String RELEASE = "1.5.5"; + private static final int[] DATE = { 2024, 8, 22 }; private static final String[] FORMAT = { "XML", "XHTML" }; private static final String COVERAGE = "XML 1.0"; private static final String[] MIMETYPE = { "text/xml", "application/xml", diff --git a/pom.xml b/pom.xml index f63ee57db..02ce407c8 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.openpreservation.jhove jhove - 1.31.0-SNAPSHOT + 1.32.0-RC1 pom JHOVE - JSTOR/Harvard Object Validation Environment