diff --git a/Dockerfile b/Dockerfile
index 9f8e10ca1..fcdd093c3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@
# First build the app on a maven open jdk 11 container
FROM maven:3-eclipse-temurin-11-focal as dev-builder
ARG JHOVE_VERSION
-ENV JHOVE_VERSION=${JHOVE_VERSION:-1.27.0-SNAPSHOT}
+ENV JHOVE_VERSION=${JHOVE_VERSION:-1.32.0-RC1}
# Copy the current dev source branch to a local build dir
COPY . /build/jhove/
@@ -31,7 +31,7 @@ ARG JAVA_OPTS
ENV JAVA_OPTS=$JAVA_OPTS
-# Specify the veraPDF REST version if you want to (to be used in build automation)
+# Specify the JHOVE version if you want to (to be used in build automation)
ARG JHOVE_VERSION
-ENV JHOVE_VERSION=${JHOVE_VERSION:-1.27.0-SNAPSHOT}
+ENV JHOVE_VERSION=${JHOVE_VERSION:-1.32.0-RC1}
# Copy the JRE from the previous stage
ENV JAVA_HOME=/opt/java/openjdk
diff --git a/jhove-apps/pom.xml b/jhove-apps/pom.xml
index fb5c427dc..bd1098bb7 100644
--- a/jhove-apps/pom.xml
+++ b/jhove-apps/pom.xml
@@ -5,12 +5,12 @@
org.openpreservation.jhove
jhove
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
jhove-apps
jar
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
JHOVE Applications
@@ -60,7 +60,7 @@
org.openpreservation.jhove
jhove-core
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
diff --git a/jhove-bbt/scripts/create-1.32-target.sh b/jhove-bbt/scripts/create-1.32-target.sh
new file mode 100755
index 000000000..6857fa33d
--- /dev/null
+++ b/jhove-bbt/scripts/create-1.32-target.sh
@@ -0,0 +1,213 @@
+#!/usr/bin/env bash
+
+testRoot="test-root"
+paramCandidateVersion=""
+paramBaselineVersion=""
+baselineRoot="${testRoot}/baselines"
+candidateRoot="${testRoot}/candidates"
+targetRoot="${testRoot}/targets"
+# Check the passed params to avoid disapointment
+checkParams () {
+ OPTIND=1 # Reset in case getopts previously used
+
+ while getopts "h?b:c:" opt; do # Grab the options
+ case "$opt" in
+ h|\?)
+ showHelp
+ exit 0
+ ;;
+ b) paramBaselineVersion=$OPTARG
+ ;;
+ c) paramCandidateVersion=$OPTARG
+ ;;
+ esac
+ done
+
+ if [ -z "$paramBaselineVersion" ] || [ -z "$paramCandidateVersion" ]
+ then
+ showHelp
+ exit 0
+ fi
+
+ baselineRoot="${baselineRoot}/${paramBaselineVersion}"
+ candidateRoot="${candidateRoot}/${paramCandidateVersion}"
+ targetRoot="${targetRoot}/${paramCandidateVersion}"
+}
+
+# Show usage message
+showHelp() {
+ echo "usage: create-target [-b ] [-c ] [-h|?]"
+ echo ""
+ echo " baselineVersion : The version number id for the baseline data."
+ echo " candidateVersion : The version number id for the candidate data."
+ echo ""
+ echo " -h|? : This message."
+}
+
+# Execution starts here
+checkParams "$@";
+if [[ -d "${targetRoot}" ]]; then
+ echo " - removing existing baseline at ${targetRoot}."
+ rm -rf "${targetRoot}"
+fi
+
+echo "TEST BASELINE: Creating baseline"
+# Simply copy baseline for now we're not making any changes
+echo " - copying ${baselineRoot} baseline to ${targetRoot}"
+cp -R "${baselineRoot}" "${targetRoot}"
+
+# Patch release details of the reporting module in the audit file
+find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/outputHandler release="1.11">XML/outputHandler release="1.12">XML/' {} \;
+find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/outputHandler release="1.2">JSON/outputHandler release="1.3">JSON/' {} \;
+find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/outputHandler release="1.6">TEXT/outputHandler release="1.7">TEXT/' {} \;
+
+# Update release details for HTML module
+find "${targetRoot}" -type f -name "*.html.jhove.xml" -exec sed -i 's/HTML-hul<\/reportingModule>/HTML-hul<\/reportingModule>/' {} \;
+find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/HTML-hul<\/module>/HTML-hul<\/module>/' {} \;
+find "${targetRoot}" -type f -name "audit-HTML-hul.jhove.xml" -exec sed -i 's/1.4.3<\/release>/1.4.4<\/release>/' {} \;
+find "${targetRoot}" -type f -name "audit-HTML-hul.jhove.xml" -exec sed -i 's/2023-03-16/2024-08-22/' {} \;
+
+# Update release details for PDF module
+find "${targetRoot}" -type f -name "*.pdf.jhove.xml" -exec sed -i 's/PDF-hul<\/reportingModule>/PDF-hul<\/reportingModule>/' {} \;
+find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/PDF-hul<\/module>/PDF-hul<\/module>/' {} \;
+find "${targetRoot}" -type f -name "audit-PDF-hul.jhove.xml" -exec sed -i 's/1.12.6<\/release>/1.12.7<\/release>/' {} \;
+find "${targetRoot}" -type f -name "audit-PDF-hul.jhove.xml" -exec sed -i 's/2024-07-31/2024-08-22/' {} \;
+
+# Update release details for TIFF module
+find "${targetRoot}" -type f -name "*.tiff.jhove.xml" -exec sed -i 's/TIFF-hul<\/reportingModule>/TIFF-hul<\/reportingModule>/' {} \;
+find "${targetRoot}" -type f -name "*.tif.jhove.xml" -exec sed -i 's/TIFF-hul<\/reportingModule>/TIFF-hul<\/reportingModule>/' {} \;
+find "${targetRoot}" -type f -name "*.g3.jhove.xml" -exec sed -i 's/TIFF-hul<\/reportingModule>/TIFF-hul<\/reportingModule>/' {} \;
+find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/TIFF-hul<\/module>/TIFF-hul<\/module>/' {} \;
+find "${targetRoot}" -type f -name "audit-TIFF-hul.jhove.xml" -exec sed -i 's/1.9.4<\/release>/1.9.5<\/release>/' {} \;
+find "${targetRoot}" -type f -name "audit-TIFF-hul.jhove.xml" -exec sed -i 's/2023-03-16/2024-08-22/' {} \;
+
+# Update release details for XML module
+find "${targetRoot}" -type f -name "*.xml.jhove.xml" -exec sed -i 's/XML-hul<\/reportingModule>/XML-hul<\/reportingModule>/' {} \;
+find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/XML-hul<\/module>/XML-hul<\/module>/' {} \;
+find "${targetRoot}" -type f -name "audit-XML-hul.jhove.xml" -exec sed -i 's/1.5.4<\/release>/1.5.5<\/release>/' {} \;
+find "${targetRoot}" -type f -name "audit-XML-hul.jhove.xml" -exec sed -i 's/2024-03-05/2024-08-22/' {} \;
+
+# Copy the TIFF Module results changed by https://github.com/openpreserve/jhove/pull/915
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/AA_Banner.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/AA_Banner.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/AA_Banner.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/strike.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/strike.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/strike.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/testpage-large.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/testpage-large.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/testpage-large.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/testpage-medium.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/testpage-medium.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/testpage-medium.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/oxford.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/oxford.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/oxford.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/jim___gg.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/jim___gg.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/jim___gg.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/bathy1.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/bathy1.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/bathy1.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/jim___cg.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/jim___cg.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/jim___cg.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/quad-tile.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/quad-tile.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/quad-tile.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/compos.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/compos.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/compos.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/pagemaker.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/pagemaker.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/pagemaker.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/jello.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/jello.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/jello.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/little-endian.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/little-endian.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/little-endian.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/cramps-tile.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/cramps-tile.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/cramps-tile.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/jim___ah.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/jim___ah.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/jim___ah.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/g3test.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/g3test.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/g3test.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/6mp_soft.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/6mp_soft.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/6mp_soft.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/ycbcr-cat.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/ycbcr-cat.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/ycbcr-cat.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/quad-lzw.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/quad-lzw.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/quad-lzw.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/jim___dg.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/jim___dg.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/jim___dg.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/fax2d.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/fax2d.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/fax2d.tif.jhove.xml"
+fi
+if [[ -f "${candidateRoot}/examples/modules/TIFF-hul/peppers.tif.jhove.xml" ]]; then
+ cp "${candidateRoot}/examples/modules/TIFF-hul/peppers.tif.jhove.xml" "${targetRoot}/examples/modules/TIFF-hul/peppers.tif.jhove.xml"
+fi
+
+# Copy the TIFF fix affected files from the candidate to the target
+declare -a tiff_affected=("examples/modules/TIFF-hul/cramps.tif.jhove.xml"
+ "examples/modules/TIFF-hul/text.tif.jhove.xml"
+ "examples/modules/TIFF-hul/testpage-small.tif.jhove.xml")
+for filename in "${tiff_affected[@]}"
+do
+ if [[ -f "${candidateRoot}/${filename}" ]]; then
+ cp "${candidateRoot}/${filename}" "${targetRoot}/${filename}"
+ fi
+done
+
+# Copy the XHTML fix affected files from the candidate to the target
+declare -a xhtml_affected=("errors/modules/HTML-hul/xhtml-trans-no-xml-dec.html.jhove.xml"
+ "errors/modules/HTML-hul/xhtml-strict-no-xml-dec.html.jhove.xml"
+ "errors/modules/HTML-hul/xhtml-frames-no-xml-dec.html.jhove.xml"
+ "errors/modules/HTML-hul/xhtml-1-1-no-xml-dec.html.jhove.xml")
+for filename in "${xhtml_affected[@]}"
+do
+ if [[ -f "${candidateRoot}/${filename}" ]]; then
+ cp "${candidateRoot}/${filename}" "${targetRoot}/${filename}"
+ fi
+done
+
+# Copy the XML fix affected files from the candidate to the target
+declare -a xhtml_affected=("errors/modules/HTML-hul/xhtml-trans-xml-dec.html.jhove.xml"
+ "errors/modules/HTML-hul/xhtml-strict-xml-dec.html.jhove.xml"
+ "errors/modules/HTML-hul/xhtml-frames-xml-dec.html.jhove.xml"
+ "errors/modules/HTML-hul/xhtml-1-1-xml-dec.html.jhove.xml"
+ "examples/modules/XML-hul/valid-external.dtd.jhove.xml"
+ "examples/modules/XML-hul/external-unparsed-entity.ent.jhove.xml"
+ "examples/modules/XML-hul/external-parsed-entity.ent.jhove.xml")
+for filename in "${xhtml_affected[@]}"
+do
+ if [[ -f "${candidateRoot}/${filename}" ]]; then
+ cp "${candidateRoot}/${filename}" "${targetRoot}/${filename}"
+ fi
+done
+
+# Copy all of the AIF and WAV results as these are changed by the AES schema changes
+cp -rf "${candidateRoot}/examples/modules/AIFF-hul" "${targetRoot}/examples/modules/"
+cp -rf "${candidateRoot}/examples/modules/WAVE-hul" "${targetRoot}/examples/modules/"
+cp -rf "${candidateRoot}/errors/modules/WAVE-hul" "${targetRoot}/errors/modules/"
+
+# Copy the results of the new XML fixes for multiple redirect lookups and to ensure no regression for repeat XML warnings
+cp -rf "${candidateRoot}/errors/modules/XML-hul" "${targetRoot}/errors/modules/"
+
+# Copy the results of the PDF offset message fix
+declare -a pdf_offset_affected=("errors/modules/PDF-hul/pdf-hul-5-govdocs-659152.pdf.jhove.xml"
+ "errors/modules/PDF-hul/pdf-hul-10-govdocs-803945.pdf.jhove.xml"
+ "regression/modules/PDF-hul/issue_306.pdf.jhove.xml")
+for filename in "${pdf_offset_affected[@]}"
+do
+ if [[ -f "${candidateRoot}/${filename}" ]]; then
+ cp "${candidateRoot}/${filename}" "${targetRoot}/${filename}"
+ fi
+done
diff --git a/jhove-core/pom.xml b/jhove-core/pom.xml
index e09a033a4..4ac656bdb 100644
--- a/jhove-core/pom.xml
+++ b/jhove-core/pom.xml
@@ -5,7 +5,7 @@
org.openpreservation.jhove
jhove
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
jhove-core
diff --git a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/JsonHandler.java b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/JsonHandler.java
index c6d4a23fe..e6cfa2cc6 100644
--- a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/JsonHandler.java
+++ b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/JsonHandler.java
@@ -68,13 +68,13 @@ public class JsonHandler extends HandlerBase {
private static final String NAME = "JSON";
/** Handler release identifier. */
- private static final String RELEASE = "1.2";
+ private static final String RELEASE = "1.3";
/** String release. */
private static final String RELEASE_CONSTANT = "release";
/** Handler release date. */
- private static final int[] DATE = { 2024, 03, 05 };
+ private static final int[] DATE = { 2024, 8, 22 };
private static final String DATE_CONSTANT = "date";
diff --git a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/TextHandler.java b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/TextHandler.java
index 979c2d8f7..7e5c6030e 100644
--- a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/TextHandler.java
+++ b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/TextHandler.java
@@ -59,8 +59,8 @@ public class TextHandler extends HandlerBase {
******************************************************************/
private static final String NAME = "TEXT";
- private static final String RELEASE = "1.6";
- private static final int[] DATE = { 2018, 03, 29 };
+ private static final String RELEASE = "1.7";
+ private static final int[] DATE = { 2024, 8, 22 }; // Handler release date.
private static final String NOTE = "This is the default JHOVE output "
+ "handler";
private static final String RIGHTS = "Derived from software Copyright 2004-2011 "
diff --git a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java
index cce73f5ae..e84b4fee3 100644
--- a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java
+++ b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java
@@ -83,10 +83,10 @@ protected NumberFormat initialValue() {
private static final String NAME = "XML";
/** Handler release identifier. */
- private static final String RELEASE = "1.11";
+ private static final String RELEASE = "1.12";
/** Handler release date. */
- private static final int[] DATE = { 2024, 03, 05 };
+ private static final int[] DATE = { 2024, 8, 22 };
/** Handler informative note. */
private static final String NOTE = "This output handler is defined by the XML Schema "
@@ -3786,7 +3786,7 @@ protected void showNisoImageCaptureMetadata20(NisoImageMetadata niso,
n = niso.getOrientation();
if (n != NisoImageMetadata.NULL) {
- // Values defined in the MIX 2.0 schema
+ // Values defined in the MIX 2.0 schema
final String[] orient = { "", "normal*",
"normal, image flipped", "normal, rotated 180\u00B0",
"normal, image flipped, rotated 180\u00B0",
@@ -4383,10 +4383,10 @@ protected void showAESAudioMetadata(AESAudioMetadata aes) {
sampleRate != AESAudioMetadata.NILL ||
wordSize != AESAudioMetadata.NULL) {
_writer.println(margn2 + elementStart("aes:formatList"));
- String[][] frAttr = { { "ID", formatRegionID },
- {"xsi:type", "aes:formatRegionType"},
- {"ownerRef", faceRegionID},
- {"label", "JHOVE"}};
+ String[][] frAttr = { { "ID", formatRegionID },
+ { "xsi:type", "aes:formatRegionType" },
+ { "ownerRef", faceRegionID },
+ { "label", "JHOVE" } };
_writer.println(margn3 + elementStart("aes:formatRegion", frAttr));
if (bitDepth != AESAudioMetadata.NULL) {
_writer.println(margn4 + element("aes:bitDepth",
@@ -4446,10 +4446,10 @@ private void writeAESTimeRangePart(String indent, String elementName, AESAudioMe
}
String[][] attributes = {
- {"editRate", formatters.get().format(sampleRate)},
- {"factorNumerator", "1"},
- {"factorDenominator", "1"}
- };
+ { "editRate", formatters.get().format(sampleRate) },
+ { "factorNumerator", "1" },
+ { "factorDenominator", "1" }
+ };
_writer.println(indent +
element(elementName, attributes, String.valueOf(timeDesc.getSamples())));
diff --git a/jhove-ext-modules/pom.xml b/jhove-ext-modules/pom.xml
index d52517052..662f7edbd 100644
--- a/jhove-ext-modules/pom.xml
+++ b/jhove-ext-modules/pom.xml
@@ -3,7 +3,7 @@
org.openpreservation.jhove
jhove
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
jhove-ext-modules
diff --git a/jhove-installer/pom.xml b/jhove-installer/pom.xml
index 76dc74c70..875c5bace 100644
--- a/jhove-installer/pom.xml
+++ b/jhove-installer/pom.xml
@@ -5,11 +5,11 @@
org.openpreservation.jhove
jhove
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
jhove-installer
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
JHOVE Installer
Maven-built IzPack installer for JHOVE.
@@ -22,14 +22,14 @@
1.6.2
1.4.2
1.4.3
- 1.4.3
+ 1.4.4
1.4.4
1.5.4
- 1.12.6
- 1.9.4
+ 1.12.7
+ 1.9.5
1.7.3
1.8.3
- 1.5.4
+ 1.5.5
@@ -175,7 +175,7 @@
org.openpreservation.jhove
jhove-ext-modules
- 1.31.0-SNAPSHOT
+ ${project.version}
org.openpreservation.jhove.modules
diff --git a/jhove-modules/aiff-hul/pom.xml b/jhove-modules/aiff-hul/pom.xml
index 72180500d..62c8b5191 100644
--- a/jhove-modules/aiff-hul/pom.xml
+++ b/jhove-modules/aiff-hul/pom.xml
@@ -3,7 +3,7 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
aiff-hul
1.6.2
diff --git a/jhove-modules/ascii-hul/pom.xml b/jhove-modules/ascii-hul/pom.xml
index 250678d7b..45e59316b 100644
--- a/jhove-modules/ascii-hul/pom.xml
+++ b/jhove-modules/ascii-hul/pom.xml
@@ -3,7 +3,7 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
ascii-hul
1.4.2
diff --git a/jhove-modules/gif-hul/pom.xml b/jhove-modules/gif-hul/pom.xml
index ad7998d18..519953a30 100644
--- a/jhove-modules/gif-hul/pom.xml
+++ b/jhove-modules/gif-hul/pom.xml
@@ -3,7 +3,7 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
gif-hul
1.4.3
diff --git a/jhove-modules/html-hul/pom.xml b/jhove-modules/html-hul/pom.xml
index a8af3637b..31e43b42a 100644
--- a/jhove-modules/html-hul/pom.xml
+++ b/jhove-modules/html-hul/pom.xml
@@ -3,10 +3,10 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
html-hul
- 1.4.3
+ 1.4.4
JHOVE HTML Module HUL
HTML module developed by Harvard University Library
@@ -14,7 +14,7 @@
org.openpreservation.jhove.modules
xml-hul
- 1.5.4
+ 1.5.5
diff --git a/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java b/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java
index 56b8cdd3a..9d3f8d727 100644
--- a/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java
+++ b/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java
@@ -93,589 +93,589 @@
*/
public class HtmlModule extends ModuleBase {
- /******************************************************************
- * PRIVATE CLASS FIELDS.
- ******************************************************************/
- private static final String TRANSITIONAL = "Transitional";
- private static final String STRICT = "Strict";
- private static final String FRAMESET = "Frameset";
- private static final String HTML_4_0 = "HTML 4.0";
- private static final String HTML_4_01 = "HTML 4.01";
- private static final String XHTML_1_0 = "XHTML 1.0";
- private static final String XHTML_1_1_STR = "XHTML 1.1";
-
- private static final String NAME = "HTML-hul";
- private static final String RELEASE = "1.4.3";
- private static final int[] DATE = { 2023, 03, 16 };
- private static final String[] FORMAT = { "HTML" };
- private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict,"
- + "HTML 4.0 Transitional, HTML 4.0 Frameset, "
- + "HTML 4.01 Strict, HTML 4.01 Transitional, HTML 4.01 Frameset"
- + "XHTML 1.0 Strict, XHTML 1.0 Transitional, XHTML 1.0 Frameset"
- + "XHTML 1.1";
-
- private static final String[] MIMETYPE = { "text/html" };
- private static final String WELLFORMED = "An HTML file is well-formed "
- + "if it meets the criteria defined in the HTML 3.2 specification "
- + "(W3C Recommendation, 14-Jan-1997), "
- + "the HTML 4.0 specification (W3C Recommendation, 24-Apr-1998, "
- + "the HTML 4.01 specification (W3C Recommendation, 24-Dec-1999, "
- + "the XHTML 1.0 specification (W3C Recommendation, 26-Jan-2000, "
- + "revised 1-Aug-2002, "
- + "or the XHTML 1.1 specification (W3C Recommendation, 31-May-2001";
- private static final String VALIDITY = "An HTML file is valid if it is "
- + "well-formed and has a valid DOCTYPE declaration.";
- private static final String REPINFO = "Languages, title, META tags, "
- + "frames, links, scripts, images, citations, defined terms, "
- + "abbreviations, entities, Unicode entity blocks";
- private static final String NOTE = "";
- private static final String RIGHTS = "Copyright 2004-2007 by JSTOR and "
- + "the President and Fellows of Harvard College. "
- + "Released under the GNU Lesser General Public License.";
-
- /******************************************************************
- * PRIVATE INSTANCE FIELDS.
- ******************************************************************/
-
- /* Doctype extracted from document */
- protected String _doctype;
-
- /* Constants for the recognized flavors of HTML */
- public static final int HTML_3_2 = 1, HTML_4_0_STRICT = 2,
- HTML_4_0_FRAMESET = 3, HTML_4_0_TRANSITIONAL = 4,
- HTML_4_01_STRICT = 5, HTML_4_01_FRAMESET = 6,
- HTML_4_01_TRANSITIONAL = 7, XHTML_1_0_STRICT = 8,
- XHTML_1_0_TRANSITIONAL = 9, XHTML_1_0_FRAMESET = 10, XHTML_1_1 = 11;
-
- /* Profile names, matching the above indices */
- private static final String[] PROFILENAMES = { null, null, // there are no
- // profiles for
- // HTML 3.2
- STRICT, FRAMESET, TRANSITIONAL, STRICT, FRAMESET, TRANSITIONAL,
- STRICT, FRAMESET, TRANSITIONAL, null // there
- // are no
- // profiles
- // for
- // XHTML
- // 1.1
- };
-
- /* Version names, matching the above indices */
- private static final String[] VERSIONNAMES = { null, "HTML 3.2", HTML_4_0,
- HTML_4_0, HTML_4_0, HTML_4_01, HTML_4_01, HTML_4_01, XHTML_1_0,
- XHTML_1_0, XHTML_1_0, XHTML_1_1_STR };
-
- /* Flag to know if the property TextMDMetadata is to be added */
- protected boolean _withTextMD = false;
- /* Hold the information needed to generate a textMD metadata fragment */
- protected TextMDMetadata _textMD;
-
- /******************************************************************
- * CLASS CONSTRUCTOR.
- ******************************************************************/
- /**
- * Instantiate an HtmlModule object.
- */
- public HtmlModule() {
- super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED,
- VALIDITY, REPINFO, NOTE, RIGHTS, false);
-
- _vendor = Agent.harvardInstance();
-
- /* HTML 3.2 spec */
- Document doc = new Document("HTML 3.2 Reference Specification",
- DocumentType.REPORT);
- Agent w3cAgent = Agent.newW3CInstance();
- doc.setPublisher(w3cAgent);
-
- Agent dRaggett = new Agent.Builder("Dave Raggett", AgentType.OTHER)
- .build();
- doc.setAuthor(dRaggett);
-
- doc.setDate("1997-01-14");
- doc.setIdentifier(
- new Identifier("http://www.w3c.org/TR/REC-html32-19970114",
- IdentifierType.URL));
- _specification.add(doc);
-
- /* HTML 4.0 spec */
- doc = new Document("HTML 4.0 Specification", DocumentType.REPORT);
- doc.setPublisher(w3cAgent);
- doc.setAuthor(dRaggett);
- Agent leHors = new Agent.Builder("Arnaud Le Hors", AgentType.OTHER)
- .build();
- doc.setAuthor(leHors);
- Agent jacobs = new Agent.Builder("Ian Jacobs", AgentType.OTHER).build();
- doc.setAuthor(jacobs);
- doc.setDate("1998-04-24");
- doc.setIdentifier(
- new Identifier("http://www.w3.org/TR/1998/REC-html40-19980424/",
- IdentifierType.URL));
- _specification.add(doc);
-
- /* HTML 4.01 spec */
- doc = new Document("HTML 4.01 Specification", DocumentType.REPORT);
- doc.setPublisher(w3cAgent);
- doc.setAuthor(dRaggett);
- doc.setAuthor(leHors);
- doc.setAuthor(jacobs);
- doc.setDate("1999-12-24");
- doc.setIdentifier(new Identifier(
- "http://www.w3.org/TR/1999/REC-html401-19991224/",
- IdentifierType.URL));
- _specification.add(doc);
-
- /* XHTML 1.0 spec */
- doc = new Document(
- "XHTML(TM) 1.0 The Extensible HyperText Markup Language "
- + "(Second Edition)",
- DocumentType.REPORT);
- doc.setPublisher(w3cAgent);
- doc.setDate("01-08-2002");
- doc.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/",
- IdentifierType.URL));
- _specification.add(doc);
-
- /* XHTML 1.1 spec */
- doc = new Document(" XHTML(TM) 1.1 - Module-based XHTML",
- DocumentType.REPORT);
- doc.setPublisher(w3cAgent);
- doc.setDate("31-05-2001");
- doc.setIdentifier(new Identifier(
- "http://www.w3.org/TR/2001/REC-xhtml11-20010531/",
- IdentifierType.URL));
- _specification.add(doc);
-
- /*
- * XHTML 2.0 spec -- NOT included yet; this is presented in
- * "conditionalized-out" form just as a note for future expansion.
- * if (false) {
- * doc = new Document("XHTML 2.0, W3C Working Draft",
- * DocumentType.OTHER);
- * doc.setPublisher(w3cAgent);
- * doc.setDate("22-07-2004");
- * doc.setIdentifier(new Identifier(
- * "http://www.w3.org/TR/2004/WD-xhtml2-20040722/",
- * IdentifierType.URL));
- * _specification.add(doc);
- * }
- */
-
- Signature sig = new ExternalSignature(".html", SignatureType.EXTENSION,
- SignatureUseType.OPTIONAL);
- _signature.add(sig);
- sig = new ExternalSignature(".htm", SignatureType.EXTENSION,
- SignatureUseType.OPTIONAL);
- _signature.add(sig);
- }
-
- /**
- * Parse the content of a purported HTML stream digital object and store the
- * results in RepInfo.
- *
- *
- * @param stream
- * An InputStream, positioned at its beginning, which is
- * generated from the object to be parsed. If multiple calls
- * to
- * parse
are made on the basis of a nonzero value
- * being returned, a new InputStream must be provided each
- * time.
- *
- * @param info
- * A fresh (on the first call) RepInfo object which will be
- * modified to reflect the results of the parsing If multiple
- * calls to parse
are made on the basis of a
- * nonzero
- * value being returned, the same RepInfo object should be
- * passed
- * with each call.
- *
- * @param parseIndex
- * Must be 0 in first call to parse
. If
- * parse
returns a nonzero value, it must be
- * called
- * again with parseIndex
equal to that return
- * value.
- *
- * @return parseInt
- */
- @Override
- public int parse(InputStream stream, RepInfo info, int parseIndex) {
- if (parseIndex != 0) {
- // Coming in with parseIndex = 1 indicates that we've determined
- // this is XHTML; so we invoke the XML module to parse it.
- // If parseIndex is 100, this is the first invocation of the
- // XML module, so we call it with 0; otherwise we call it with
- // the value of parseIndex.
- if (isXmlAvailable()) {
- edu.harvard.hul.ois.jhove.module.XmlModule xmlMod = new edu.harvard.hul.ois.jhove.module.XmlModule();
- if (parseIndex == 100) {
- parseIndex = 0;
- }
- xmlMod.setApp(_app);
- xmlMod.setBase(_je);
- xmlMod.setDefaultParams(_defaultParams);
- try {
- xmlMod.applyDefaultParams();
- } catch (Exception e) {
- // really shouldn't happen
- }
- xmlMod.setXhtmlDoctype(_doctype);
- return xmlMod.parse(stream, info, parseIndex);
- }
- // The XML module shouldn't be missing from any installation,
- // but someone who really wanted to could remove it. In
- // that case, you deserve what you get.
- info.setMessage(new ErrorMessage(
- MessageConstants.JHOVE_1));
- info.setWellFormed(false); // Treat it as completely wrong
- return 0;
- }
- /* parseIndex = 0, first call only */
- _doctype = null;
- // Test if textMD is to be generated
- if (_defaultParams != null) {
- Iterator iter = _defaultParams.iterator();
- while (iter.hasNext()) {
- String param = (String) iter.next();
- if ("withtextmd=true".equalsIgnoreCase(param)) {
- _withTextMD = true;
- }
- }
- }
-
- initParse();
- info.setFormat(_format[0]);
- info.setMimeType(_mimeType[0]);
- info.setModule(this);
-
- if (_textMD == null || parseIndex == 0) {
- _textMD = new TextMDMetadata();
- }
- /*
- * We may have already done the checksums while converting a temporary
- * file.
- */
- setupDataStream(stream, info);
-
- ParseHtml parser;
- HtmlMetadata metadata = null;
- HtmlCharStream cstream;
- try {
- cstream = new HtmlCharStream(_dstream, "ISO-8859-1");
- parser = new ParseHtml(this, cstream);
- } catch (UnsupportedEncodingException e) {
- info.setMessage(new ErrorMessage(
- MessageConstants.JHOVE_2, e.getMessage()));
- info.setWellFormed(false);
- return 0; // shouldn't happen!
- }
- int type = 0;
- try {
- List elements = parser.HtmlDoc();
- if (elements.isEmpty()) {
- // Consider an empty document bad
- info.setWellFormed(false);
- info.setMessage(new ErrorMessage(
- MessageConstants.JHOVE_3));
- return 0;
- }
- type = checkDoctype(elements);
- if (type < 0) {
- info.setWellFormed(false);
- info.setMessage(new ErrorMessage(
- MessageConstants.HTML_HUL_15));
- return 0;
- }
- /*
- * Check if there is at least one html, head, body or title tag. A
- * plain text document might be interpreted as a single PCDATA,
- * which is in some ethereal sense well-formed HTML, but it's
- * pointless to consider it such. It might also use angle brackets
- * as a text delimiter, and that shouldn't count as HTML either.
- */
- boolean hasElements = false;
- Iterator iter = elements.iterator();
- while (iter.hasNext()) {
- Object o = iter.next();
- if (o instanceof JHOpenTag) {
- String name = ((JHOpenTag) o).getName();
- if ("html".equals(name) || "head".equals(name)
- || "body".equals(name) || "title".equals(name)) {
- hasElements = true;
- }
- break;
- }
- }
- if (!hasElements) {
- info.setMessage(new ErrorMessage(
- MessageConstants.HTML_HUL_17));
- info.setWellFormed(false);
- return 0;
- }
-
- // CRLF from HtmlCharStream ...
- String lineEnd = cstream.getKindOfLineEnd();
- if (lineEnd == null) {
- info.setMessage(
- new InfoMessage(MessageConstants.HTML_HUL_23));
- _textMD.setLinebreak(TextMDMetadata.NILL);
- } else if ("CR".equalsIgnoreCase(lineEnd)) {
- _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CR);
- } else if ("LF".equalsIgnoreCase(lineEnd)) {
- _textMD.setLinebreak(TextMDMetadata.LINEBREAK_LF);
- } else if ("CRLF".equalsIgnoreCase(lineEnd)) {
- _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CRLF);
- }
-
- if (type == 0) {
- /*
- * If we can't find a doctype, it still might be XHTML if the
- * elements start with an XML declaration and the root element
- * is "html"
- */
- switch (seemsToBeXHTML(elements)) {
- case 0: // Not XML
- break; // fall through
- case 1: // XML but not HTML
- info.setMessage(new ErrorMessage(
- MessageConstants.HTML_HUL_14));
- info.setWellFormed(false);
- return 0;
- case 2: // probably XHTML
- return 100;
- default:
- break;
- }
- info.setMessage(new ErrorMessage(
- MessageConstants.HTML_HUL_16));
- info.setValid(false);
- // But keep going
- }
-
- HtmlDocDesc docDesc = null;
- switch (type) {
- case HTML_3_2:
-
- case HTML_4_0_FRAMESET:
- docDesc = new Html4_0FrameDocDesc();
- _textMD.setMarkup_basis("HTML");
- _textMD.setMarkup_basis_version("4.0");
- break;
- case HTML_4_0_TRANSITIONAL:
- docDesc = new Html4_0TransDocDesc();
- _textMD.setMarkup_basis("HTML");
- _textMD.setMarkup_basis_version("4.0");
- break;
- case HTML_4_0_STRICT:
- docDesc = new Html4_0StrictDocDesc();
- _textMD.setMarkup_basis("HTML");
- _textMD.setMarkup_basis_version("4.0");
- break;
- case HTML_4_01_FRAMESET:
- docDesc = new Html4_01FrameDocDesc();
- _textMD.setMarkup_basis("HTML");
- _textMD.setMarkup_basis_version("4.01");
- break;
- case HTML_4_01_TRANSITIONAL:
- docDesc = new Html4_01TransDocDesc();
- _textMD.setMarkup_basis("HTML");
- _textMD.setMarkup_basis_version("4.01");
- break;
- case HTML_4_01_STRICT:
- docDesc = new Html4_01StrictDocDesc();
- _textMD.setMarkup_basis("HTML");
- _textMD.setMarkup_basis_version("4.01");
- break;
- case XHTML_1_0_STRICT:
- case XHTML_1_0_TRANSITIONAL:
- case XHTML_1_0_FRAMESET:
- case XHTML_1_1:
- // Force a second call to parse as XML. 100 is a
- // magic code for the first XML call.
- return 100;
- }
- _textMD.setMarkup_language(_doctype);
- if (docDesc == null) {
- info.setMessage(new InfoMessage(
- MessageConstants.HTML_HUL_22));
- docDesc = new Html3_2DocDesc();
- }
- docDesc.validate(elements, info);
- metadata = docDesc.getMetadata();
-
- // Try to get the charset from the meta Content
- if (metadata.getCharset() != null) {
- _textMD.setCharset(metadata.getCharset());
- } else {
- _textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1);
- }
- String textMDEncoding = _textMD.getCharset();
- if (textMDEncoding.contains("UTF")) {
- _textMD.setByte_order(_bigEndian ? TextMDMetadata.BYTE_ORDER_BIG
- : TextMDMetadata.BYTE_ORDER_LITTLE);
- _textMD.setByte_size("8");
- _textMD.setCharacter_size("variable");
- } else {
- _textMD.setByte_order(_bigEndian ? TextMDMetadata.BYTE_ORDER_BIG
- : TextMDMetadata.BYTE_ORDER_LITTLE);
- _textMD.setByte_size("8");
- _textMD.setCharacter_size("1");
- }
- } catch (ParseException e) {
- Token t = e.currentToken;
- info.setMessage(new ErrorMessage(
- MessageConstants.HTML_HUL_18,
- "Line = " + t.beginLine + ", column = " + t.beginColumn));
- info.setWellFormed(false);
- } catch (TokenMgrError f) {
- info.setMessage(new ErrorMessage(
- MessageConstants.HTML_HUL_19,
- f.getLocalizedMessage()));
- info.setWellFormed(false);
- }
-
- if (info.getWellFormed() == RepInfo.FALSE) {
- return 0;
- }
-
- if (type != 0) {
- if (PROFILENAMES[type] != null) {
- info.setProfile(PROFILENAMES[type]);
- }
- info.setVersion(VERSIONNAMES[type]);
- }
-
- if (metadata != null) {
- Property property = metadata
- .toProperty(_withTextMD ? _textMD : null);
- if (property != null) {
- info.setProperty(property);
- }
- }
-
- // Set the checksums in the report if they're calculated
- setChecksums(this._ckSummer, info);
-
- return 0;
- }
-
- /**
- * Check if the digital object conforms to this Module's internal signature
- * information.
- *
- * HTML is one of the most ill-defined of any open formats, so checking a
- * "signature" really means using some heuristics. The only required tag is
- * TITLE, but that could occur well into the file. So we look for any of
- * three strings -- taking into account case-independence and white space --
- * within the first sigBytes bytes, and call that a signature check.
- *
- * @param file
- * A File object for the object being parsed
- * @param stream
- * An InputStream, positioned at its beginning, which is
- * generated from the object to be parsed
- * @param info
- * A fresh RepInfo object which will be modified to reflect the
- * results of the test
- *
- * @throws IOException
- */
- @Override
- public void checkSignatures(File file, InputStream stream, RepInfo info)
- throws IOException {
- info.setFormat(_format[0]);
- info.setMimeType(_mimeType[0]);
- info.setModule(this);
- char[][] sigtext = new char[3][];
- sigtext[0] = "= 2) {
- firstElem = (JHElement) elements.get(1);
- }
- if (!(firstElem instanceof JHDoctype)) {
- return 0; // no DOCTYPE found
- }
- List dt = ((JHDoctype) firstElem).getDoctypeElements();
- if (dt.size() < 3) {
- return 0;
- }
- try {
- // Is DOCTYPE case sensitive? Assume not.
- String str = ((String) dt.get(0)).toUpperCase();
- if (!"HTML".equals(str)) {
- // It's not HTML
- return -1;
- }
- str = ((String) dt.get(1)).toUpperCase();
- if (!"PUBLIC".equals(str)) {
- return 0;
- }
- str = stripQuotes(((String) dt.get(2)).toUpperCase());
- _doctype = str;
- if (null != str)
- switch (str) {
- case "-//W3C//DTD HTML 3.2 FINAL//EN":
- case "-//W3C//DTD HTML 3.2//EN":
- return HTML_3_2;
- case "-//W3C//DTD HTML 4.0//EN":
- return HTML_4_0_STRICT;
- case "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN":
- return HTML_4_0_TRANSITIONAL;
- case "-//W3C//DTD HTML 4.0 FRAMESET//EN":
- return HTML_4_0_FRAMESET;
- case "-//W3C//DTD HTML 4.01//EN":
- return HTML_4_01_STRICT;
- case "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN":
- return HTML_4_01_TRANSITIONAL;
- case "-//W3C//DTD HTML 4.01 FRAMESET//EN":
- return HTML_4_01_FRAMESET;
+ /******************************************************************
+  * PRIVATE CLASS FIELDS.
+  ******************************************************************/
+ /* Profile-name fragments used to build the PROFILENAMES table. */
+ private static final String TRANSITIONAL = "Transitional";
+ private static final String STRICT = "Strict";
+ private static final String FRAMESET = "Frameset";
+ /* Version-name fragments used to build the VERSIONNAMES table. */
+ private static final String HTML_4_0 = "HTML 4.0";
+ private static final String HTML_4_01 = "HTML 4.01";
+ private static final String XHTML_1_0 = "XHTML 1.0";
+ private static final String XHTML_1_1_STR = "XHTML 1.1";
+
+ /* Module descriptors handed to the superclass constructor. */
+ private static final String NAME = "HTML-hul";
+ private static final String RELEASE = "1.4.4";
+ private static final int[] DATE = { 2024, 8, 22 };
+ private static final String[] FORMAT = { "HTML" };
+ /*
+  * Every fragment ends with ", " so that the concatenated list keeps a
+  * separator between entries that sit on different source lines.
+  */
+ private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict, "
+         + "HTML 4.0 Transitional, HTML 4.0 Frameset, "
+         + "HTML 4.01 Strict, HTML 4.01 Transitional, HTML 4.01 Frameset, "
+         + "XHTML 1.0 Strict, XHTML 1.0 Transitional, XHTML 1.0 Frameset, "
+         + "XHTML 1.1";
+
+ private static final String[] MIMETYPE = { "text/html" };
+ /* Each parenthesised recommendation date is closed before the next item. */
+ private static final String WELLFORMED = "An HTML file is well-formed "
+         + "if it meets the criteria defined in the HTML 3.2 specification "
+         + "(W3C Recommendation, 14-Jan-1997), "
+         + "the HTML 4.0 specification (W3C Recommendation, 24-Apr-1998), "
+         + "the HTML 4.01 specification (W3C Recommendation, 24-Dec-1999), "
+         + "the XHTML 1.0 specification (W3C Recommendation, 26-Jan-2000, "
+         + "revised 1-Aug-2002), "
+         + "or the XHTML 1.1 specification (W3C Recommendation, 31-May-2001)";
+ private static final String VALIDITY = "An HTML file is valid if it is "
+         + "well-formed and has a valid DOCTYPE declaration.";
+ private static final String REPINFO = "Languages, title, META tags, "
+         + "frames, links, scripts, images, citations, defined terms, "
+         + "abbreviations, entities, Unicode entity blocks";
+ private static final String NOTE = "";
+ private static final String RIGHTS = "Copyright 2004-2007 by JSTOR and "
+         + "the President and Fellows of Harvard College. "
+         + "Released under the GNU Lesser General Public License.";
+
+ /******************************************************************
+  * DOCTYPE STATE, HTML FLAVOR CONSTANTS AND LOOKUP TABLES.
+  ******************************************************************/
+
+ /* Doctype extracted from document */
+ protected String _doctype;
+
+ /*
+  * Constants for the recognized flavors of HTML. They double as indices
+  * into PROFILENAMES and VERSIONNAMES; 0 is left for "no flavor
+  * recognized".
+  */
+ public static final int HTML_3_2 = 1, HTML_4_0_STRICT = 2,
+         HTML_4_0_FRAMESET = 3, HTML_4_0_TRANSITIONAL = 4,
+         HTML_4_01_STRICT = 5, HTML_4_01_FRAMESET = 6,
+         HTML_4_01_TRANSITIONAL = 7, XHTML_1_0_STRICT = 8,
+         XHTML_1_0_TRANSITIONAL = 9, XHTML_1_0_FRAMESET = 10, XHTML_1_1 = 11;
+
+ /*
+  * Profile names, one entry per flavor constant above. Note that the
+  * XHTML 1.0 constants are ordered Strict, Transitional, Frameset while
+  * the HTML 4.x constants are ordered Strict, Frameset, Transitional, so
+  * the entries must follow the constants rather than a repeating pattern.
+  */
+ private static final String[] PROFILENAMES = {
+         null, // 0: no flavor recognized
+         null, // HTML_3_2: there are no profiles for HTML 3.2
+         STRICT, // HTML_4_0_STRICT
+         FRAMESET, // HTML_4_0_FRAMESET
+         TRANSITIONAL, // HTML_4_0_TRANSITIONAL
+         STRICT, // HTML_4_01_STRICT
+         FRAMESET, // HTML_4_01_FRAMESET
+         TRANSITIONAL, // HTML_4_01_TRANSITIONAL
+         STRICT, // XHTML_1_0_STRICT
+         TRANSITIONAL, // XHTML_1_0_TRANSITIONAL
+         FRAMESET, // XHTML_1_0_FRAMESET
+         null // XHTML_1_1: there are no profiles for XHTML 1.1
+ };
+
+ /* Version names, one entry per flavor constant above */
+ private static final String[] VERSIONNAMES = { null, "HTML 3.2", HTML_4_0,
+         HTML_4_0, HTML_4_0, HTML_4_01, HTML_4_01, HTML_4_01, XHTML_1_0,
+         XHTML_1_0, XHTML_1_0, XHTML_1_1_STR };
+
+ /* Flag to know if the property TextMDMetadata is to be added */
+ protected boolean _withTextMD = false;
+ /* Hold the information needed to generate a textMD metadata fragment */
+ protected TextMDMetadata _textMD;
+
+ /******************************************************************
+  * CLASS CONSTRUCTOR.
+  ******************************************************************/
+ /**
+  * Instantiate an HtmlModule object.
+  *
+  * Registers the module descriptors with the superclass, then records the
+  * five W3C specifications the module covers and the two optional
+  * file-extension signatures (".html" and ".htm"). All specification
+  * dates are written year-first (YYYY-MM-DD).
+  */
+ public HtmlModule() {
+     super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED,
+             VALIDITY, REPINFO, NOTE, RIGHTS, false);
+
+     _vendor = Agent.harvardInstance();
+
+     /* HTML 3.2 spec */
+     Document doc = new Document("HTML 3.2 Reference Specification",
+             DocumentType.REPORT);
+     Agent w3cAgent = Agent.newW3CInstance();
+     doc.setPublisher(w3cAgent);
+
+     Agent dRaggett = new Agent.Builder("Dave Raggett", AgentType.OTHER)
+             .build();
+     doc.setAuthor(dRaggett);
+
+     doc.setDate("1997-01-14");
+     doc.setIdentifier(
+             new Identifier("http://www.w3c.org/TR/REC-html32-19970114",
+                     IdentifierType.URL));
+     _specification.add(doc);
+
+     /* HTML 4.0 spec */
+     doc = new Document("HTML 4.0 Specification", DocumentType.REPORT);
+     doc.setPublisher(w3cAgent);
+     doc.setAuthor(dRaggett);
+     Agent leHors = new Agent.Builder("Arnaud Le Hors", AgentType.OTHER)
+             .build();
+     doc.setAuthor(leHors);
+     Agent jacobs = new Agent.Builder("Ian Jacobs", AgentType.OTHER).build();
+     doc.setAuthor(jacobs);
+     doc.setDate("1998-04-24");
+     doc.setIdentifier(
+             new Identifier("http://www.w3.org/TR/1998/REC-html40-19980424/",
+                     IdentifierType.URL));
+     _specification.add(doc);
+
+     /* HTML 4.01 spec */
+     doc = new Document("HTML 4.01 Specification", DocumentType.REPORT);
+     doc.setPublisher(w3cAgent);
+     doc.setAuthor(dRaggett);
+     doc.setAuthor(leHors);
+     doc.setAuthor(jacobs);
+     doc.setDate("1999-12-24");
+     doc.setIdentifier(new Identifier(
+             "http://www.w3.org/TR/1999/REC-html401-19991224/",
+             IdentifierType.URL));
+     _specification.add(doc);
+
+     /* XHTML 1.0 spec */
+     doc = new Document(
+             "XHTML(TM) 1.0 The Extensible HyperText Markup Language "
+                     + "(Second Edition)",
+             DocumentType.REPORT);
+     doc.setPublisher(w3cAgent);
+     // Year-first, like the HTML specification dates above
+     doc.setDate("2002-08-01");
+     doc.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/",
+             IdentifierType.URL));
+     _specification.add(doc);
+
+     /* XHTML 1.1 spec */
+     doc = new Document("XHTML(TM) 1.1 - Module-based XHTML",
+             DocumentType.REPORT);
+     doc.setPublisher(w3cAgent);
+     // Year-first, like the HTML specification dates above
+     doc.setDate("2001-05-31");
+     doc.setIdentifier(new Identifier(
+             "http://www.w3.org/TR/2001/REC-xhtml11-20010531/",
+             IdentifierType.URL));
+     _specification.add(doc);
+
+     /*
+      * XHTML 2.0 spec -- NOT included yet; this is presented in
+      * "conditionalized-out" form just as a note for future expansion.
+      * if (false) {
+      * doc = new Document("XHTML 2.0, W3C Working Draft",
+      * DocumentType.OTHER);
+      * doc.setPublisher(w3cAgent);
+      * doc.setDate("2004-07-22");
+      * doc.setIdentifier(new Identifier(
+      * "http://www.w3.org/TR/2004/WD-xhtml2-20040722/",
+      * IdentifierType.URL));
+      * _specification.add(doc);
+      * }
+      */
+
+     /* File-extension signatures; both are optional */
+     Signature sig = new ExternalSignature(".html", SignatureType.EXTENSION,
+             SignatureUseType.OPTIONAL);
+     _signature.add(sig);
+     sig = new ExternalSignature(".htm", SignatureType.EXTENSION,
+             SignatureUseType.OPTIONAL);
+     _signature.add(sig);
+ }
+
+ /**
+  * Parse the content of a purported HTML stream digital object and store the
+  * results in RepInfo.
+  *
+  * On the first call (parseIndex 0) the stream is tokenized as HTML, the
+  * DOCTYPE is classified, and the document is validated against the
+  * matching HTML document description. Documents that declare an XHTML
+  * DOCTYPE, or that have no DOCTYPE but start with an XML declaration and
+  * an "html" root tag, are not validated here: 100 is returned so that the
+  * caller calls again and the XML module does the work.
+  *
+  * @param stream
+  *            An InputStream, positioned at its beginning, which is
+  *            generated from the object to be parsed. If multiple calls
+  *            to {@code parse} are made on the basis of a nonzero value
+  *            being returned, a new InputStream must be provided each
+  *            time.
+  *
+  * @param info
+  *            A fresh (on the first call) RepInfo object which will be
+  *            modified to reflect the results of the parsing. If multiple
+  *            calls to {@code parse} are made on the basis of a nonzero
+  *            value being returned, the same RepInfo object should be
+  *            passed with each call.
+  *
+  * @param parseIndex
+  *            Must be 0 in first call to {@code parse}. If {@code parse}
+  *            returns a nonzero value, it must be called again with
+  *            {@code parseIndex} equal to that return value.
+  *
+  * @return 0 when no further call is needed; 100 when the document must
+  *         be parsed again as XML; on such a later call, whatever the XML
+  *         module's {@code parse} returns.
+  */
+ @Override
+ public int parse(InputStream stream, RepInfo info, int parseIndex) {
+     if (parseIndex != 0) {
+         // Coming in with a nonzero parseIndex indicates that we've
+         // determined this is XHTML; so we invoke the XML module to parse
+         // it. If parseIndex is 100, this is the first invocation of the
+         // XML module, so we call it with 0; otherwise we call it with
+         // the value of parseIndex.
+         if (isXmlAvailable()) {
+             edu.harvard.hul.ois.jhove.module.XmlModule xmlMod = new edu.harvard.hul.ois.jhove.module.XmlModule();
+             if (parseIndex == 100) {
+                 parseIndex = 0;
+             }
+             xmlMod.setApp(_app);
+             xmlMod.setBase(_je);
+             xmlMod.setDefaultParams(_defaultParams);
+             try {
+                 xmlMod.applyDefaultParams();
+             } catch (Exception e) {
+                 // really shouldn't happen; deliberately best-effort, the
+                 // XML parse below still runs without the default params
+             }
+             xmlMod.setXhtmlDoctype(_doctype);
+             return xmlMod.parse(stream, info, parseIndex);
+         }
+         // The XML module shouldn't be missing from any installation,
+         // but someone who really wanted to could remove it. In
+         // that case, you deserve what you get.
+         info.setMessage(new ErrorMessage(
+                 MessageConstants.JHOVE_1));
+         info.setWellFormed(false); // Treat it as completely wrong
+         return 0;
+     }
+     /* parseIndex = 0, first call only */
+     _doctype = null;
+     // Test if textMD is to be generated
+     if (_defaultParams != null) {
+         Iterator iter = _defaultParams.iterator();
+         while (iter.hasNext()) {
+             String param = (String) iter.next();
+             if ("withtextmd=true".equalsIgnoreCase(param)) {
+                 _withTextMD = true;
+             }
+         }
+     }
+
+     initParse();
+     info.setFormat(_format[0]);
+     info.setMimeType(_mimeType[0]);
+     info.setModule(this);
+
+     // parseIndex is always 0 on this path, so every first call starts
+     // with a fresh textMD holder.
+     _textMD = new TextMDMetadata();
+     /*
+      * We may have already done the checksums while converting a temporary
+      * file.
+      */
+     setupDataStream(stream, info);
+
+     ParseHtml parser;
+     HtmlMetadata metadata = null;
+     HtmlCharStream cstream;
+     try {
+         cstream = new HtmlCharStream(_dstream, "ISO-8859-1");
+         parser = new ParseHtml(this, cstream);
+     } catch (UnsupportedEncodingException e) {
+         info.setMessage(new ErrorMessage(
+                 MessageConstants.JHOVE_2, e.getMessage()));
+         info.setWellFormed(false);
+         return 0; // shouldn't happen!
+     }
+     int type = 0;
+     try {
+         List elements = parser.HtmlDoc();
+         if (elements.isEmpty()) {
+             // Consider an empty document bad
+             info.setWellFormed(false);
+             info.setMessage(new ErrorMessage(
+                     MessageConstants.JHOVE_3));
+             return 0;
+         }
+         type = checkDoctype(elements);
+         if (type < 0) {
+             info.setWellFormed(false);
+             info.setMessage(new ErrorMessage(
+                     MessageConstants.HTML_HUL_15));
+             return 0;
+         }
+         /*
+          * Check that the first open tag is html, head, body or title
+          * (only the first open tag is inspected). A plain text document
+          * might be interpreted as a single PCDATA, which is in some
+          * ethereal sense well-formed HTML, but it's pointless to
+          * consider it such. It might also use angle brackets as a text
+          * delimiter, and that shouldn't count as HTML either.
+          */
+         boolean hasElements = false;
+         Iterator iter = elements.iterator();
+         while (iter.hasNext()) {
+             Object o = iter.next();
+             if (o instanceof JHOpenTag) {
+                 String name = ((JHOpenTag) o).getName();
+                 if ("html".equals(name) || "head".equals(name)
+                         || "body".equals(name) || "title".equals(name)) {
+                     hasElements = true;
+                 }
+                 break;
+             }
+         }
+         if (!hasElements) {
+             info.setMessage(new ErrorMessage(
+                     MessageConstants.HTML_HUL_17));
+             info.setWellFormed(false);
+             return 0;
+         }
+
+         // CRLF from HtmlCharStream ...
+         String lineEnd = cstream.getKindOfLineEnd();
+         if (lineEnd == null) {
+             info.setMessage(
+                     new InfoMessage(MessageConstants.HTML_HUL_23));
+             _textMD.setLinebreak(TextMDMetadata.NILL);
+         } else if ("CR".equalsIgnoreCase(lineEnd)) {
+             _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CR);
+         } else if ("LF".equalsIgnoreCase(lineEnd)) {
+             _textMD.setLinebreak(TextMDMetadata.LINEBREAK_LF);
+         } else if ("CRLF".equalsIgnoreCase(lineEnd)) {
+             _textMD.setLinebreak(TextMDMetadata.LINEBREAK_CRLF);
+         }
+
+         if (type == 0) {
+             /*
+              * If we can't find a doctype, it still might be XHTML if the
+              * elements start with an XML declaration and the root element
+              * is "html"
+              */
+             switch (seemsToBeXHTML(elements)) {
+                 case 0: // Not XML
+                     break; // continue below as HTML without a doctype
+                 case 1: // XML but not HTML
+                     info.setMessage(new ErrorMessage(
+                             MessageConstants.HTML_HUL_14));
+                     info.setWellFormed(false);
+                     return 0;
+                 case 2: // probably XHTML
+                     return 100;
+                 default:
+                     break;
+             }
+             info.setMessage(new ErrorMessage(
+                     MessageConstants.HTML_HUL_16));
+             info.setValid(false);
+             // But keep going
+         }
+
+         HtmlDocDesc docDesc = null;
+         switch (type) {
+             case HTML_3_2:
+                 // Must not fall through into the HTML 4.0 Frameset case:
+                 // a declared HTML 3.2 document is validated as HTML 3.2.
+                 docDesc = new Html3_2DocDesc();
+                 _textMD.setMarkup_basis("HTML");
+                 _textMD.setMarkup_basis_version("3.2");
+                 break;
+             case HTML_4_0_FRAMESET:
+                 docDesc = new Html4_0FrameDocDesc();
+                 _textMD.setMarkup_basis("HTML");
+                 _textMD.setMarkup_basis_version("4.0");
+                 break;
+             case HTML_4_0_TRANSITIONAL:
+                 docDesc = new Html4_0TransDocDesc();
+                 _textMD.setMarkup_basis("HTML");
+                 _textMD.setMarkup_basis_version("4.0");
+                 break;
+             case HTML_4_0_STRICT:
+                 docDesc = new Html4_0StrictDocDesc();
+                 _textMD.setMarkup_basis("HTML");
+                 _textMD.setMarkup_basis_version("4.0");
+                 break;
+             case HTML_4_01_FRAMESET:
+                 docDesc = new Html4_01FrameDocDesc();
+                 _textMD.setMarkup_basis("HTML");
+                 _textMD.setMarkup_basis_version("4.01");
+                 break;
+             case HTML_4_01_TRANSITIONAL:
+                 docDesc = new Html4_01TransDocDesc();
+                 _textMD.setMarkup_basis("HTML");
+                 _textMD.setMarkup_basis_version("4.01");
+                 break;
+             case HTML_4_01_STRICT:
+                 docDesc = new Html4_01StrictDocDesc();
+                 _textMD.setMarkup_basis("HTML");
+                 _textMD.setMarkup_basis_version("4.01");
+                 break;
+             case XHTML_1_0_STRICT:
+             case XHTML_1_0_TRANSITIONAL:
+             case XHTML_1_0_FRAMESET:
+             case XHTML_1_1:
+                 // Force a second call to parse as XML. 100 is a
+                 // magic code for the first XML call.
+                 return 100;
+             default:
+                 // No recognized doctype (type == 0): docDesc stays null
+                 // and the HTML 3.2 description is used below.
+                 break;
+         }
+         _textMD.setMarkup_language(_doctype);
+         if (docDesc == null) {
+             info.setMessage(new InfoMessage(
+                     MessageConstants.HTML_HUL_22));
+             docDesc = new Html3_2DocDesc();
+         }
+         docDesc.validate(elements, info);
+         metadata = docDesc.getMetadata();
+
+         // Try to get the charset from the meta Content
+         if (metadata.getCharset() != null) {
+             _textMD.setCharset(metadata.getCharset());
+         } else {
+             _textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1);
+         }
+         String textMDEncoding = _textMD.getCharset();
+         boolean utfEncoding = textMDEncoding.contains("UTF");
+         // Byte order and byte size are recorded the same way for every
+         // charset; only the character size depends on the encoding.
+         _textMD.setByte_order(_bigEndian ? TextMDMetadata.BYTE_ORDER_BIG
+                 : TextMDMetadata.BYTE_ORDER_LITTLE);
+         _textMD.setByte_size("8");
+         _textMD.setCharacter_size(utfEncoding ? "variable" : "1");
+     } catch (ParseException e) {
+         Token t = e.currentToken;
+         info.setMessage(new ErrorMessage(
+                 MessageConstants.HTML_HUL_18,
+                 "Line = " + t.beginLine + ", column = " + t.beginColumn));
+         info.setWellFormed(false);
+     } catch (TokenMgrError f) {
+         info.setMessage(new ErrorMessage(
+                 MessageConstants.HTML_HUL_19,
+                 f.getLocalizedMessage()));
+         info.setWellFormed(false);
+     }
+
+     if (info.getWellFormed() == RepInfo.FALSE) {
+         return 0;
+     }
+
+     // Report profile and version only when a doctype was recognized
+     if (type != 0) {
+         if (PROFILENAMES[type] != null) {
+             info.setProfile(PROFILENAMES[type]);
+         }
+         info.setVersion(VERSIONNAMES[type]);
+     }
+
+     if (metadata != null) {
+         Property property = metadata
+                 .toProperty(_withTextMD ? _textMD : null);
+         if (property != null) {
+             info.setProperty(property);
+         }
+     }
+
+     // Set the checksums in the report if they're calculated
+     setChecksums(this._ckSummer, info);
+
+     return 0;
+ }
+
+ /**
+ * Check if the digital object conforms to this Module's internal signature
+ * information.
+ *
+ * HTML is one of the most ill-defined of any open formats, so checking a
+ * "signature" really means using some heuristics. The only required tag is
+ * TITLE, but that could occur well into the file. So we look for any of
+ * three strings -- taking into account case-independence and white space --
+ * within the first sigBytes bytes, and call that a signature check.
+ *
+ * @param file
+ * A File object for the object being parsed
+ * @param stream
+ * An InputStream, positioned at its beginning, which is
+ * generated from the object to be parsed
+ * @param info
+ * A fresh RepInfo object which will be modified to reflect the
+ * results of the test
+ *
+ * @throws IOException
+ */
+ @Override
+ public void checkSignatures(File file, InputStream stream, RepInfo info)
+ throws IOException {
+ info.setFormat(_format[0]);
+ info.setMimeType(_mimeType[0]);
+ info.setModule(this);
+ char[][] sigtext = new char[3][];
+ sigtext[0] = "= 2) {
+ firstElem = (JHElement) elements.get(1);
+ }
+ if (!(firstElem instanceof JHDoctype)) {
+ return 0; // no DOCTYPE found
+ }
+ List dt = ((JHDoctype) firstElem).getDoctypeElements();
+ if (dt.size() < 3) {
+ return 0;
+ }
+ try {
+ // Is DOCTYPE case sensitive? Assume not.
+ String str = ((String) dt.get(0)).toUpperCase();
+ if (!"HTML".equals(str)) {
+ // It's not HTML
+ return -1;
+ }
+ str = ((String) dt.get(1)).toUpperCase();
+ if (!"PUBLIC".equals(str)) {
+ return 0;
+ }
+ str = stripQuotes(((String) dt.get(2)).toUpperCase());
+ _doctype = str;
+ if (null != str)
+ switch (str) {
+ case "-//W3C//DTD HTML 3.2 FINAL//EN":
+ case "-//W3C//DTD HTML 3.2//EN":
+ return HTML_3_2;
+ case "-//W3C//DTD HTML 4.0//EN":
+ return HTML_4_0_STRICT;
+ case "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN":
+ return HTML_4_0_TRANSITIONAL;
+ case "-//W3C//DTD HTML 4.0 FRAMESET//EN":
+ return HTML_4_0_FRAMESET;
+ case "-//W3C//DTD HTML 4.01//EN":
+ return HTML_4_01_STRICT;
+ case "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN":
+ return HTML_4_01_TRANSITIONAL;
+ case "-//W3C//DTD HTML 4.01 FRAMESET//EN":
+ return HTML_4_01_FRAMESET;
case "-//W3C//DTD XHTML 1.0 STRICT//EN":
return XHTML_1_0_STRICT;
case "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN":
@@ -685,68 +685,68 @@ protected int checkDoctype(List elements) {
case "-//W3C//DTD XHTML 1.1//EN":
return XHTML_1_1;
default:
- break;
- }
- } catch (Exception e) {
- // Really shouldn't happen, but if it does we've got
- // a bad doctype
- return 0;
- }
- return 0;
- }
-
- /*
- * See if this document, even if it lacks a doctype, is most likely XHTML.
- * The test is that the document starts with an XML declaration and has
- * "html" for its first tag.
- *
- * Returns: 0 if there's no XML declaration 1 if there's an XML declaration
- * but no html tag; in this case it's probably some other kind of XML 2 if
- * there's an XML declaration and an html tag
- */
- protected int seemsToBeXHTML(List elements) {
- JHElement elem;
- try {
- elem = (JHElement) elements.get(0);
- if (!(elem instanceof JHXmlDecl)) {
- return 0;
- }
- Iterator iter = elements.iterator();
- while (iter.hasNext()) {
- elem = (JHElement) iter.next();
- if (elem instanceof JHOpenTag) {
- JHOpenTag tag = (JHOpenTag) elem;
- return ("html".equals(tag.getName()) ? 2 : 1);
- }
- }
- } catch (Exception e) {
- return 0; // document must be really empty
- }
- return 1;
- }
-
- /*
- * Remove quotes from the beginning and end of a string. If it doesn't have
- * quotes in both places, leave it alone.
- */
- protected String stripQuotes(String str) {
- int len = str.length();
- if (str.charAt(0) == '"' && str.charAt(len - 1) == '"') {
- return str.substring(1, len - 1);
- }
- return str;
- }
-
- /*
- * Checks if the XML module is available.
- */
- protected static boolean isXmlAvailable() {
- try {
- Class.forName("edu.harvard.hul.ois.jhove.module.XmlModule");
- return true;
- } catch (Exception e) {
- return false;
- }
- }
+ break;
+ }
+ } catch (Exception e) {
+ // Really shouldn't happen, but if it does we've got
+ // a bad doctype
+ return 0;
+ }
+ return 0;
+ }
+
+ /*
+  * Decide whether a document is most likely XHTML even though it may lack
+  * a doctype. The document qualifies when its first element is an XML
+  * declaration and its first open tag is named "html".
+  *
+  * Returns:
+  * 0 if the first element is not an XML declaration, or if anything goes
+  * wrong while inspecting the list (for example, the list is empty);
+  * 1 if there is an XML declaration but the first open tag is not "html",
+  * or there is no open tag at all; it's probably some other kind of XML;
+  * 2 if there is an XML declaration and the first open tag is "html".
+  */
+ protected int seemsToBeXHTML(List elements) {
+     try {
+         JHElement first = (JHElement) elements.get(0);
+         if (first instanceof JHXmlDecl) {
+             // Only the first open tag decides between 2 and 1
+             for (Object item : elements) {
+                 JHElement candidate = (JHElement) item;
+                 if (candidate instanceof JHOpenTag) {
+                     String rootName = ((JHOpenTag) candidate).getName();
+                     if ("html".equals(rootName)) {
+                         return 2;
+                     }
+                     return 1;
+                 }
+             }
+             return 1;
+         }
+         return 0;
+     } catch (Exception e) {
+         return 0; // document must be really empty
+     }
+ }
+
+ /**
+  * Remove quotes from the beginning and end of a string. If it doesn't have
+  * quotes in both places, leave it alone.
+  *
+  * Strings shorter than two characters (the empty string, or a lone quote
+  * character) cannot carry an opening and a closing quote, so they are
+  * returned unchanged instead of raising an index exception.
+  *
+  * @param str
+  *            the string to strip; must not be null
+  * @return str without its surrounding double quotes, or str itself when
+  *         it is not quoted at both ends
+  */
+ protected String stripQuotes(String str) {
+     int len = str.length();
+     if (len >= 2 && str.charAt(0) == '"' && str.charAt(len - 1) == '"') {
+         return str.substring(1, len - 1);
+     }
+     return str;
+ }
+
+ /*
+  * Reports whether the XML module class can be loaded by name. Returns
+  * true when Class.forName finds it, false when the lookup throws an
+  * exception.
+  */
+ protected static boolean isXmlAvailable() {
+     boolean found;
+     try {
+         Class.forName("edu.harvard.hul.ois.jhove.module.XmlModule");
+         found = true;
+     } catch (Exception e) {
+         found = false;
+     }
+     return found;
+ }
}
diff --git a/jhove-modules/jpeg-hul/pom.xml b/jhove-modules/jpeg-hul/pom.xml
index ab0eebcaf..3dd165646 100644
--- a/jhove-modules/jpeg-hul/pom.xml
+++ b/jhove-modules/jpeg-hul/pom.xml
@@ -3,7 +3,7 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
jpeg-hul
1.5.4
@@ -14,7 +14,7 @@
org.openpreservation.jhove.modules
tiff-hul
- 1.9.4
+ 1.9.5
diff --git a/jhove-modules/jpeg2000-hul/pom.xml b/jhove-modules/jpeg2000-hul/pom.xml
index 77ed54d56..5bac0118c 100644
--- a/jhove-modules/jpeg2000-hul/pom.xml
+++ b/jhove-modules/jpeg2000-hul/pom.xml
@@ -3,7 +3,7 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
jpeg2000-hul
1.4.4
diff --git a/jhove-modules/pdf-hul/pom.xml b/jhove-modules/pdf-hul/pom.xml
index 3e5a435ec..9b9893727 100644
--- a/jhove-modules/pdf-hul/pom.xml
+++ b/jhove-modules/pdf-hul/pom.xml
@@ -3,10 +3,10 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
pdf-hul
- 1.12.6
+ 1.12.7
JHOVE PDF Module HUL
PDF module developed by Harvard University Library
diff --git a/jhove-modules/pdf-hul/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java b/jhove-modules/pdf-hul/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java
index 012cc9428..c3014d307 100644
--- a/jhove-modules/pdf-hul/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java
+++ b/jhove-modules/pdf-hul/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java
@@ -380,8 +380,8 @@ public class PdfModule extends ModuleBase {
******************************************************************/
private static final String NAME = "PDF-hul";
- private static final String RELEASE = "1.12.6";
- private static final int[] DATE = { 2024, 07, 31 };
+ private static final String RELEASE = "1.12.7";
+ private static final int[] DATE = { 2024, 8, 22 };
private static final String[] FORMAT = { "PDF",
"Portable Document Format" };
private static final String COVERAGE = "PDF 1.0-1.6; "
diff --git a/jhove-modules/pdf-hul/src/test/java/edu/harvard/hul/ois/jhove/module/pdf/LiteralTests.java b/jhove-modules/pdf-hul/src/test/java/edu/harvard/hul/ois/jhove/module/pdf/LiteralTests.java
index 820ac0b19..930665db2 100644
--- a/jhove-modules/pdf-hul/src/test/java/edu/harvard/hul/ois/jhove/module/pdf/LiteralTests.java
+++ b/jhove-modules/pdf-hul/src/test/java/edu/harvard/hul/ois/jhove/module/pdf/LiteralTests.java
@@ -2,10 +2,6 @@
import static org.junit.Assert.assertNotNull;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.TimeZone;
-
import org.junit.Test;
/**
diff --git a/jhove-modules/pom.xml b/jhove-modules/pom.xml
index cbdf843cd..af3133987 100644
--- a/jhove-modules/pom.xml
+++ b/jhove-modules/pom.xml
@@ -5,13 +5,13 @@
org.openpreservation.jhove
jhove
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
org.openpreservation.jhove.modules
jhove-modules
pom
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
JHOVE Validation Modules
The JHOVE HUL validation modules.
@@ -19,7 +19,7 @@
org.openpreservation.jhove
jhove-core
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
org.junit.vintage
diff --git a/jhove-modules/tiff-hul/pom.xml b/jhove-modules/tiff-hul/pom.xml
index 34ae9f03d..dc4661644 100644
--- a/jhove-modules/tiff-hul/pom.xml
+++ b/jhove-modules/tiff-hul/pom.xml
@@ -3,10 +3,10 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
tiff-hul
- 1.9.4
+ 1.9.5
JHOVE TIFF Module HUL
TIFF module developed by Harvard University Library
diff --git a/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java b/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java
index 7d0d6318f..26c61c894 100644
--- a/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java
+++ b/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java
@@ -121,8 +121,8 @@ public class TiffModule extends ModuleBase {
protected Logger _logger;
private static final String NAME = "TIFF-hul";
- private static final String RELEASE = "1.9.4";
- private static final int[] DATE = { 2023, 03, 16 };
+ private static final String RELEASE = "1.9.5";
+ private static final int[] DATE = { 2024, 8, 22 };
private static final String[] FORMAT = { "TIFF", "Tagged Image File Format" };
private static final String COVERAGE = "TIFF 4.0, 5.0, and 6.0; "
+ "TIFF/IT (ISO/DIS 12639:2003), including file types CT, LW, HC, MP, "
@@ -1228,7 +1228,7 @@ protected IFD parseIFDChain(long next, RepInfo info, int type,
ifd.setThumbnail(true);
}
list.add(ifd);
-
+
if (list.size() > 50) {
throw new TiffException(MessageConstants.TIFF_HUL_60);
}
diff --git a/jhove-modules/utf8-hul/pom.xml b/jhove-modules/utf8-hul/pom.xml
index 3798fb6fd..568545ac3 100644
--- a/jhove-modules/utf8-hul/pom.xml
+++ b/jhove-modules/utf8-hul/pom.xml
@@ -3,7 +3,7 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
utf8-hul
1.7.3
@@ -19,7 +19,7 @@
org.openpreservation.jhove.modules
pdf-hul
- 1.12.1
+ 1.12.7
test
diff --git a/jhove-modules/wave-hul/pom.xml b/jhove-modules/wave-hul/pom.xml
index b03bf26cb..7dae61bda 100644
--- a/jhove-modules/wave-hul/pom.xml
+++ b/jhove-modules/wave-hul/pom.xml
@@ -3,7 +3,7 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
wave-hul
1.8.3
diff --git a/jhove-modules/xml-hul/pom.xml b/jhove-modules/xml-hul/pom.xml
index cf18c4cca..c074ddec0 100644
--- a/jhove-modules/xml-hul/pom.xml
+++ b/jhove-modules/xml-hul/pom.xml
@@ -3,10 +3,10 @@
org.openpreservation.jhove.modules
jhove-modules
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
xml-hul
- 1.5.4
+ 1.5.5
JHOVE XML Module HUL
XML module developed by Harvard University Library
diff --git a/jhove-modules/xml-hul/src/main/java/edu/harvard/hul/ois/jhove/module/XmlModule.java b/jhove-modules/xml-hul/src/main/java/edu/harvard/hul/ois/jhove/module/XmlModule.java
index 374243814..1abf44cff 100644
--- a/jhove-modules/xml-hul/src/main/java/edu/harvard/hul/ois/jhove/module/XmlModule.java
+++ b/jhove-modules/xml-hul/src/main/java/edu/harvard/hul/ois/jhove/module/XmlModule.java
@@ -49,8 +49,8 @@
public class XmlModule extends ModuleBase {
private static final String NAME = "XML-hul";
- private static final String RELEASE = "1.5.4";
- private static final int[] DATE = { 2024, 03, 05 };
+ private static final String RELEASE = "1.5.5";
+ private static final int[] DATE = { 2024, 8, 22 };
private static final String[] FORMAT = { "XML", "XHTML" };
private static final String COVERAGE = "XML 1.0";
private static final String[] MIMETYPE = { "text/xml", "application/xml",
diff --git a/pom.xml b/pom.xml
index f63ee57db..02ce407c8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,7 +10,7 @@
org.openpreservation.jhove
jhove
- 1.31.0-SNAPSHOT
+ 1.32.0-RC1
pom
JHOVE - JSTOR/Harvard Object Validation Environment