+** @{
*/
TIDY_EXPORT TidyDoc tidyCreate();
TIDY_EXPORT void tidyRelease( TidyDoc tdoc );
-/* Let application store a chunk of data w/ each Tidy instance.
-** Useful for callbacks.
+/** Let application store a chunk of data w/ each Tidy instance.
+** Useful for callbacks.
*/
TIDY_EXPORT void tidySetAppData( TidyDoc tdoc, uint appData );
+
+/** Get application data set previously */
TIDY_EXPORT uint tidyGetAppData( TidyDoc tdoc );
+/** Get release date (version) for current library */
TIDY_EXPORT ctmbstr tidyReleaseDate();
-/* Diagnostics and Repair */
+/* Diagnostics and Repair
+*/
+
+/** Get status of current document. */
TIDY_EXPORT int tidyStatus( TidyDoc tdoc );
-TIDY_EXPORT int tidyDetectedHtmlVersion( TidyDoc tdoc ); /* 0, 2, 3 or 4 */
+
+/** Detected HTML version: 0, 2, 3 or 4 */
+TIDY_EXPORT int tidyDetectedHtmlVersion( TidyDoc tdoc );
+
+/** Input is XHTML? */
TIDY_EXPORT Bool tidyDetectedXhtml( TidyDoc tdoc );
+
+/** Input is generic XML (not HTML or XHTML)? */
TIDY_EXPORT Bool tidyDetectedGenericXml( TidyDoc tdoc );
+/** Number of Tidy errors encountered. If > 0, output is suppressed
+** unless TidyForceOutput is set.
+*/
TIDY_EXPORT uint tidyErrorCount( TidyDoc tdoc );
+
+/** Number of Tidy warnings encountered. */
TIDY_EXPORT uint tidyWarningCount( TidyDoc tdoc );
+
+/** Number of Tidy accessibility warnings encountered. */
TIDY_EXPORT uint tidyAccessWarningCount( TidyDoc tdoc );
+
+/** Number of Tidy configuration errors encountered. */
TIDY_EXPORT uint tidyConfigErrorCount( TidyDoc tdoc );
/* Get/Set configuration options
*/
+/** Load an ASCII Tidy configuration file */
TIDY_EXPORT int tidyLoadConfig( TidyDoc tdoc, ctmbstr configFile );
-TIDY_EXPORT int tidyLoadConfigEnc( TidyDoc tdoc,
- ctmbstr configFile, ctmbstr charenc );
+/** Load a Tidy configuration file with the specified character encoding */
+TIDY_EXPORT int tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr configFile,
+ ctmbstr charenc );
+
+/** Set the input/output character encoding for parsing markup.
+** Values include: ascii, latin1, raw, utf8, iso2022, mac,
+** win1252, utf16le, utf16be, utf16, big5 and shiftjis. Case-insensitive.
+*/
TIDY_EXPORT int tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam );
-/* Enumerate configuration options
+/** @} end Basic group */
+
+
+/** @defgroup Configuration Configuration Options
+**
+** Functions for getting and setting Tidy configuration options.
+** @{
*/
+/** Applications using TidyLib may want to augment command-line and
+** configuration file options. Setting this callback allows an application
+** developer to examine command-line and configuration file options after
+** TidyLib has examined them and failed to recognize them.
+**/
+
+typedef Bool (*TidyOptCallback)( ctmbstr option, ctmbstr value );
+
+TIDY_EXPORT Bool tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback );
+
+/** Get option ID by name */
TIDY_EXPORT TidyOptionId tidyOptGetIdForName( ctmbstr optnam );
+/** Get iterator for list of options */
+/**
+Example:
+
+*/
+
TIDY_EXPORT TidyIterator tidyGetOptionList( TidyDoc tdoc );
+/** Get next Option */
TIDY_EXPORT TidyOption tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos );
+/** Lookup option by ID */
TIDY_EXPORT TidyOption tidyGetOption( TidyDoc tdoc, TidyOptionId optId );
+/** Lookup option by name */
TIDY_EXPORT TidyOption tidyGetOptionByName( TidyDoc tdoc, ctmbstr optnam );
+/** Get ID of given Option */
TIDY_EXPORT TidyOptionId tidyOptGetId( TidyOption opt );
+
+/** Get name of given Option */
TIDY_EXPORT ctmbstr tidyOptGetName( TidyOption opt );
+
+/** Get datatype of given Option */
TIDY_EXPORT TidyOptionType tidyOptGetType( TidyOption opt );
+
+/** Is Option read-only? */
TIDY_EXPORT Bool tidyOptIsReadOnly( TidyOption opt );
+
+/** Get category of given Option */
TIDY_EXPORT TidyConfigCategory tidyOptGetCategory( TidyOption opt );
+
+/** Get default value of given Option as a string */
TIDY_EXPORT ctmbstr tidyOptGetDefault( TidyOption opt );
+
+/** Get default value of given Option as an unsigned integer */
TIDY_EXPORT uint tidyOptGetDefaultInt( TidyOption opt );
+
+/** Get default value of given Option as a Boolean value */
TIDY_EXPORT Bool tidyOptGetDefaultBool( TidyOption opt );
+/** Iterate over Option "pick list" */
TIDY_EXPORT TidyIterator tidyOptGetPickList( TidyOption opt );
+/** Get next string value of Option "pick list" */
TIDY_EXPORT ctmbstr tidyOptGetNextPick( TidyOption opt, TidyIterator* pos );
+/** Get current Option value as a string */
TIDY_EXPORT ctmbstr tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId );
+/** Set Option value as a string */
TIDY_EXPORT Bool tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val );
+/** Set named Option value as a string. Good if not sure of type. */
TIDY_EXPORT Bool tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val );
+/** Get current Option value as an integer */
TIDY_EXPORT uint tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId );
+/** Set Option value as an integer */
TIDY_EXPORT Bool tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, uint val );
+/** Get current Option value as a Boolean flag */
TIDY_EXPORT Bool tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId );
+/** Set Option value as a Boolean flag */
TIDY_EXPORT Bool tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val );
+/** Reset option to default value by ID */
TIDY_EXPORT Bool tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId opt );
+/** Reset all options to their default values */
TIDY_EXPORT Bool tidyOptResetAllToDefault( TidyDoc tdoc );
-/* reset to config (after document processing) */
+/** Take a snapshot of current config settings */
TIDY_EXPORT Bool tidyOptSnapshot( TidyDoc tdoc );
+/** Reset config settings to snapshot (after document processing) */
TIDY_EXPORT Bool tidyOptResetToSnapshot( TidyDoc tdoc );
+/** Any settings different than default? */
TIDY_EXPORT Bool tidyOptDiffThanDefault( TidyDoc tdoc );
+/** Any settings different than snapshot? */
TIDY_EXPORT Bool tidyOptDiffThanSnapshot( TidyDoc tdoc );
+/** Copy current configuration settings from one document to another */
TIDY_EXPORT Bool tidyOptCopyConfig( TidyDoc tdocTo, TidyDoc tdocFrom );
+/** Get character encoding name. Used with TidyCharEncoding,
+** TidyOutCharEncoding, TidyInCharEncoding */
TIDY_EXPORT ctmbstr tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId );
+
+/** Get current pick list value for option by ID. Useful for enum types. */
TIDY_EXPORT ctmbstr tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId);
+/** Iterate over user declared tags */
TIDY_EXPORT TidyIterator tidyOptGetDeclTagList( TidyDoc tdoc );
+/** Get next declared tag of specified type: TidyInlineTags, TidyBlockTags,
+** TidyEmptyTags, TidyPreTags */
TIDY_EXPORT ctmbstr tidyOptGetNextDeclTag( TidyDoc tdoc,
TidyOptionId optId,
TidyIterator* iter );
+/** @} end Configuration group */
-/* I/O and Message handling interface
+/** @defgroup IO I/O and Messages
**
** By default, Tidy will define, create and use
** instances of input and output handlers for
@@ -189,32 +363,42 @@ TIDY_EXPORT ctmbstr tidyOptGetNextDeclTag( TidyDoc tdoc,
** respectively. A FILE* cfgFile input handler
** will be used for config files. Command line
** options will just be set directly.
+**
+** @{
*/
/*****************
Input Source
*****************/
+/** Input Callback: get next byte of input */
typedef int (*TidyGetByteFunc)( uint sourceData );
+
+/** Input Callback: unget a byte of input */
typedef void (*TidyUngetByteFunc)( uint sourceData, byte bt );
+
+/** Input Callback: is end of input? */
typedef Bool (*TidyEOFFunc)( uint sourceData );
+/** End of input "character" */
#define EndOfStream (~0u)
+/** TidyInputSource - Delivers raw bytes of input
+*/
TIDY_STRUCT
typedef struct _TidyInputSource
{
/* Instance data */
- uint sourceData;
+ uint sourceData; /**< Input context. Passed to callbacks */
/* Methods */
- TidyGetByteFunc getByte;
- TidyUngetByteFunc ungetByte;
- TidyEOFFunc eof;
+ TidyGetByteFunc getByte; /**< Pointer to "get byte" callback */
+ TidyUngetByteFunc ungetByte; /**< Pointer to "unget" callback */
+ TidyEOFFunc eof; /**< Pointer to "eof" callback */
} TidyInputSource;
-/* Facilitates user defined source by providing
-** an entry point to marshal pointers-to-functions.
-** Needed by .NET and possibly other language bindings.
+/** Facilitates user defined source by providing
+** an entry point to marshal pointers-to-functions.
+** Needed by .NET and possibly other language bindings.
*/
TIDY_EXPORT Bool tidyInitSource( TidyInputSource* source,
void* srcData,
@@ -222,54 +406,72 @@ TIDY_EXPORT Bool tidyInitSource( TidyInputSource* source,
TidyUngetByteFunc ugbFunc,
TidyEOFFunc endFunc );
+/** Helper: get next byte from input source */
TIDY_EXPORT uint tidyGetByte( TidyInputSource* source );
+
+/** Helper: unget byte back to input source */
TIDY_EXPORT void tidyUngetByte( TidyInputSource* source, uint byteValue );
+
+/** Helper: check if input source at end */
TIDY_EXPORT Bool tidyIsEOF( TidyInputSource* source );
/****************
Output Sink
****************/
+/** Output callback: send a byte to output */
typedef void (*TidyPutByteFunc)( uint sinkData, byte bt );
+
+/** TidyOutputSink - accepts raw bytes of output
+*/
TIDY_STRUCT
typedef struct _TidyOutputSink
{
/* Instance data */
- uint sinkData;
+ uint sinkData; /**< Output context. Passed to callbacks */
/* Methods */
- TidyPutByteFunc putByte;
+ TidyPutByteFunc putByte; /**< Pointer to "put byte" callback */
} TidyOutputSink;
-/* Facilitates user defined sinks by providing
-** an entry point to marshal pointers-to-functions.
-** Needed by .NET and possibly other language bindings.
+/** Facilitates user defined sinks by providing
+** an entry point to marshal pointers-to-functions.
+** Needed by .NET and possibly other language bindings.
*/
TIDY_EXPORT Bool tidyInitSink( TidyOutputSink* sink,
void* snkData,
TidyPutByteFunc pbFunc );
+
+/** Helper: send a byte to output */
TIDY_EXPORT void tidyPutByte( TidyOutputSink* sink, uint byteValue );
-/* Use TidyReportFilter to filter messages by diagnostic level:
-** info, warning, etc. Just set diagnostic output
-** handler to redirect all diagnostics output. Return true
-** to proceed with output, false to cancel.
+/** Callback to filter messages by diagnostic level:
+** info, warning, etc. Just set diagnostic output
+** handler to redirect all diagnostics output. Return true
+** to proceed with output, false to cancel.
*/
-typedef Bool (*TidyReportFilter)( TidyDoc tdoc,
- TidyReportLevel lvl, uint line, uint col, ctmbstr mssg );
+typedef Bool (*TidyReportFilter)( TidyDoc tdoc, TidyReportLevel lvl,
+ uint line, uint col, ctmbstr mssg );
+/** Give Tidy a filter callback to use */
TIDY_EXPORT Bool tidySetReportFilter( TidyDoc tdoc,
TidyReportFilter filtCallback );
-
+/** Set error sink to named file */
TIDY_EXPORT FILE* tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam );
+/** Set error sink to given buffer */
TIDY_EXPORT int tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf );
+/** Set error sink to given generic sink */
TIDY_EXPORT int tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink );
+/** @} end IO group */
-/* By default, Tidy will use its own wrappers
+
+/** @defgroup Memory Memory Allocation
+**
+** By default, Tidy will use its own wrappers
** around standard C malloc/free calls.
** These wrappers will abort upon any failures.
** If any are set, all must be set.
@@ -277,61 +479,189 @@ TIDY_EXPORT int tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink );
**
** May be used to set environment-specific allocators
** such as used by web server plugins, etc.
+**
+** @{
*/
+
+/** Callback for "malloc" replacement */
typedef void* (*TidyMalloc)( size_t len );
+/** Callback for "realloc" replacement */
typedef void* (*TidyRealloc)( void* buf, size_t len );
+/** Callback for "free" replacement */
typedef void (*TidyFree)( void* buf );
+/** Callback for "out of memory" panic state */
typedef void (*TidyPanic)( ctmbstr mssg );
+/** Give Tidy a malloc() replacement */
TIDY_EXPORT Bool tidySetMallocCall( TidyMalloc fmalloc );
+/** Give Tidy a realloc() replacement */
TIDY_EXPORT Bool tidySetReallocCall( TidyRealloc frealloc );
+/** Give Tidy a free() replacement */
TIDY_EXPORT Bool tidySetFreeCall( TidyFree ffree );
+/** Give Tidy an "out of memory" handler */
TIDY_EXPORT Bool tidySetPanicCall( TidyPanic fpanic );
+/** @} end Memory group */
+
/* TODO: Catalog all messages for easy translation
TIDY_EXPORT ctmbstr tidyLookupMessage( int errorNo );
*/
-/* Parse/load Functions
+/** @defgroup Parse Document Parse
**
-** HTML/XHTML version determined from input.
+** Parse markup from a given input source. String and filename
+** functions added for convenience. HTML/XHTML version determined
+** from input.
+** @{
*/
+/** Parse markup in named file */
TIDY_EXPORT int tidyParseFile( TidyDoc tdoc, ctmbstr filename );
+
+/** Parse markup from the standard input */
TIDY_EXPORT int tidyParseStdin( TidyDoc tdoc );
+
+/** Parse markup in given string */
TIDY_EXPORT int tidyParseString( TidyDoc tdoc, ctmbstr content );
+
+/** Parse markup in given buffer */
TIDY_EXPORT int tidyParseBuffer( TidyDoc tdoc, TidyBuffer* buf );
-TIDY_EXPORT int tidyParseSource( TidyDoc tdoc, TidyInputSource* source );
-/* Diagnostics and Repair */
+/** Parse markup in given generic input source */
+TIDY_EXPORT int tidyParseSource( TidyDoc tdoc, TidyInputSource* source);
+
+/** @} End Parse group */
+
+
+/** @defgroup Clean Diagnostics and Repair
+**
+** @{
+*/
+/** Execute configured cleanup and repair operations on parsed markup */
TIDY_EXPORT int tidyCleanAndRepair( TidyDoc tdoc );
+
+/** Run configured diagnostics on parsed and repaired markup.
+** Must call tidyCleanAndRepair() first.
+*/
TIDY_EXPORT int tidyRunDiagnostics( TidyDoc tdoc );
-/* Document save Functions
+/** @} end Clean group */
+
+
+/** @defgroup Save Document Save Functions
**
-** If buffer is not big enough, ENOMEM will be returned and
-** the necessary buffer size will be placed in *buflen.
+** Save currently parsed document to the given output sink. File name
+** and string/buffer functions provided for convenience.
+** @{
*/
+
+/** Save to named file */
TIDY_EXPORT int tidySaveFile( TidyDoc tdoc, ctmbstr filename );
+
+/** Save to standard output (FILE*) */
TIDY_EXPORT int tidySaveStdout( TidyDoc tdoc );
+
+/** Save to given TidyBuffer object */
TIDY_EXPORT int tidySaveBuffer( TidyDoc tdoc, TidyBuffer* buf );
-TIDY_EXPORT int tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen );
+
+/** Save document to application buffer. If buffer is not big enough,
+** ENOMEM will be returned and the necessary buffer size will be placed
+** in *buflen.
+*/
+TIDY_EXPORT int tidySaveString( TidyDoc tdoc,
+ tmbstr buffer, uint* buflen );
+
+/** Save to given generic output sink */
TIDY_EXPORT int tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink );
-/* Save Config
+/** @} end Save group */
+
+
+/** @addtogroup Basic
+** @{
*/
+/** Save current settings to named file.
+ Only non-default values are written. */
TIDY_EXPORT int tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil );
+
+/** Save current settings to given output sink.
+ Only non-default values are written. */
TIDY_EXPORT int tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink );
+
/* Error reporting functions
*/
+
+/** Write more complete information about errors to current error sink. */
TIDY_EXPORT void tidyErrorSummary( TidyDoc tdoc );
+
+/** Write more general information about markup to current error sink. */
TIDY_EXPORT void tidyGeneralInfo( TidyDoc tdoc );
+/** @} end Basic group (again) */
+
+
+/** @defgroup Tree Document Tree
+**
+** A parsed and, optionally, repaired document is
+** represented by Tidy as a Tree, much like a W3C DOM.
+** This tree may be traversed using these functions.
+** The following snippet gives a basic idea how these
+** functions can be used.
+**
+
+void dumpNode( TidyNode tnod, int indent )
+{
+ TidyNode child;
+
+ for ( child = tidyGetChild(tnod); child; child = tidyGetNext(child) )
+ {
+ ctmbstr name = tidyNodeGetName( child );
+ if ( !name )
+ {
+ switch ( tidyNodeGetType(child) )
+ {
+ case TidyNode_Root: name = "Root"; break;
+ case TidyNode_DocType: name = "DOCTYPE"; break;
+ case TidyNode_Comment: name = "Comment"; break;
+ case TidyNode_ProcIns: name = "Processing Instruction"; break;
+ case TidyNode_Text: name = "Text"; break;
+ case TidyNode_CDATA: name = "CDATA"; break;
+ case TidyNode_Section: name = "XML Section"; break;
+ case TidyNode_Asp: name = "ASP"; break;
+ case TidyNode_Jste: name = "JSTE"; break;
+ case TidyNode_Php: name = "PHP"; break;
+ case TidyNode_XmlDecl: name = "XML Declaration"; break;
+
+ case TidyNode_Start:
+ case TidyNode_End:
+ case TidyNode_StartEnd:
+ default:
+ assert( name != NULL ); // Shouldn't get here
+ break;
+ }
+ }
+ assert( name != NULL );
+ printf( "\%*.*sNode: \%s\\n", indent, indent, " ", name );
+ dumpNode( child, indent + 4 );
+ }
+}
+
+void dumpDoc( TidyDoc tdoc )
+{
+ dumpNode( tidyGetRoot(tdoc), 0 );
+}
+
+void dumpBody( TidyDoc tdoc )
+{
+ dumpNode( tidyGetBody(tdoc), 0 );
+}
+
+
+@{
-/* Document tree traversal functions
*/
TIDY_EXPORT TidyNode tidyGetRoot( TidyDoc tdoc );
@@ -347,10 +677,6 @@ TIDY_EXPORT TidyNode tidyGetChild( TidyNode tnod );
TIDY_EXPORT TidyNode tidyGetNext( TidyNode tnod );
TIDY_EXPORT TidyNode tidyGetPrev( TidyNode tnod );
-/* Node info */
-TIDY_EXPORT TidyNodeType tidyNodeGetType( TidyNode tnod );
-TIDY_EXPORT ctmbstr tidyNodeGetName( TidyNode tnod );
-
/* Null for non-element nodes and all pure HTML
TIDY_EXPORT ctmbstr tidyNodeNsLocal( TidyNode tnod );
TIDY_EXPORT ctmbstr tidyNodeNsPrefix( TidyNode tnod );
@@ -370,11 +696,19 @@ TIDY_EXPORT ctmbstr tidyAttrNsPrefix( TidyAttr tattr );
TIDY_EXPORT ctmbstr tidyAttrNsUri( TidyAttr tattr );
*/
+/** @} end Tree group */
-/* Node interrogation
+/** @defgroup NodeAsk Node Interrogation
+**
+** Get information about any given node.
+** @{
*/
+/* Node info */
+TIDY_EXPORT TidyNodeType tidyNodeGetType( TidyNode tnod );
+TIDY_EXPORT ctmbstr tidyNodeGetName( TidyNode tnod );
+
TIDY_EXPORT Bool tidyNodeIsText( TidyNode tnod );
TIDY_EXPORT Bool tidyNodeIsProp( TidyDoc tdoc, TidyNode tnod );
TIDY_EXPORT Bool tidyNodeIsHeader( TidyNode tnod ); /* h1, h2, ... */
@@ -465,8 +799,13 @@ TIDY_EXPORT Bool tidyNodeIsSTRIKE( TidyNode tnod );
TIDY_EXPORT Bool tidyNodeIsU( TidyNode tnod );
TIDY_EXPORT Bool tidyNodeIsMENU( TidyNode tnod );
+/** @} End NodeAsk group */
+
-/* Attribute interrogation
+/** @defgroup Attribute Attribute Interrogation
+**
+** Get information about any given attribute.
+** @{
*/
TIDY_EXPORT TidyAttrId tidyAttrGetId( TidyAttr tattr );
@@ -519,9 +858,16 @@ TIDY_EXPORT Bool tidyAttrIsABBR( TidyAttr tattr );
TIDY_EXPORT Bool tidyAttrIsCOLSPAN( TidyAttr tattr );
TIDY_EXPORT Bool tidyAttrIsROWSPAN( TidyAttr tattr );
-/* Attribute retrieval
+/** @} end Attribute group */
+
+
+/** @defgroup AttrGet Attribute Retrieval
+**
+** Lookup an attribute from a given node
+** @{
*/
+
TIDY_EXPORT TidyAttr tidyAttrGetHREF( TidyNode tnod );
TIDY_EXPORT TidyAttr tidyAttrGetSRC( TidyNode tnod );
TIDY_EXPORT TidyAttr tidyAttrGetID( TidyNode tnod );
@@ -568,6 +914,9 @@ TIDY_EXPORT TidyAttr tidyAttrGetABBR( TidyNode tnod );
TIDY_EXPORT TidyAttr tidyAttrGetCOLSPAN( TidyNode tnod );
TIDY_EXPORT TidyAttr tidyAttrGetROWSPAN( TidyNode tnod );
+
+/** @} end AttrGet group */
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tidylib-src/include/tidyenum.h b/tidylib-src/include/tidyenum.h
old mode 100755
new mode 100644
index 8a81e524..fc2f6a1c
--- a/tidylib-src/include/tidyenum.h
+++ b/tidylib-src/include/tidyenum.h
@@ -14,8 +14,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/10/15 19:46:52 $
- $Revision: 1.1.2.1 $
+ $Date: 2003/02/16 19:33:09 $
+ $Revision: 1.2 $
Contributing Author(s):
@@ -59,7 +59,9 @@
Created 2001-05-20 by Charles Reitzel
Updated 2002-07-01 by Charles Reitzel - 1st Implementation
+*/
+/** @file tidyenum.h - Enumerations defined for use with TidyLib.
*/
#ifdef __cplusplus
@@ -69,150 +71,164 @@ extern "C" {
/* Enumerate configuration options
*/
+/** Categories of Tidy configuration options
+*/
typedef enum
{
- TidyMarkup,
- TidyDiagnostics,
- TidyPrettyPrint,
- TidyEncoding,
- TidyMiscellaneous
+ TidyMarkup, /**< Markup options: (X)HTML version, etc */
+ TidyDiagnostics, /**< Diagnostics */
+ TidyPrettyPrint, /**< Output layout */
+ TidyEncoding, /**< Character encodings */
+ TidyMiscellaneous /**< File handling, message format, etc. */
} TidyConfigCategory;
-/* Option IDs
-**
-** Used to get/set option values.
+/** Option IDs Used to get/set option values.
*/
-
typedef enum
{
- TidyUnknownOption, /* Unknown! */
- TidyIndentSpaces, /* indentation n spaces */
- TidyWrapLen, /* wrap margin */
- TidyTabSize, /* expand tabs to n spaces */
-
- TidyCharEncoding, /* in/out character encoding */
- TidyInCharEncoding, /* input character encoding (if different) */
- TidyOutCharEncoding, /* output character encoding (if different) */
-
- TidyDoctypeMode, /* see doctype property */
- TidyDoctype, /* user specified doctype */
-
- TidyDuplicateAttrs, /* Keep first or last duplicate attribute */
- TidyAltText, /* default text for alt attribute */
- TidySlideStyle, /* style sheet for slides: not used for anything yet */
- TidyErrFile, /* file name to write errors to */
- TidyWriteBack, /* if true then output tidied markup */
- TidyShowMarkup, /* if false, normal output is suppressed */
- TidyShowWarnings, /* however errors are always shown */
- TidyQuiet, /* no 'Parsing X', guessed DTD or summary */
- TidyIndentContent, /* indent content of appropriate tags */
- /* "auto" does text/block level content indentation */
- TidyHideEndTags, /* suppress optional end tags */
- TidyXmlTags, /* treat input as XML */
- TidyXmlOut, /* create output as XML */
- TidyXhtmlOut, /* output extensible HTML */
- TidyHtmlOut, /* output plain HTML, even for XHTML input.
- Yes means set explicitly. */
- TidyXmlDecl, /* add for XML docs */
- TidyUpperCaseTags, /* output tags in upper not lower case */
- TidyUpperCaseAttrs, /* output attributes in upper not lower case */
- TidyMakeBare, /* Make bare HTML: remove Microsoft cruft */
- TidyMakeClean, /* replace presentational clutter by style rules */
- TidyLogicalEmphasis, /* replace i by em and b by strong */
- TidyDropPropAttrs, /* discard proprietary attributes */
- TidyDropFontTags, /* discard presentation tags */
- TidyDropEmptyParas, /* discard empty p elements */
- TidyFixComments, /* fix comments with adjacent hyphens */
- TidyBreakBeforeBR, /* o/p newline before or not? */
- TidyBurstSlides, /* create slides on each h2 element */
- TidyNumEntities, /* use numeric entities */
- TidyQuoteMarks, /* output " marks as " */
- TidyQuoteNbsp, /* output non-breaking space as entity */
- TidyQuoteAmpersand, /* output naked ampersand as & */
- TidyWrapAttVals, /* wrap within attribute values */
- TidyWrapScriptlets, /* wrap within JavaScript string literals */
- TidyWrapSection, /* wrap within section tags */
- TidyWrapAsp, /* wrap within ASP pseudo elements */
- TidyWrapJste, /* wrap within JSTE pseudo elements */
- TidyWrapPhp, /* wrap within PHP pseudo elements */
- TidyFixBackslash, /* fix URLs by replacing \ with / */
- TidyIndentAttributes,/* newline+indent before each attribute */
- TidyXmlPIs, /* if set to yes PIs must end with ?> */
- TidyXmlSpace, /* if set to yes adds xml:space attr as needed */
- TidyEncloseBodyText, /* if yes text at body is wrapped in
's */
- TidyEncloseBlockText,/* if yes text in blocks is wrapped in
+ */
+ if ( attrIsALIGN(attr) && VERS_HTML40_STRICT==versWanted )
+ {
+ switch ( TagId(node) )
+ {
+ case TidyTag_COL:
+ case TidyTag_COLGROUP:
+ case TidyTag_TBODY:
+ case TidyTag_TD:
+ case TidyTag_TFOOT:
+ case TidyTag_TH:
+ case TidyTag_THEAD:
+ case TidyTag_TR:
+ break;
+
+ default:
+ ReportNonCompliantAttr( doc, node, attr, versWanted );
+ compliant = no;
+ break;
+ }
+ }
+ }
+ return compliant;
+}
+/* Recursively check `node` and its subtree against versWanted, reporting violations; returns no for a null node or when an attribute/element check fails in the subtree. */
+static Bool NodeCompliance( TidyDocImpl* doc, Node* node, uint versWanted )
+{
+ Bool compliant = yes;
+ Bool checkCM = no; /* set when the parent of this node must be examined for mixed content below */
+
+ if ( !node )
+ return no;
+
+ switch ( node->type )
+ {
+#if 0
+ case TidyNode_Root: /* Root */
+ case TidyNode_DocType: /* DOCTYPE */
+ case TidyNode_Comment: /* Comment */
+ case TidyNode_ProcIns: /* Processing Instruction */
+ break;
+#endif
+
+ case TidyNode_Text: /* Text */
+ checkCM = yes;
+ break;
+
+ case TidyNode_Start: /* Start Tag */
+ case TidyNode_StartEnd: /* Start/End (empty) Tag */
+ checkCM = !nodeCMIsBlock( node );
+ compliant = AttrCompliance( doc, node, versWanted );
+ if ( compliant )
+ {
+ uint nodeVer = node->tag->versions; /* NOTE(review): assumes node->tag is non-null for start tags -- confirm */
+ if ( MissingVersion(nodeVer, versWanted) )
+ {
+ ReportNonCompliantNode( doc, node,
+ OBSOLETE_ELEMENT, versWanted );
+ compliant = no;
+ }
+ }
+ if ( compliant && VERS_HTML40_STRICT == versWanted ) /* pick at most one offending attribute for this tag; first lookup that hits wins */
+ {
+ AttVal* attr = null;
+ switch ( TagId(node) )
+ {
+ case TidyTag_BR: /* no clear */
+ attr = AttrGetById( node, TidyAttr_CLEAR );
+ break;
+
+ case TidyTag_HR: /* no size, shade */
+ attr = AttrGetById( node, TidyAttr_SIZE );
+ if ( !attr )
+ attr = AttrGetById( node, TidyAttr_NOSHADE );
+ break;
+
+ case TidyTag_IMG: /* no border */
+ attr = AttrGetById( node, TidyAttr_BORDER );
+ break;
+
+ case TidyTag_LI: /* no value, type */
+ attr = AttrGetById( node, TidyAttr_TYPE );
+ if ( !attr )
+ attr = AttrGetById( node, TidyAttr_VALUE );
+ break;
+
+ case TidyTag_OL: /* no start, type */
+ attr = AttrGetById( node, TidyAttr_TYPE );
+ if ( !attr )
+ attr = AttrGetById( node, TidyAttr_START );
+ break;
+
+ case TidyTag_PRE: /* no width */
+ attr = AttrGetById( node, TidyAttr_WIDTH );
+ break;
+
+ case TidyTag_SCRIPT: /* no language */
+ attr = AttrGetById( node, TidyAttr_LANGUAGE );
+ break;
+
+ case TidyTag_TD: /* no width, height */
+ case TidyTag_TH:
+ attr = AttrGetById( node, TidyAttr_WIDTH );
+ if ( !attr )
+ attr = AttrGetById( node, TidyAttr_HEIGHT );
+ break;
+
+ case TidyTag_UL: /* no type */
+ attr = AttrGetById( node, TidyAttr_TYPE );
+ break;
+ }
+ if ( attr )
+ {
+ ReportNonCompliantAttr( doc, node, attr, versWanted );
+ compliant = no;
+ }
+ }
+ break;
+
+#if 0
+ case TidyNode_End: /* End Tag */
+ case TidyNode_CDATA: /* Unparsed Text */
+ case TidyNode_Section: /* XML Section */
+ case TidyNode_Asp: /* ASP Source */
+ case TidyNode_Jste: /* JSTE Source */
+ case TidyNode_Php: /* PHP Source */
+ case TidyNode_XmlDecl: /* XML Declaration */
+ break;
+#endif
+ }
+
+ /* Text nodes, and elements for which nodeCMIsBlock() is false, that sit directly
+ ** in BODY, MAP, BLOCKQUOTE, FORM or NOSCRIPT: report the parent as mixed content.
+ */
+ if ( compliant && checkCM )
+ {
+ Node* parent = node->parent;
+ if ( nodeIsBODY(parent) ||
+ nodeIsMAP(parent) ||
+ nodeIsBLOCKQUOTE(parent) ||
+ nodeIsFORM(parent) ||
+ nodeIsNOSCRIPT(parent) )
+ {
+ ReportNonCompliantNode( doc, parent,
+ MIXED_CONTENT_IN_BLOCK, versWanted ); /* reported only: `compliant` is not cleared here */
+ }
+ }
+
+ /* Scan all child nodes */
+ for ( node=node->content; node; node = node->next ) /* `node` is reused as the child cursor; every child is visited even after a failure */
+ {
+ Bool comply = NodeCompliance( doc, node, versWanted );
+ if ( compliant && !comply )
+ compliant = no;
+ }
+
+ return compliant;
+}
+/* Choose the wanted HTML version from the doctype mode / lexer doctype and test the document's content versions against it; on mismatch, walk the tree to report offenders. */
+Bool HTMLVersionCompliance( TidyDocImpl* doc )
+{
+ Bool compliant = no;
+ uint versWanted = VERS_HTML32; /* used when neither the strict nor the loose test below matches */
+ uint dtmode = cfg( doc, TidyDoctypeMode );
+ uint contver = (uint) doc->lexer->versions;
+ uint dtver = (uint) doc->lexer->doctype;
+
+ if ( TidyDoctypeStrict == dtmode || VERS_HTML40_STRICT == dtver ) /* strict is tested first, so it wins over loose */
+ versWanted = VERS_HTML40_STRICT;
+ else if ( TidyDoctypeLoose == dtmode || VERS_HTML40_LOOSE == dtver )
+ versWanted = VERS_HTML40_LOOSE;
+
+ compliant = ( (versWanted & contver) != 0 ); /* compliant when contver shares at least one set bit with versWanted */
+ if ( !compliant )
+ NodeCompliance( doc, doc->root, versWanted ); /* return value is discarded here */
+ return compliant;
+}
+
diff --git a/tidylib-src/src/clean.h b/tidylib-src/src/clean.h
old mode 100755
new mode 100644
index 203cc376..1a061b56
--- a/tidylib-src/src/clean.h
+++ b/tidylib-src/src/clean.h
@@ -7,12 +7,14 @@
See tidy.h for the copyright notice.
CVS Info:
- $Author: terry_teague $
- $Date: 2002/07/08 07:40:46 $
- $Revision: 1.1.2.2 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.2 $
*/
+void RenameElem( Node* node, TidyTagId tid );
+
Node* CleanNode( TidyDocImpl* doc, Node* node );
void FreeStyles( TidyDocImpl* doc );
@@ -71,4 +73,10 @@ Bool IsWord2000( TidyDocImpl* doc );
/* where appropriate move object elements from head to body */
void BumpObject( TidyDocImpl* doc, Node *html );
+void FixBrakes( TidyDocImpl* pDoc, Node *pParent );
+
+void VerifyHTTPEquiv( TidyDocImpl* pDoc, Node *pParent );
+
+Bool HTMLVersionCompliance( TidyDocImpl* doc );
+
#endif /* __CLEAN_H__ */
diff --git a/tidylib-src/src/config.c b/tidylib-src/src/config.c
old mode 100755
new mode 100644
index 4e39a6c7..93417afa
--- a/tidylib-src/src/config.c
+++ b/tidylib-src/src/config.c
@@ -1,14 +1,14 @@
/*
config.c -- read config file and manage config properties
- (c) 1998-2002 (W3C) MIT, INRIA, Keio University
+ (c) 1998-2003 (W3C) MIT, INRIA, Keio University
See tidy.h for the copyright notice.
CVS Info :
- $Author: creitzel $
- $Date: 2002/08/13 21:41:53 $
- $Revision: 1.43.2.8 $
+ $Author: terry_teague $
+ $Date: 2003/02/18 07:13:08 $
+ $Revision: 1.46 $
*/
@@ -31,6 +31,7 @@
#include "tidy-int.h"
#include "message.h"
#include "tmbstr.h"
+#include "tags.h"
int CharEncodingId( ctmbstr charenc ); /* returns -1 if not recognized */
@@ -49,21 +50,21 @@ void FreeConfig( TidyDocImpl* doc )
/* Arrange so index can be cast to enum
*/
-static ctmbstr boolPicks[] =
+static const ctmbstr boolPicks[] =
{
"no",
"yes",
null
};
-static ctmbstr invBoolPicks[] =
+static const ctmbstr invBoolPicks[] =
{
"yes",
"no",
null
};
-static ctmbstr autoBoolPicks[] =
+static const ctmbstr autoBoolPicks[] =
{
"no",
"yes",
@@ -71,14 +72,14 @@ static ctmbstr autoBoolPicks[] =
null
};
-static ctmbstr repeatAttrPicks[] =
+static const ctmbstr repeatAttrPicks[] =
{
"keep-first",
"keep-last",
null
};
-static ctmbstr accessPicks[] =
+static const ctmbstr accessPicks[] =
{
"0 - Tidy Classic",
"1 - Priority 1 Checks",
@@ -87,15 +88,17 @@ static ctmbstr accessPicks[] =
null
};
-static ctmbstr charEncPicks[] =
+static const ctmbstr charEncPicks[] =
{
"raw",
"ascii",
+ "latin0",
"latin1",
"utf8",
"iso2022",
"mac",
"win1252",
+ "ibm858",
#if SUPPORT_UTF16_ENCODINGS
"utf16le",
@@ -111,7 +114,15 @@ static ctmbstr charEncPicks[] =
null
};
-static ctmbstr doctypePicks[] =
+static const ctmbstr newlinePicks[] =
+{
+ "LF",
+ "CRLF",
+ "CR",
+ null
+};
+
+static const ctmbstr doctypePicks[] =
{
"omit",
"auto",
@@ -121,7 +132,6 @@ static ctmbstr doctypePicks[] =
null
};
-
#define MU TidyMarkup
#define DG TidyDiagnostics
#define PP TidyPrettyPrint
@@ -132,7 +142,16 @@ static ctmbstr doctypePicks[] =
#define BL TidyBoolean
#define ST TidyString
-static TidyOptionImpl option_defs[] =
+#define DLF DEFAULT_NL_CONFIG
+
+/* If Accessibility checks not supported, make config setting read-only */
+#if SUPPORT_ACCESSIBILITY_CHECKS
+#define ParseAcc ParseInt
+#else
+#define ParseAcc null
+#endif
+
+static const TidyOptionImpl option_defs[] =
{
{ TidyUnknownOption, MS, "unknown!", IN, 0, null, null },
{ TidyIndentSpaces, PP, "indent-spaces", IN, 2, ParseInt, null },
@@ -142,6 +161,7 @@ static TidyOptionImpl option_defs[] =
{ TidyCharEncoding, CE, "char-encoding", IN, ASCII, ParseCharEnc, charEncPicks },
{ TidyInCharEncoding, CE, "input-encoding", IN, LATIN1, ParseCharEnc, charEncPicks },
{ TidyOutCharEncoding,CE, "output-encoding", IN, ASCII, ParseCharEnc, charEncPicks },
+ { TidyNewline, CE, "newline", IN, DLF, ParseNewline, newlinePicks },
{ TidyDoctypeMode, MU, "doctype-mode", IN, TidyDoctypeAuto, null, doctypePicks },
{ TidyDoctype, MU, "doctype", ST, null, ParseDocType, doctypePicks },
@@ -150,11 +170,12 @@ static TidyOptionImpl option_defs[] =
{ TidyAltText, MU, "alt-text", ST, null, ParseString, null },
{ TidySlideStyle, MS, "slide-style", ST, null, ParseName, null },
{ TidyErrFile, MS, "error-file", ST, null, ParseString, null },
+ { TidyOutFile, MS, "output-file", ST, null, ParseString, null },
{ TidyWriteBack, MS, "write-back", BL, no, ParseBool, boolPicks },
{ TidyShowMarkup, PP, "markup", BL, yes, ParseBool, boolPicks },
{ TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks },
{ TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks },
- { TidyIndentContent, PP, "indent", IN, TidyAutoState, ParseIndent, autoBoolPicks },
+ { TidyIndentContent, PP, "indent", IN, TidyNoState, ParseIndent, autoBoolPicks },
{ TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks },
{ TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks },
{ TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks },
@@ -217,14 +238,12 @@ static TidyOptionImpl option_defs[] =
{ TidyReplaceColor, MU, "replace-color", BL, no, ParseBool, boolPicks },
{ TidyCSSPrefix, MU, "css-prefix", ST, null, ParseCSS1Selector, null },
- { TidyInlineTags, MU, "new-inline-tags", ST, null, ParseTagNames, boolPicks },
- { TidyBlockTags, MU, "new-blocklevel-tags", ST, null, ParseTagNames, boolPicks },
- { TidyEmptyTags, MU, "new-empty-tags", ST, null, ParseTagNames, boolPicks },
- { TidyPreTags, MU, "new-pre-tags", ST, null, ParseTagNames, boolPicks },
+ { TidyInlineTags, MU, "new-inline-tags", ST, null, ParseTagNames, NULL },
+ { TidyBlockTags, MU, "new-blocklevel-tags", ST, null, ParseTagNames, NULL },
+ { TidyEmptyTags, MU, "new-empty-tags", ST, null, ParseTagNames, NULL },
+ { TidyPreTags, MU, "new-pre-tags", ST, null, ParseTagNames, NULL },
-#if SUPPORT_ACCESSIBILITY_CHECKS
- { TidyAccessibilityCheckLevel, DG, "accessibility-check", IN, 0, ParseInt, accessPicks },
-#endif
+ { TidyAccessibilityCheckLevel, DG, "accessibility-check", IN, 0, ParseAcc, accessPicks },
{ N_TIDY_OPTIONS, null }
};
@@ -234,7 +253,7 @@ static TidyOptionImpl option_defs[] =
*/
const TidyOptionImpl* lookupOption( ctmbstr s )
{
- TidyOptionImpl* np = option_defs;
+ const TidyOptionImpl* np = option_defs;
for ( /**/; np < option_defs + N_TIDY_OPTIONS; ++np )
{
if ( tmbstrcasecmp(s, np->name) == 0 )
@@ -251,7 +270,7 @@ const TidyOptionImpl* getOption( TidyOptionId optId )
}
-static void FreeOptionValue( TidyOptionImpl* option, uint value )
+static void FreeOptionValue( const TidyOptionImpl* option, uint value )
{
if ( value && option->type == TidyString && value != option->dflt )
{
@@ -259,7 +278,8 @@ static void FreeOptionValue( TidyOptionImpl* option, uint value )
}
}
-static void CopyOptionValue( TidyOptionImpl* option, uint* oldval, uint newval )
+static void CopyOptionValue( const TidyOptionImpl* option,
+ uint* oldval, uint newval )
{
assert( oldval != null );
FreeOptionValue( option, *oldval );
@@ -273,7 +293,7 @@ static void CopyOptionValue( TidyOptionImpl* option, uint* oldval, uint newval )
Bool SetOptionValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr val )
{
- TidyOptionImpl* option = &option_defs[ optId ];
+ const TidyOptionImpl* option = &option_defs[ optId ];
Bool ok = ( optId < N_TIDY_OPTIONS );
if ( ok )
{
@@ -311,7 +331,7 @@ Bool ResetOptionToDefault( TidyDocImpl* doc, TidyOptionId optId )
Bool ok = ( optId > 0 && optId < N_TIDY_OPTIONS );
if ( ok )
{
- TidyOptionImpl* option = option_defs + optId;
+ const TidyOptionImpl* option = option_defs + optId;
uint* value = &doc->config.value[ optId ];
assert( optId == option->id );
CopyOptionValue( option, value, option->dflt );
@@ -319,22 +339,46 @@ Bool ResetOptionToDefault( TidyDocImpl* doc, TidyOptionId optId )
return ok;
}
+/* Re-parse the current string value of one tag-declaration option.
+**
+** The value is duplicated, the copy is handed to ParseConfigValue for
+** optId, and the copy is freed afterwards.
+** NOTE(review): the copy is presumably needed because parsing replaces
+** the stored option value while it is being read - confirm.
+** The only caller in this file checks that the option is set before
+** calling.
+*/
+static void ReparseTagType( TidyDocImpl* doc, TidyOptionId optId )
+{
+ ctmbstr tagdecl = cfgStr( doc, optId );
+ tmbstr dupdecl = tmbstrdup( tagdecl );
+ ParseConfigValue( doc, optId, dupdecl );
+ MemFree( dupdecl );
+}
+
+/* Rebuild the declared-tag data from the current values of the four
+** new-*-tags options (inline, blocklevel, empty, pre).
+** Not efficient, but effective.
+*/
+static void ReparseTagDecls( TidyDocImpl* doc )
+{
+    /* Discard the existing declarations first, then re-parse every
+    ** option that currently has a value.
+    ** NOTE(review): a tag type of 0 presumably means "all types" - confirm.
+    */
+    FreeDeclaredTags( doc, 0 );
+    if ( cfg(doc, TidyInlineTags) )
+        ReparseTagType( doc, TidyInlineTags );
+    if ( cfg(doc, TidyBlockTags) )
+        ReparseTagType( doc, TidyBlockTags );
+    if ( cfg(doc, TidyEmptyTags) )
+        ReparseTagType( doc, TidyEmptyTags );
+    if ( cfg(doc, TidyPreTags) )
+        ReparseTagType( doc, TidyPreTags );
+}
+
void ResetConfigToDefault( TidyDocImpl* doc )
{
uint ixVal;
- TidyOptionImpl* option = option_defs;
+ const TidyOptionImpl* option = option_defs;
uint* value = &doc->config.value[ 0 ];
for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
{
assert( ixVal == (uint) option->id );
CopyOptionValue( option, &value[ixVal], option->dflt );
}
+ FreeDeclaredTags( doc, 0 );
}
void TakeConfigSnapshot( TidyDocImpl* doc )
{
uint ixVal;
- TidyOptionImpl* option = option_defs;
+ const TidyOptionImpl* option = option_defs;
uint* value = &doc->config.value[ 0 ];
uint* snap = &doc->config.snapshot[ 0 ];
@@ -349,7 +393,7 @@ void TakeConfigSnapshot( TidyDocImpl* doc )
void ResetConfigToSnapshot( TidyDocImpl* doc )
{
uint ixVal;
- TidyOptionImpl* option = option_defs;
+ const TidyOptionImpl* option = option_defs;
uint* value = &doc->config.value[ 0 ];
uint* snap = &doc->config.snapshot[ 0 ];
@@ -358,6 +402,8 @@ void ResetConfigToSnapshot( TidyDocImpl* doc )
assert( ixVal == (uint) option->id );
CopyOptionValue( option, &value[ixVal], snap[ixVal] );
}
+ FreeDeclaredTags( doc, 0 );
+ ReparseTagDecls( doc );
}
void CopyConfig( TidyDocImpl* docTo, TidyDocImpl* docFrom )
@@ -365,7 +411,7 @@ void CopyConfig( TidyDocImpl* docTo, TidyDocImpl* docFrom )
if ( docTo != docFrom )
{
uint ixVal;
- TidyOptionImpl* option = option_defs;
+ const TidyOptionImpl* option = option_defs;
uint* from = &docFrom->config.value[ 0 ];
uint* to = &docTo->config.value[ 0 ];
@@ -375,6 +421,7 @@ void CopyConfig( TidyDocImpl* docTo, TidyDocImpl* docFrom )
assert( ixVal == (uint) option->id );
CopyOptionValue( option, &to[ixVal], from[ixVal] );
}
+ ReparseTagDecls( docTo );
AdjustConfig( docTo ); /* Make sure it's consistent */
}
}
@@ -392,7 +439,7 @@ uint _cfgGet( TidyDocImpl* doc, TidyOptionId optId )
Bool _cfgGetBool( TidyDocImpl* doc, TidyOptionId optId )
{
uint val = _cfgGet( doc, optId );
- TidyOptionImpl* opt = &option_defs[ optId ];
+ const TidyOptionImpl* opt = &option_defs[ optId ];
assert( opt && opt->type == TidyBoolean );
return (Bool) val;
}
@@ -400,7 +447,7 @@ Bool _cfgGetBool( TidyDocImpl* doc, TidyOptionId optId )
ctmbstr _cfgGetString( TidyDocImpl* doc, TidyOptionId optId )
{
uint val = _cfgGet( doc, optId );
- TidyOptionImpl* opt = &option_defs[ optId ];
+ const TidyOptionImpl* opt = &option_defs[ optId ];
assert( opt && opt->type == TidyString );
return (ctmbstr) val;
}
@@ -483,7 +530,7 @@ static uint NextProperty( TidyConfigImpl* config )
~/foo or ~your/foo according to $HOME and your
user name. This will only work on Unix systems.
*/
-tmbstr ExpandTilde(ctmbstr filename)
+ctmbstr ExpandTilde(ctmbstr filename)
{
#ifdef SUPPORT_GETPWNAM
static char *expanded_filename;
@@ -536,7 +583,7 @@ tmbstr ExpandTilde(ctmbstr filename)
}
#endif /* SUPPORT_GETPWNAM */
- return (tmbstr) filename;
+ return (ctmbstr) filename;
}
#ifndef TIDY_MAX_NAME
@@ -588,7 +635,50 @@ int ParseConfigFileEnc( TidyDocImpl* doc, ctmbstr file, ctmbstr charenc )
if ( option )
option->parser( doc, option );
else
- ReportUnknownOption( doc, name );
+ {
+ /* The name is not a built-in option. If the application registered
+ ** an option callback, read the rest of the line as the value and
+ ** offer the (name, value) pair to it; the option is reported as
+ ** unknown when there is no callback or the callback returns no.
+ */
+ if (null != doc->pOptCallback)
+ {
+ TidyConfigImpl* cfg = &doc->config;
+ tmbchar buf[8192];
+ int i = 0;
+ tchar delim = 0;
+ Bool waswhite = yes;
+
+ tchar c = SkipWhite( cfg );
+
+ /* An opening single or double quote is consumed and remembered so
+ ** the matching quote ends the value.
+ */
+ if ( c == '"' || c == '\'' )
+ {
+ delim = c;
+ c = AdvanceChar( cfg );
+ }
+
+ /* Copy characters up to end of line, end of input, the closing
+ ** delimiter or until the buffer is nearly full.
+ */
+ while ( i < sizeof(buf)-2 && c != EOF && c != '\r' && c != '\n' )
+ {
+ if ( delim && c == delim )
+ break;
+
+ /* NOTE(review): waswhite is only ever cleared, never set again,
+ ** so only whitespace ahead of the first stored character is
+ ** skipped; every later whitespace character is stored as one
+ ** space. Confirm whether collapsing runs was intended.
+ */
+ if ( IsWhite(c) )
+ {
+ if ( waswhite )
+ {
+ c = AdvanceChar( cfg );
+ continue;
+ }
+ c = ' ';
+ }
+ else
+ waswhite = no;
+
+ buf[i++] = (tmbchar) c;
+ c = AdvanceChar( cfg );
+ }
+ buf[i] = '\0';
+ if (no == (*doc->pOptCallback)( name, buf ))
+ ReportUnknownOption( doc, name );
+ }
+ else
+ ReportUnknownOption( doc, name );
+ }
}
}
@@ -614,7 +704,14 @@ Bool ParseConfigOption( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optval )
const TidyOptionImpl* option = lookupOption( optnam );
Bool ok = ( option != NULL );
if ( !ok )
- ReportUnknownOption( doc, optnam );
+ {
+ /* Not a standard tidy option. Check to see if the user application
+ recognizes it */
+ if (null != doc->pOptCallback)
+ ok = (*doc->pOptCallback)( optnam, optval );
+ if (!ok)
+ ReportUnknownOption( doc, optnam );
+ }
else
ok = ParseConfigValue( doc, option->id, optval );
return ok;
@@ -625,7 +722,7 @@ Bool ParseConfigOption( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optval )
*/
Bool ParseConfigValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optval )
{
- TidyOptionImpl* option = option_defs + optId;
+ const TidyOptionImpl* option = option_defs + optId;
Bool ok = ( optId < N_TIDY_OPTIONS && optval != null );
if ( !ok )
@@ -665,11 +762,21 @@ Bool AdjustCharEncoding( TidyDocImpl* doc, int encoding )
outenc = ASCII;
break;
+ case IBM858:
+ inenc = IBM858;
+ outenc = ASCII;
+ break;
+
case ASCII:
inenc = LATIN1;
outenc = ASCII;
break;
+ case LATIN0:
+ inenc = LATIN0;
+ outenc = ASCII;
+ break;
+
case RAW:
case LATIN1:
case UTF8:
@@ -812,6 +919,36 @@ Bool ParseTriState( TidyTriState theState, TidyDocImpl* doc,
return yes;
}
+/* Parse the value of the "newline" option: "lf", "crlf" or "cr",
+** compared case-insensitively.
+**
+** Skips leading whitespace, then reads one word from the config stream
+** (ended by whitespace, CR, LF or end of stream). On a match the option
+** identified by entry->id is set to TidyLF, TidyCRLF or TidyCR and yes
+** is returned. Otherwise a bad-argument report is issued for
+** entry->name and no is returned.
+*/
+Bool ParseNewline( TidyDocImpl* doc, const TidyOptionImpl* entry )
+{
+    int nl = -1;
+    Bool ok;
+    tmbchar work[ 16 ] = {0};
+    /* 'end' stops one short of the array size: that last slot is kept
+    ** for the terminator stored after the loop. With end at
+    ** work + sizeof(work), a word of 16 or more characters filled the
+    ** whole array and the terminator was written one past its end.
+    */
+    tmbstr cp = work, end = work + sizeof(work) - 1;
+    TidyConfigImpl* cfg = &doc->config;
+    tchar c = SkipWhite( cfg );
+
+    while ( c!=EndOfStream && cp < end && !IsWhite(c) && c != '\r' && c != '\n' )
+    {
+        *cp++ = (tmbchar) c;
+        c = AdvanceChar( cfg );
+    }
+    *cp = 0;
+
+    if ( tmbstrcasecmp(work, "lf") == 0 )
+        nl = TidyLF;
+    else if ( tmbstrcasecmp(work, "crlf") == 0 )
+        nl = TidyCRLF;
+    else if ( tmbstrcasecmp(work, "cr") == 0 )
+        nl = TidyCR;
+
+    /* nl keeps its initial -1 when nothing matched. */
+    ok = ( nl >= TidyLF && nl <= TidyCR );
+    if ( !ok )
+        ReportBadArgument( doc, entry->name );
+    else
+        SetOptionInt( doc, entry->id, nl );
+    return ok;
+}
+
Bool ParseBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
{
uint flag = 0;
@@ -869,6 +1006,23 @@ Bool ParseCSS1Selector( TidyDocImpl* doc, const TidyOptionImpl* option )
return yes;
}
+/* Coordinates Config update and Tags data.
+**
+** Defines 'name' as a tag of type tagType and appends it to the string
+** value of option optId, separated from any previous value by ", ".
+** When the option has no previous value, 'name' itself becomes the
+** value.
+*/
+static void DeclareUserTag( TidyDocImpl* doc, TidyOptionId optId, int tagType, tmbstr name )
+{
+ ctmbstr prvval = cfgStr( doc, optId );
+ tmbstr catval = name;
+ if ( prvval )
+ {
+ /* +3 covers the ", " separator and the terminating null. */
+ uint len = tmbstrlen(name) + tmbstrlen(prvval) + 3;
+ /* NOTE(review): relies on tmbstrndup allocating room for len
+ ** characters even though prvval is shorter, and on it not
+ ** returning null - confirm against its implementation.
+ */
+ catval = tmbstrndup( prvval, len );
+ tmbstrcat( catval, ", " );
+ tmbstrcat( catval, name );
+ }
+ DefineTag( doc, tagType, name );
+ /* NOTE(review): catval is freed right below, so SetOptionValue is
+ ** presumably storing its own copy of the string - confirm.
+ */
+ SetOptionValue( doc, optId, catval );
+ /* Only the concatenated buffer is ours to free; otherwise catval is
+ ** the caller's 'name'.
+ */
+ if ( prvval )
+ MemFree( catval );
+}
/* a space or comma separated list of tag names */
Bool ParseTagNames( TidyDocImpl* doc, const TidyOptionImpl* option )
@@ -890,6 +1044,8 @@ Bool ParseTagNames( TidyDocImpl* doc, const TidyOptionImpl* option )
return no;
}
+ SetOptionValue( doc, option->id, null );
+ FreeDeclaredTags( doc, ttyp );
cfg->defined_tags |= ttyp;
do
@@ -900,23 +1056,36 @@ Bool ParseTagNames( TidyDocImpl* doc, const TidyOptionImpl* option )
continue;
}
- if (c == '\r')
+ if ( c == '\r' || c == '\n' )
{
- c = AdvanceChar( cfg );
-
- if ( c == '\n' )
- AdvanceChar( cfg );
+ uint c2 = AdvanceChar( cfg );
+ if ( c == '\r' && c2 == '\n' )
+ c = AdvanceChar( cfg );
+ else
+ c = c2;
if ( !IsWhite(c) )
+ {
+ buf[i] = 0;
+ UngetChar( c, cfg->cfgIn );
+ UngetChar( '\n', cfg->cfgIn );
break;
+ }
}
+ /*
if ( c == '\n' )
{
c = AdvanceChar( cfg );
if ( !IsWhite(c) )
+ {
+ buf[i] = 0;
+ UngetChar( c, cfg->cfgIn );
+ UngetChar( '\n', cfg->cfgIn );
break;
+ }
}
+ */
while ( i < sizeof(buf)-2 && c != EOF && !IsWhite(c) && c != ',' )
{
@@ -929,12 +1098,14 @@ Bool ParseTagNames( TidyDocImpl* doc, const TidyOptionImpl* option )
continue; /* there is a trailing space on the line. */
/* add tag to dictionary */
- DefineTag( doc, ttyp, buf );
+ DeclareUserTag( doc, option->id, ttyp, buf );
i = 0;
++nTags;
}
while ( c != EOF );
+ if ( i > 0 )
+ DeclareUserTag( doc, option->id, ttyp, buf );
return ( nTags > 0 );
}
@@ -1002,6 +1173,8 @@ Bool ParseCharEnc( TidyDocImpl* doc, const TidyOptionImpl* option )
validEncoding = no;
ReportBadArgument( doc, option->name );
}
+ else
+ SetOptionInt( doc, option->id, enc );
if ( validEncoding && option->id == TidyCharEncoding )
AdjustCharEncoding( doc, enc );
@@ -1014,6 +1187,8 @@ int CharEncodingId( ctmbstr charenc )
int enc = -1;
if ( tmbstrcasecmp(charenc, "ascii") == 0 )
enc = ASCII;
+ else if ( tmbstrcasecmp(charenc, "latin0") == 0 )
+ enc = LATIN0;
else if ( tmbstrcasecmp(charenc, "latin1") == 0 )
enc = LATIN1;
else if ( tmbstrcasecmp(charenc, "raw") == 0 )
@@ -1026,6 +1201,8 @@ int CharEncodingId( ctmbstr charenc )
enc = MACROMAN;
else if ( tmbstrcasecmp(charenc, "win1252") == 0 )
enc = WIN1252;
+ else if ( tmbstrcasecmp(charenc, "ibm858") == 0 )
+ enc = IBM858;
#if SUPPORT_UTF16_ENCODINGS
else if ( tmbstrcasecmp(charenc, "utf16le") == 0 )
@@ -1053,12 +1230,14 @@ ctmbstr CharEncodingName( int encoding )
switch ( encoding )
{
case ASCII: encodingName = "ascii"; break;
+ case LATIN0: encodingName = "latin0"; break;
case LATIN1: encodingName = "latin1"; break;
case RAW: encodingName = "raw"; break;
case UTF8: encodingName = "utf8"; break;
case ISO2022: encodingName = "iso2022"; break;
case MACROMAN: encodingName = "mac"; break;
case WIN1252: encodingName = "win1252"; break;
+ case IBM858: encodingName = "ibm858"; break;
#if SUPPORT_UTF16_ENCODINGS
case UTF16LE: encodingName = "utf16le"; break;
@@ -1169,6 +1348,7 @@ Bool ParseRepeatAttr( TidyDocImpl* doc, const TidyOptionImpl* option )
return ok;
}
+#if SUPPORT_UTF16_ENCODINGS
Bool ParseBOM( TidyDocImpl* doc, const TidyOptionImpl* option )
{
uint flag = 0;
@@ -1179,6 +1359,7 @@ Bool ParseBOM( TidyDocImpl* doc, const TidyOptionImpl* option )
}
return ok;
}
+#endif
/* Use TidyOptionId as iterator.
** Send index of 1st option after TidyOptionUnknown as start of list.
@@ -1196,7 +1377,7 @@ const TidyOptionImpl* getNextOption( TidyDocImpl* doc, TidyIterator* iter )
const TidyOptionImpl* option = null;
TidyOptionId optId;
assert( iter != null );
- optId = (TidyOptionId) *iter;
+ optId = *(TidyOptionId *) iter;
if ( optId > TidyUnknownOption && optId < N_TIDY_OPTIONS )
{
option = &option_defs[ optId ];
@@ -1230,7 +1411,8 @@ ctmbstr getNextOptionPick( const TidyOptionImpl* option,
return val;
}
-static int WriteOptionString( TidyOptionImpl* option, ctmbstr sval, StreamOut* out )
+static int WriteOptionString( const TidyOptionImpl* option,
+ ctmbstr sval, StreamOut* out )
{
ctmbstr cp = option->name;
while ( *cp )
@@ -1244,23 +1426,23 @@ static int WriteOptionString( TidyOptionImpl* option, ctmbstr sval, StreamOut*
return 0;
}
-static int WriteOptionInt( TidyOptionImpl* option, uint ival, StreamOut* out )
+static int WriteOptionInt( const TidyOptionImpl* option, uint ival, StreamOut* out )
{
tmbchar sval[ 32 ] = {0};
sprintf( sval, "%d", ival );
return WriteOptionString( option, sval, out );
}
-static int WriteOptionBool( TidyOptionImpl* option, Bool bval, StreamOut* out )
+static int WriteOptionBool( const TidyOptionImpl* option, Bool bval, StreamOut* out )
{
ctmbstr sval = bval ? "yes" : "no";
return WriteOptionString( option, sval, out );
}
-static int WriteOptionPick( TidyOptionImpl* option, uint ival, StreamOut* out )
+static int WriteOptionPick( const TidyOptionImpl* option, uint ival, StreamOut* out )
{
uint ix;
- ctmbstr* val = option->pickList;
+ const ctmbstr* val = option->pickList;
for ( ix=0; val[ix] && ixconfig.value;
for ( /**/; !diff && option && option->name; ++option, ++ival )
{
@@ -1291,20 +1473,35 @@ Bool ConfigDiffThanDefault( TidyDocImpl* doc )
int SaveConfigToStream( TidyDocImpl* doc, StreamOut* out )
{
int rc = 0;
- TidyOptionImpl* option;
+ const TidyOptionImpl* option;
for ( option=option_defs+1; 0==rc && option && option->name; ++option )
{
uint ival = doc->config.value[ option->id ];
if ( option->parser == null )
continue;
- if ( ival == option->dflt )
+ if ( ival == option->dflt && option->id != TidyDoctype)
continue;
if ( option->id == TidyDoctype ) /* Special case */
{
uint dtmode = cfg( doc, TidyDoctypeMode );
if ( dtmode == TidyDoctypeUser )
- rc = WriteOptionString( option, (ctmbstr)ival, out );
+ {
+     /* A user-specified doctype is written wrapped in double quotes. */
+     tmbstr t;
+
+     /* Room for the value, the two double quotes and the terminating
+     ** null. Allowing only 2 extra bytes left no room for the null
+     ** that follows the closing quote, so it was written past the end
+     ** of the buffer.
+     ** If the allocation fails nothing is written and rc is unchanged.
+     */
+     if (( t = MemAlloc( tmbstrlen( (ctmbstr)ival) + 3 ) ))
+     {
+         t[0] = '\"'; t[1] = 0;
+
+         tmbstrcat( t, (ctmbstr)ival );
+         tmbstrcat( t, "\"" );
+         rc = WriteOptionString( option, (ctmbstr)t, out );
+
+         MemFree( t );
+     }
+ }
+ else if ( dtmode == option_defs[TidyDoctypeMode].dflt )
+ continue;
else
rc = WriteOptionPick( option, dtmode, out );
}
@@ -1334,10 +1531,11 @@ int SaveConfigFile( TidyDocImpl* doc, ctmbstr cfgfil )
int status = -1;
StreamOut* out = null;
uint outenc = cfg( doc, TidyOutCharEncoding );
- FILE* fout = fopen( cfgfil, "w" );
+ uint nl = cfg( doc, TidyNewline );
+ FILE* fout = fopen( cfgfil, "wb" );
if ( fout )
{
- out = FileOutput( fout, outenc );
+ out = FileOutput( fout, outenc, nl );
status = SaveConfigToStream( doc, out );
fclose( fout );
MemFree( out );
@@ -1348,7 +1546,8 @@ int SaveConfigFile( TidyDocImpl* doc, ctmbstr cfgfil )
int SaveConfigSink( TidyDocImpl* doc, TidyOutputSink* sink )
{
uint outenc = cfg( doc, TidyOutCharEncoding );
- StreamOut* out = UserOutput( sink, outenc );
+ uint nl = cfg( doc, TidyNewline );
+ StreamOut* out = UserOutput( sink, outenc, nl );
int status = SaveConfigToStream( doc, out );
MemFree( out );
return status;
diff --git a/tidylib-src/src/config.h b/tidylib-src/src/config.h
old mode 100755
new mode 100644
index 7ea8461b..536a28b6
--- a/tidylib-src/src/config.h
+++ b/tidylib-src/src/config.h
@@ -9,8 +9,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/08/08 21:40:36 $
- $Revision: 1.1.2.4 $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.2 $
config files associate a property name with a value.
@@ -44,7 +44,7 @@ struct _tidy_option
TidyOptionType type; /* string, int or bool */
uint dflt; /* factory default */
ParseProperty* parser; /* parsing method, read-only if null */
- ctmbstr* pickList; /* pick list */
+ const ctmbstr* pickList; /* pick list */
};
@@ -163,10 +163,11 @@ ParseProperty ParseString;
/* a space or comma separated list of tag names */
ParseProperty ParseTagNames;
-/* RAW, ASCII, LATIN1, UTF8, ISO2022, MACROMAN,
- UTF16LE, UTF16BE, UTF16, WIN1252, BIG5, SHIFTJIS
+/* RAW, ASCII, LATIN0, LATIN1, UTF8, ISO2022, MACROMAN,
+ WIN1252, IBM858, UTF16LE, UTF16BE, UTF16, BIG5, SHIFTJIS
*/
ParseProperty ParseCharEnc;
+ParseProperty ParseNewline;
/* specific to the indent option - Bool and 'auto' */
ParseProperty ParseIndent;
diff --git a/tidylib-src/src/entities.c b/tidylib-src/src/entities.c
old mode 100755
new mode 100644
index 6cebf2c0..675caf98
--- a/tidylib-src/src/entities.c
+++ b/tidylib-src/src/entities.c
@@ -5,9 +5,9 @@
CVS Info :
- $Author: terry_teague $
- $Date: 2002/07/08 07:41:58 $
- $Revision: 1.7.2.2 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.8 $
Entity handling can be static because there are no config or
document-specific values. Lookup table is 100% defined at
@@ -30,13 +30,10 @@ struct _entity
ctmbstr name;
uint versions;
uint code;
-#if 0
- entity* next;
-#endif
};
-static entity entities[] =
+static const entity entities[] =
{
/*
** Markup pre-defined character entities
@@ -309,82 +306,6 @@ static entity entities[] =
{ null, 0, 0 }
};
-#if 0
-static entity* hashtab[ ENTITY_HASHSIZE ] = {0};
-
-static unsigned hash( ctmbstr s )
-{
- uint hashval;
- for (hashval = 0; *s != '\0'; s++)
- hashval = *s + 31*hashval;
- return hashval % ENTITY_HASHSIZE;
-}
-
-/* Don't dupe strings. Only literal constants are used.
-*/
-static entity* install( ctmbstr name, uint code, uint versions )
-{
- entity *np = lookup( name );
-
- if ( name == null )
- return null;
-
- if ( np == null )
- {
- uint hashval;
- np = (entity*) MemAlloc( sizeof(*np) );
- if ( np == null )
- return null;
-
- hashval = hash(name);
- np->next = hashtab[hashval];
- np->name = name;
- hashtab[hashval] = np;
- }
-
- np->code = code;
- np->versions = versions;
- return np;
-}
-
-static entity* lookup( ctmbstr s )
-{
- entity *np;
- for ( np = hashtab[hash(s)]; np != null; np = np->next )
- if ( tmbstrcmp(s, np->name) == 0 )
- return np;
- return null;
-}
-
-void InitEntities()
-{
- entity *ep;
- for ( ep = entities; ep->name != null; ++ep )
- install( ep->name, ep->code, ep->versions );
-}
-
-
-void FreeEntities()
-{
- entity* prev, *next;
- int i;
-
- for (i = 0; i < ENTITY_HASHSIZE; ++i)
- {
- prev = null;
- next = hashtab[i];
-
- while(next)
- {
- prev = next->next;
- MemFree(next);
- next = prev;
- }
-
- hashtab[i] = null;
- }
-}
-#else
/* Pure static implementation. Trades off lookup speed
** for faster setup time (well, none actually).
@@ -392,10 +313,10 @@ void FreeEntities()
** speed that hash doesn't improve things without > 500
** items in list.
*/
-static entity* lookup( ctmbstr s )
+static const entity* lookup( ctmbstr s )
{
tmbchar ch = (tmbchar)( s ? *s : 0 );
- entity *np;
+ const entity *np;
for ( np = entities; ch && np && np->name; ++np )
if ( ch == *np->name && tmbstrcmp(s, np->name) == 0 )
return np;
@@ -409,19 +330,18 @@ void InitEntities()
void FreeEntities()
{
}
-#endif
/* entity starting with "&" returns zero on error */
uint EntityCode( ctmbstr name, uint versions )
{
- entity* np;
+ const entity* np;
assert( name && name[0] == '&' );
/* numeric entity: name = "&#" followed by number */
if ( name[1] == '#' )
{
uint c = 0; /* zero on missing/bad number */
- Bool isXml = ( versions & VERS_XML == VERS_XML );
+ Bool isXml = ( (versions & VERS_XML) == VERS_XML );
/* 'x' prefix denotes hexadecimal number format */
if ( name[2] == 'x' || (!isXml && name[2] == 'X') )
@@ -435,13 +355,6 @@ uint EntityCode( ctmbstr name, uint versions )
/* Named entity: name ="&" followed by a name */
if ( np = lookup(name+1) )
{
-#if 0
- /* if input is treated as XML (-xml) only accept general entities */
- /* i.e. only amp, gt, lt, quot, apos */
- if ( isXml && !(np->code == 34 || np->code == 38 || np->code == 39 ||
- np->code == 60 || np->code == 62))
- return 0;
-#endif
/* Only recognize entity name if version supports it. */
if ( np->versions & versions )
return np->code;
@@ -454,7 +367,7 @@ uint EntityCode( ctmbstr name, uint versions )
ctmbstr EntityName( uint ch, uint versions )
{
ctmbstr entnam = null;
- entity *ep;
+ const entity *ep;
for ( ep = entities; ep->name != null; ++ep )
{
diff --git a/tidylib-src/src/entities.h b/tidylib-src/src/entities.h
old mode 100755
new mode 100644
index 7287e859..901f31ba
--- a/tidylib-src/src/entities.h
+++ b/tidylib-src/src/entities.h
@@ -8,9 +8,9 @@
CVS Info :
- $Author: terry_teague $
- $Date: 2002/07/08 07:42:13 $
- $Revision: 1.1.2.2 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.2 $
*/
diff --git a/tidylib-src/src/fileio.c b/tidylib-src/src/fileio.c
old mode 100755
new mode 100644
index e564ef39..c31309e9
--- a/tidylib-src/src/fileio.c
+++ b/tidylib-src/src/fileio.c
@@ -6,8 +6,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/07/28 18:10:15 $
- $Revision: 1.1.2.3 $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.2 $
Default implementations of Tidy input sources
and output sinks based on standard C FILE*.
@@ -33,14 +33,14 @@ int filesrc_getByte( uint sourceData )
if ( fin->unget.size > 0 )
bv = tidyBufPopByte( &fin->unget );
else
- bv = getc( fin->fp );
+ bv = fgetc( fin->fp );
return bv;
}
Bool filesrc_eof( uint sourceData )
{
FileSource* fin = (FileSource*) sourceData;
- Bool isEOF = ( fin->unget.size > 0 );
- if ( !isEOF )
+ Bool isEOF = ( fin->unget.size == 0 );
+ if ( isEOF )
isEOF = feof( fin->fp );
return isEOF;
}
@@ -76,7 +76,7 @@ void freeFileSource( TidyInputSource* inp, Bool closeIt )
void filesink_putByte( uint sinkData, byte bv )
{
FILE* fout = (FILE*) sinkData;
- putc( bv, fout );
+ fputc( bv, fout );
}
void initFileSink( TidyOutputSink* outp, FILE* fp )
diff --git a/tidylib-src/src/forward.h b/tidylib-src/src/forward.h
old mode 100755
new mode 100644
index 58ee2424..72aa3bae
--- a/tidylib-src/src/forward.h
+++ b/tidylib-src/src/forward.h
@@ -8,9 +8,9 @@
CVS Info :
- $Author: terry_teague $
- $Date: 2002/07/08 07:42:40 $
- $Revision: 1.1.2.5 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.2 $
Avoids many include file circular dependencies.
diff --git a/tidylib-src/src/istack.c b/tidylib-src/src/istack.c
old mode 100755
new mode 100644
index cdb57d0b..f17f5a92
--- a/tidylib-src/src/istack.c
+++ b/tidylib-src/src/istack.c
@@ -5,9 +5,9 @@
CVS Info :
- $Author: terry_teague $
- $Date: 2002/07/14 01:10:55 $
- $Revision: 1.7.2.4 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.8 $
*/
diff --git a/tidylib-src/src/lexer.c b/tidylib-src/src/lexer.c
old mode 100755
new mode 100644
index 41c7febf..a441a966
--- a/tidylib-src/src/lexer.c
+++ b/tidylib-src/src/lexer.c
@@ -1,13 +1,13 @@
/* lexer.c -- Lexer for html parser
- (c) 1998-2002 (W3C) MIT, INRIA, Keio University
+ (c) 1998-2003 (W3C) MIT, INRIA, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: creitzel $
- $Date: 2002/11/02 20:15:32 $
- $Revision: 1.72.2.9 $
+ $Date: 2003/02/20 16:39:47 $
+ $Revision: 1.76 $
*/
@@ -45,8 +45,6 @@
#include "clean.h"
#include "utf8.h"
-extern tmbstr release_date;
-
/* Forward references
*/
@@ -87,7 +85,7 @@ struct _vers
ctmbstr profile;
uint code;
}
-W3C_Version[] =
+const W3C_Version[] =
{
{"HTML 4.01", "XHTML 1.0 Strict", voyager_strict, VERS_HTML40_STRICT},
{"HTML 4.01 Transitional", "XHTML 1.0 Transitional", voyager_loose, VERS_HTML40_LOOSE},
@@ -580,7 +578,8 @@ Lexer* NewLexer()
Bool EndOfInput( TidyDocImpl* doc )
{
- return doc->docIn->source.eof( doc->docIn->source.sourceData );
+ assert( doc->docIn != null );
+ return ( !doc->docIn->pushed && IsEOF(doc->docIn) );
}
void FreeLexer( TidyDocImpl* doc )
@@ -601,6 +600,9 @@ void FreeLexer( TidyDocImpl* doc )
FreeNode( doc, doc->root );
doc->root = null;
+
+ FreeNode( doc, doc->givenDoctype );
+ doc->givenDoctype = null;
}
/* Lexer uses bigger memory chunks than pprint as
@@ -1257,9 +1259,9 @@ Bool AddGenerator( TidyDocImpl* doc )
{
#ifdef PLATFORM_NAME
sprintf( buf, "HTML Tidy for "PLATFORM_NAME" (vers %s), see www.w3.org",
- release_date );
+ tidyReleaseDate() );
#else
- sprintf( buf, "HTML Tidy (vers %s), see www.w3.org", release_date );
+ sprintf( buf, "HTML Tidy (vers %s), see www.w3.org", tidyReleaseDate() );
#endif
for ( node = head->content; node; node = node->next )
@@ -1349,11 +1351,15 @@ int FindGivenVersion( TidyDocImpl* doc, Node* doctype )
/**/;
len = j - i - 13;
- for (j = 1; j < W3C_VERSIONS; ++j)
+ for (j = 0; j < W3C_VERSIONS; ++j)
{
nm = W3C_Version[j].name;
if ( len == tmbstrlen(nm) && tmbstrncmp(p, nm, len) == 0 )
return W3C_Version[j].code;
+
+ nm = W3C_Version[j].voyager_name;
+ if ( len == tmbstrlen(nm) && tmbstrncmp(p, nm, len) == 0 )
+ return W3C_Version[j].code;
}
/* else unrecognized version */
@@ -1416,20 +1422,26 @@ Bool CheckDocTypeKeyWords(Lexer *lexer, Node *doctype)
ctmbstr HTMLVersionName( TidyDocImpl* doc )
{
- uint j, guessed = ApparentVersion( doc );
+ uint vers = ApparentVersion( doc );
+ return HTMLVersionNameFromCode( vers, doc->lexer->isvoyager );
+}
- for ( j = 0; j < W3C_VERSIONS; ++j )
+ctmbstr HTMLVersionNameFromCode( uint vers, Bool isXhtml )
+{
+ int ix;
+ for ( ix=0; ix < W3C_VERSIONS; ++ix )
+ {
+ if ( vers == W3C_Version[ix].code )
{
- if ( guessed == W3C_Version[j].code )
- {
- if ( doc->lexer->isvoyager )
- return W3C_Version[j].voyager_name;
- return W3C_Version[j].name;
- }
+ if ( isXhtml )
+ return W3C_Version[ix].voyager_name;
+ return W3C_Version[ix].name;
}
- return null;
+ }
+ return "HTML Proprietary";
}
+
static void FixHTMLNameSpace( TidyDocImpl* doc, ctmbstr profile )
{
Node* node = FindHTML( doc );
@@ -1465,7 +1477,8 @@ static void FixHTMLNameSpace( TidyDocImpl* doc, ctmbstr profile )
** etc. that may precede the tag.
*/
-static Node* NewXhtmlDocTypeNode( TidyDocImpl* doc )
+
+static Node* NewDocTypeNode( TidyDocImpl* doc )
{
Node* doctype = null;
Node* html = FindHTML( doc );
@@ -1580,7 +1593,7 @@ Bool SetXHTMLDocType( TidyDocImpl* doc )
}
else
{
- if ( !(doctype = NewXhtmlDocTypeNode( doc )) )
+ if ( !(doctype = NewDocTypeNode( doc )) )
return no;
}
@@ -1747,8 +1760,9 @@ Bool FixDocType( TidyDocImpl* doc )
return no;
/* for XML use the Voyager system identifier */
- if ( cfgBool(doc, TidyXmlOut) || cfgBool(doc, TidyXmlTags) ||
- lexer->isvoyager )
+ if ( !cfgBool(doc, TidyHtmlOut) &&
+ ( cfgBool(doc, TidyXmlOut) || cfgBool(doc, TidyXmlTags) ||
+ lexer->isvoyager ) )
{
if ( doctype )
DiscardElement( doc, doctype );
@@ -1758,7 +1772,7 @@ Bool FixDocType( TidyDocImpl* doc )
if ( !doctype )
{
- if ( !(doctype = NewXhtmlDocTypeNode( doc )) )
+ if ( !(doctype = NewDocTypeNode( doc )) )
return no;
}
@@ -3207,7 +3221,7 @@ static int ParseServerInstruction( TidyDocImpl* doc )
/* values start with "=" or " = " etc. */
/* doesn't consume the ">" at end of start tag */
-tmbstr ParseValue( TidyDocImpl* doc, ctmbstr name,
+static tmbstr ParseValue( TidyDocImpl* doc, ctmbstr name,
Bool foldCase, Bool *isempty, int *pdelim)
{
Lexer* lexer = doc->lexer;
@@ -3602,13 +3616,19 @@ AttVal* ParseAttrs( TidyDocImpl* doc, Bool *isempty )
ReportAttrError(lexer, lexer->token, av, MISSING_ATTR_VALUE);
else
ReportAttrError(lexer, lexer->token, av, BAD_ATTRIBUTE_VALUE);
- */
if (value != null)
ReportAttrError( doc, lexer->token, av, BAD_ATTRIBUTE_VALUE);
else if (LastChar(attribute) == '"')
ReportAttrError( doc, lexer->token, av, MISSING_QUOTEMARK);
else
ReportAttrError( doc, lexer->token, av, UNKNOWN_ATTRIBUTE);
+ */
+ if (LastChar(attribute) == '"')
+ ReportAttrError( doc, lexer->token, av, MISSING_QUOTEMARK);
+ else if (value == null)
+ ReportAttrError(doc, lexer->token, av, MISSING_ATTR_VALUE);
+ else
+ ReportAttrError(doc, lexer->token, av, INVALID_ATTRIBUTE);
FreeAttribute(av);
}
diff --git a/tidylib-src/src/lexer.h b/tidylib-src/src/lexer.h
old mode 100755
new mode 100644
index 28d3214c..fd466cf2
--- a/tidylib-src/src/lexer.h
+++ b/tidylib-src/src/lexer.h
@@ -8,8 +8,8 @@
CVS Info:
$Author: creitzel $
- $Date: 2002/08/11 16:41:40 $
- $Revision: 1.1.2.5 $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.2 $
*/
@@ -200,13 +200,13 @@ struct _StyleProp
struct _AttVal
{
- AttVal* next;
- Attribute* dict;
- Node* asp;
- Node* php;
- int delim;
- tmbstr attribute;
- tmbstr value;
+ AttVal* next;
+ const Attribute* dict;
+ Node* asp;
+ Node* php;
+ int delim;
+ tmbstr attribute;
+ tmbstr value;
};
@@ -229,7 +229,7 @@ struct _AttVal
struct _IStack
{
IStack* next;
- Dict* tag; /* tag's dictionary definition */
+ const Dict* tag; /* tag's dictionary definition */
tmbstr element; /* name (null for text nodes) */
AttVal* attributes;
};
@@ -248,8 +248,8 @@ struct _Node
Node* last;
AttVal* attributes;
- Dict* was; /* old tag when it was changed */
- Dict* tag; /* tag's dictionary definition */
+ const Dict* was; /* old tag when it was changed */
+ const Dict* tag; /* tag's dictionary definition */
tmbstr element; /* name (null for text nodes) */
@@ -340,12 +340,6 @@ struct _Lexer
*/
Node *CommentToken( Lexer *lexer );
-/* used to classify ASCII characters for lexical purposes.
-*/
-#define MAP(c) ((unsigned)c < 128 ? lexmap[(unsigned)c] : 0)
-extern uint lexmap[128];
-
-
#define XHTML_NAMESPACE "http://www.w3.org/1999/xhtml"
@@ -462,6 +456,7 @@ int ApparentVersion( TidyDocImpl* doc );
Bool CheckDocTypeKeyWords(Lexer *lexer, Node *doctype);
ctmbstr HTMLVersionName( TidyDocImpl* doc );
+ctmbstr HTMLVersionNameFromCode( uint vers, Bool isXhtml );
Bool SetXHTMLDocType( TidyDocImpl* doc );
diff --git a/tidylib-src/src/localize.c b/tidylib-src/src/localize.c
old mode 100755
new mode 100644
index 35f47bc2..56957658
--- a/tidylib-src/src/localize.c
+++ b/tidylib-src/src/localize.c
@@ -1,6 +1,6 @@
/* localize.c -- text strings and routines to handle errors and general messages
- (c) 1998-2002 (W3C) MIT, INRIA, Keio University
+ (c) 1998-2003 (W3C) MIT, INRIA, Keio University
See tidy.h for the copyright notice.
You should only need to edit this file and tidy.c
@@ -9,8 +9,8 @@
CVS Info :
$Author: terry_teague $
- $Date: 2002/11/04 08:45:00 $
- $Revision: 1.60.2.10 $
+ $Date: 2003/03/02 04:30:09 $
+ $Revision: 1.68 $
*/
@@ -29,7 +29,7 @@
*/
#define ATRC_ACCESS_URL "http://www.aprompt.ca/Tidy/accessibilitychecks.html"
-char *release_date = "1st November 2002";
+static const char *release_date = "1st March 2003"; /* library release date string */
ctmbstr ReleaseDate()
{
@@ -111,7 +111,7 @@ static char* ReportPosition( TidyDocImpl* doc, int line, int col, char* buf )
/* Change formatting to be parsable by GNU Emacs */
if ( cfgBool(doc, TidyEmacs) && cfgStr(doc, TidyEmacsFile) )
- sprintf( buf, "%s:%d:%d:",
+ sprintf( buf, "%s:%d:%d: ",
cfgStr(doc, TidyEmacsFile), line, col );
else /* traditional format */
sprintf( buf, "line %d column %d - ", line, col );
@@ -406,9 +406,9 @@ void ReportAttrError( TidyDocImpl* doc, Node *node, AttVal *av, uint code)
"unknown attribute \"%s\"", name );
break;
- case MISSING_ATTRIBUTE:
+ case INSERTING_ATTRIBUTE:
messageNode( doc, TidyWarning, node,
- "%s lacks \"%s\" attribute", tagdesc, name );
+ "inserting \"%s\" attribute for %s element", name, tagdesc );
break;
case MISSING_ATTR_VALUE:
@@ -428,6 +428,11 @@ void ReportAttrError( TidyDocImpl* doc, Node *node, AttVal *av, uint code)
tagdesc, name, value );
break;
+ case INVALID_ATTRIBUTE:
+ messageNode( doc, TidyWarning, node,
+ "%s attribute name \"%s\" (value=\"%s\") is invalid",
+ tagdesc, name, value );
+ break;
case XML_ID_SYNTAX:
messageNode( doc, TidyWarning, node,
"%s ID \"%s\" uses XML ID syntax", tagdesc, value );
@@ -537,13 +542,42 @@ void ReportAttrError( TidyDocImpl* doc, Node *node, AttVal *av, uint code)
}
}
+/* Warn that attribute attr on node is not supported in the HTML version versWanted. */
+void ReportNonCompliantAttr( TidyDocImpl* doc, Node* node, AttVal* attr, uint versWanted )
+{
+ ctmbstr attrnam = ( attr && attr->attribute ? attr->attribute : "Unknown" ); /* tolerate a null attr or a missing name */
+ ctmbstr htmlVer = HTMLVersionNameFromCode( versWanted, doc->lexer->isvoyager ); /* version name used in the message */
+ messageNode( doc, TidyWarning, node,
+ "Attribute \"%s\" not supported in %s", attrnam, htmlVer );
+}
+/* Warn that node does not conform to HTML version versWanted; code selects the message. */
+void ReportNonCompliantNode( TidyDocImpl* doc, Node* node, uint code, uint versWanted )
+{
+ char desc[ 256 ] = {0};
+ ctmbstr htmlVer = HTMLVersionNameFromCode( versWanted, doc->lexer->isvoyager );
+ TagToString( node, desc ); /* desc is printed below as the element description */
+
+ switch ( code ) /* only the two codes below are handled; any other code reports nothing */
+ {
+ case MIXED_CONTENT_IN_BLOCK:
+ messageNode( doc, TidyWarning, node,
+ "Text node in %s in %s", desc, htmlVer );
+ break;
+
+ case OBSOLETE_ELEMENT:
+ messageNode( doc, TidyWarning, node,
+ "Element %s not supported in %s", desc, htmlVer );
+ break;
+ }
+}
+
void ReportMissingAttr( TidyDocImpl* doc, Node* node, ctmbstr name )
{
/* ReportAttrError( doc, node, null, MISSING_ATTRIBUTE ); */
char tagdesc[ 64 ];
TagToString( node, tagdesc );
messageNode( doc, TidyWarning, node,
- "%s attribute \"%s\" lacks value", tagdesc, name );
+ "%s lacks \"%s\" attribute", tagdesc, name );
}
void ReportWarning( TidyDocImpl* doc, Node *element, Node *node, uint code )
@@ -654,7 +688,7 @@ void ReportWarning( TidyDocImpl* doc, Node *element, Node *node, uint code )
TagToString( element, elemdesc );
messageNode( doc, TidyWarning, rpt,
- "replacing %selement %s by %s",
+ "replacing %s element %s by %s",
obsolete, elemdesc, nodedesc );
}
break;
@@ -748,6 +782,11 @@ void ReportWarning( TidyDocImpl* doc, Node *element, Node *node, uint code )
messageNode( doc, TidyWarning, element,
"%s element not empty or not closed", elemdesc );
break;
+
+ case ENCODING_IO_CONFLICT:
+ messageNode( doc, TidyWarning, node,
+ "Output encoding does not work with standard output" );
+ break;
}
}
@@ -776,7 +815,11 @@ void ReportError( TidyDocImpl* doc, Node *element, Node *node, uint code)
if (element)
messageNode( doc, TidyError, node, "unexpected %s> in <%s>",
node->element, element->element );
- else
+#if defined(__arm)
+ if (!element)
+#else
+ else
+#endif
messageNode( doc, TidyError, node, "unexpected %s>",
node->element );
break;
@@ -787,10 +830,15 @@ void ErrorSummary( TidyDocImpl* doc )
{
/* adjust badAccess to that its null if frames are ok */
ctmbstr encnam = "specified";
- if ( doc->docIn->encoding == WIN1252 )
+ int charenc = cfg( doc, TidyCharEncoding );
+ if ( charenc == WIN1252 )
encnam = "Windows-1252";
- else if ( doc->docIn->encoding == MACROMAN )
+ else if ( charenc == MACROMAN )
encnam = "MacRoman";
+ else if ( charenc == IBM858 )
+ encnam = "ibm858";
+ else if ( charenc == LATIN0 )
+ encnam = "latin0";
if ( doc->badAccess & (USING_FRAMES | USING_NOFRAMES) )
{
@@ -1040,8 +1088,7 @@ void HelloMessage( TidyDocImpl* doc, ctmbstr date, ctmbstr filename )
void ReportMarkupVersion( TidyDocImpl* doc )
{
- ctmbstr vers = HTMLVersionName( doc );
- Node* doctype = FindDocType( doc );
+ Node* doctype = doc->givenDoctype;
if ( doctype )
{
@@ -1069,8 +1116,13 @@ void ReportMarkupVersion( TidyDocImpl* doc )
message( doc, TidyInfo, "Doctype given is \"%s\"", buf );
}
- message( doc, TidyInfo, "Document content looks like %s",
- (vers ? vers : "HTML proprietary") );
+ if ( ! cfgBool(doc, TidyXmlTags) )
+ {
+ uint apparentVers = HTMLVersion( doc );
+ Bool isXhtml = doc->lexer->isvoyager;
+ ctmbstr vers = HTMLVersionNameFromCode( apparentVers, isXhtml );
+ message( doc, TidyInfo, "Document content looks like %s", vers );
+ }
}
void ReportNumWarnings( TidyDocImpl* doc )
@@ -1093,7 +1145,7 @@ void ReportNumWarnings( TidyDocImpl* doc )
void HelpText( TidyDocImpl* doc, ctmbstr prog )
{
- tidy_out(doc, "%s [option...] [file...]\n", prog );
+ tidy_out(doc, "%s [option...] [file...] [option...] [file...]\n", prog );
tidy_out(doc, "Utility to clean up and pretty print HTML/XHTML/XML\n");
tidy_out(doc, "see http://tidy.sourgeforge.net/\n");
tidy_out(doc, "\n");
@@ -1106,10 +1158,21 @@ void HelpText( TidyDocImpl* doc, ctmbstr prog )
#endif
tidy_out(doc, "\n");
+ tidy_out(doc, "File manipulation\n");
+ tidy_out(doc, "-----------------\n");
+ tidy_out(doc, " -o <file> to write output markup to specified <file>\n");
+ tidy_out(doc, " -config <file> to set configuration options from the specified <file>\n");
+ tidy_out(doc, " -f <file> to write errors to the specified <file>\n");
+ tidy_out(doc, " -modify or -m to modify the original input files\n");
+ tidy_out(doc, "\n");
+
tidy_out(doc, "Processing directives\n");
tidy_out(doc, "---------------------\n");
+ tidy_out(doc, " -asxhtml to convert HTML to well formed XHTML\n");
+ tidy_out(doc, " -ashtml to force XHTML to (non-XML) HTML\n");
+ tidy_out(doc, " -xml to specify the input is XML\n");
+ tidy_out(doc, " -asxml to convert input to well formed XML\n");
tidy_out(doc, " -indent or -i to indent element content\n");
- tidy_out(doc, " -omit or -o to omit optional end tags\n");
tidy_out(doc, " -wrap <column> to wrap text at the specified <column> (default is 68)\n");
tidy_out(doc, " -upper or -u to force tags to upper case (default is lower case)\n");
tidy_out(doc, " -clean or -c to replace FONT, NOBR and CENTER tags by CSS\n");
@@ -1117,11 +1180,7 @@ void HelpText( TidyDocImpl* doc, ctmbstr prog )
tidy_out(doc, " -numeric or -n to output numeric rather than named entities\n");
tidy_out(doc, " -errors or -e to only show errors\n");
tidy_out(doc, " -quiet or -q to suppress nonessential output\n");
- tidy_out(doc, " -xml to specify the input is well formed XML\n");
- tidy_out(doc, " -asxml to convert HTML to well formed XHTML\n");
- tidy_out(doc, " -asxhtml to convert HTML to well formed XHTML\n");
- tidy_out(doc, " -ashtml to force XHTML to well formed HTML\n");
- tidy_out(doc, " -slides to burst into slides on H2 elements\n");
+ tidy_out(doc, " -omit to omit optional end tags\n");
/* TRT */
#if SUPPORT_ACCESSIBILITY_CHECKS
@@ -1134,10 +1193,13 @@ void HelpText( TidyDocImpl* doc, ctmbstr prog )
tidy_out(doc, "-------------------\n");
tidy_out(doc, " -raw to output values above 127 without conversion to entities\n");
tidy_out(doc, " -ascii to use US-ASCII for output, ISO-8859-1 for input\n");
+ tidy_out(doc, " -latin0 to use ISO-8859-15 for input and US-ASCII for output\n");
tidy_out(doc, " -latin1 to use ISO-8859-1 for both input and output\n");
tidy_out(doc, " -iso2022 to use ISO-2022 for both input and output\n");
tidy_out(doc, " -utf8 to use UTF-8 for both input and output\n");
tidy_out(doc, " -mac to use MacRoman for input, US-ASCII for output\n");
+ tidy_out(doc, " -win1252 to use Windows-1252 for input, US-ASCII for output\n");
+ tidy_out(doc, " -ibm858 to use IBM-858 (CP850+Euro) for input, US-ASCII for output\n");
#if SUPPORT_UTF16_ENCODINGS
tidy_out(doc, " -utf16le to use UTF-16LE for both input and output\n");
@@ -1145,8 +1207,6 @@ void HelpText( TidyDocImpl* doc, ctmbstr prog )
tidy_out(doc, " -utf16 to use UTF-16 for both input and output\n");
#endif
- tidy_out(doc, " -win1252 to use Windows-1252 for input, US-ASCII for output\n");
-
#if SUPPORT_ASIAN_ENCODINGS
tidy_out(doc, " -big5 to use Big5 for both input and output\n"); /* #431953 - RJ */
tidy_out(doc, " -shiftjis to use Shift_JIS for both input and output\n"); /* #431953 - RJ */
@@ -1154,13 +1214,6 @@ void HelpText( TidyDocImpl* doc, ctmbstr prog )
#endif
tidy_out(doc, "\n");
- tidy_out(doc, "File manipulation\n");
- tidy_out(doc, "-----------------\n");
- tidy_out(doc, " -config <file> to set configuration options from the specified <file>\n");
- tidy_out(doc, " -f <file> to write errors to the specified <file>\n");
- tidy_out(doc, " -modify or -m to modify the original input files\n");
- tidy_out(doc, "\n");
-
tidy_out(doc, "Miscellaneous\n");
tidy_out(doc, "-------------\n");
tidy_out(doc, " -version or -v to show the version of Tidy\n");
diff --git a/tidylib-src/src/message.h b/tidylib-src/src/message.h
old mode 100755
new mode 100644
index 39f0bae8..399f4867
--- a/tidylib-src/src/message.h
+++ b/tidylib-src/src/message.h
@@ -9,8 +9,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/07/28 18:10:16 $
- $Revision: 1.1.2.3 $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.2 $
*/
@@ -72,6 +72,9 @@ void ReportMissingAttr( TidyDocImpl* doc, Node* node, ctmbstr name );
void ReportWarning( TidyDocImpl* doc, Node* element, Node* node, uint code );
void ReportError( TidyDocImpl* doc, Node* element, Node* node, uint code );
+void ReportNonCompliantAttr( TidyDocImpl* doc, Node* node, AttVal* attr, uint versWanted );
+void ReportNonCompliantNode( TidyDocImpl* doc, Node* node, uint code, uint versWanted );
+
/* error codes for entities/numeric character references */
#define MISSING_SEMICOLON 1
@@ -118,11 +121,13 @@ void ReportError( TidyDocImpl* doc, Node* element, Node* node, uint code );
#define UNESCAPED_ELEMENT 34
#define NESTED_QUOTATION 35
#define ELEMENT_NOT_EMPTY 36
+#define ENCODING_IO_CONFLICT 37
+#define MIXED_CONTENT_IN_BLOCK 38
/* error codes used for attribute messages */
#define UNKNOWN_ATTRIBUTE 1
-#define MISSING_ATTRIBUTE 2
+#define INSERTING_ATTRIBUTE 2
#define MISSING_ATTR_VALUE 3
#define BAD_ATTRIBUTE_VALUE 4
#define UNEXPECTED_GT 5
@@ -148,6 +153,8 @@ void ReportError( TidyDocImpl* doc, Node* element, Node* node, uint code );
#define ATTR_VALUE_NOT_LCASE 23
#define XML_ID_SYNTAX 24
+#define INVALID_ATTRIBUTE 25
+
/* page transition effects */
#define EFFECT_BLEND -1
diff --git a/tidylib-src/src/parser.c b/tidylib-src/src/parser.c
old mode 100755
new mode 100644
index a3febe94..1a42bc48
--- a/tidylib-src/src/parser.c
+++ b/tidylib-src/src/parser.c
@@ -6,8 +6,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/11/02 20:16:28 $
- $Revision: 1.53.2.6 $
+ $Date: 2003/02/16 19:33:10 $
+ $Revision: 1.56 $
*/
@@ -75,7 +75,7 @@ Bool IsNewNode(Node *node)
void CoerceNode( TidyDocImpl* doc, Node *node, TidyTagId tid )
{
- Dict* tag = LookupTagDef( tid );
+ const Dict* tag = LookupTagDef( tid );
Node* tmp = InferredTag( doc, tag->name );
ReportWarning( doc, node, tmp, OBSOLETE_ELEMENT );
MemFree( tmp->element );
@@ -90,7 +90,7 @@ void CoerceNode( TidyDocImpl* doc, Node *node, TidyTagId tid )
}
/* extract a node and its children from a markup tree */
-void RemoveNode(Node *node)
+Node *RemoveNode(Node *node)
{
if (node->prev)
node->prev->next = node->next;
@@ -108,6 +108,7 @@ void RemoveNode(Node *node)
}
node->parent = node->prev = node->next = null;
+ return node;
}
/* remove node from markup tree and discard it */
@@ -233,6 +234,9 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
if (element->content)
return no;
+ if ( element->tag->model & CM_BLOCK && element->attributes != null )
+ return no;
+
if ( nodeIsA(element) && element->attributes != null )
return no;
@@ -270,25 +274,24 @@ static Bool CanPrune( TidyDocImpl* doc, Node *element )
return yes;
}
-static void TrimEmptyElement( TidyDocImpl* doc, Node *element )
+Node *TrimEmptyElement( TidyDocImpl* doc, Node *element )
{
if ( CanPrune(doc, element) )
{
if ( element->type != TextNode )
ReportWarning( doc, element, null, TRIM_EMPTY_ELEMENT);
-
- DiscardElement(doc, element);
+ return DiscardElement(doc, element);
}
else if ( nodeIsP(element) && element->content == null )
{
/* Put a non-breaking space into empty paragraphs.
** Contrary to intent, replacing empty paragraphs
** with two <br><br> does not preserve formatting.
- ** Moving Lee Passey's fix over to TidyLib branch.
*/
char onesixty[2] = { '\240', 0 };
InsertNodeAtStart( element, NewLiteralTextNode(doc->lexer, onesixty) );
}
+ return element;
}
/*
@@ -298,7 +301,7 @@ static void TrimEmptyElement( TidyDocImpl* doc, Node *element )
static void BadForm( TidyDocImpl* doc )
{
doc->badForm = yes;
- doc->errors++;
+ /* doc->errors++; */
}
/*
@@ -501,8 +504,32 @@ static Bool InsertMisc(Node *element, Node *node)
node->type == SectionTag ||
node->type == AspTag ||
node->type == JsteTag ||
- node->type == PhpTag ||
- node->type == XmlDecl)
+ node->type == PhpTag )
+ {
+ InsertNodeAtEnd(element, node);
+ return yes;
+ }
+
+ if ( node->type == XmlDecl )
+ {
+ Node* root = element;
+ while ( root && root->parent )
+ root = root->parent;
+ if ( root )
+ {
+ InsertNodeAtStart( root, node );
+ return yes;
+ }
+ }
+
+ /* Declared empty tags seem to be slipping through
+ ** the cracks. This is an experiment to figure out
+ ** a decent place to pick them up.
+ */
+ if ( node->tag &&
+ (node->type == StartTag || node->type == StartEndTag) &&
+ nodeCMIsEmpty(node) && TagId(node) == TidyTag_UNKNOWN &&
+ (node->tag->versions & VERS_PROPRIETARY) != 0 )
{
InsertNodeAtEnd(element, node);
return yes;
@@ -1919,7 +1946,7 @@ static void FixEmptyRow(TidyDocImpl* doc, Node *row)
void ParseRow(TidyDocImpl* doc, Node *row, uint mode)
{
Lexer* lexer = doc->lexer;
- Node *node, *parent;
+ Node *node;
Bool exclude_state;
if (row->tag->model & CM_EMPTY)
@@ -1937,6 +1964,7 @@ void ParseRow(TidyDocImpl* doc, Node *row, uint mode)
return;
}
+ /* New row start implies end of current row */
UngetToken( doc );
FixEmptyRow( doc, row);
return;
@@ -1948,6 +1976,13 @@ void ParseRow(TidyDocImpl* doc, Node *row, uint mode)
*/
if ( node->type == EndTag )
{
+ if ( DescendantOf(row, TagId(node)) )
+ {
+ UngetToken( doc );
+ TrimEmptyElement( doc, row);
+ return;
+ }
+
if ( nodeIsFORM(node) || nodeHasCM(node, CM_BLOCK|CM_INLINE) )
{
if ( nodeIsFORM(node) )
@@ -1964,17 +1999,6 @@ void ParseRow(TidyDocImpl* doc, Node *row, uint mode)
FreeNode( doc, node);
continue;
}
-
- for (parent = row->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- UngetToken( doc );
- TrimEmptyElement( doc, row);
- return;
- }
- }
}
/* deal with comments etc. */
@@ -2427,8 +2451,8 @@ void ParseTableTag(TidyDocImpl* doc, Node *table, uint mode)
/* acceptable content for pre elements */
Bool PreContent( TidyDocImpl* doc, Node* node )
{
- /* p is coerced to br's */
- if ( nodeIsP(node) )
+ /* p is coerced to br's, Text OK too */
+ if ( nodeIsP(node) || nodeIsText(node) )
return yes;
if ( node->tag == null ||
@@ -2454,9 +2478,16 @@ void ParsePre( TidyDocImpl* doc, Node *pre, uint mode )
while ((node = GetToken(doc, Preformatted)) != null)
{
- if (node->tag == pre->tag && node->type == EndTag)
+ if ( node->type == EndTag &&
+ (node->tag == pre->tag || DescendantOf(pre, TagId(node))) )
{
- FreeNode( doc, node);
+ if ( node->tag == pre->tag )
+ FreeNode( doc, node);
+ else
+ {
+ ReportWarning( doc, pre, node, MISSING_ENDTAG_BEFORE );
+ UngetToken( doc );
+ }
TrimSpaces(doc, pre);
pre->closed = yes;
TrimEmptyElement(doc, pre);
diff --git a/tidylib-src/src/parser.h b/tidylib-src/src/parser.h
old mode 100755
new mode 100644
index 1d340b25..bfa2e241
--- a/tidylib-src/src/parser.h
+++ b/tidylib-src/src/parser.h
@@ -9,8 +9,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/07/08 18:03:19 $
- $Revision: 1.1.2.4 $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
*/
@@ -29,7 +29,7 @@ Bool IsNewNode(Node *node);
void CoerceNode( TidyDocImpl* doc, Node *node, TidyTagId tid );
/* extract a node and its children from a markup tree */
-void RemoveNode(Node *node);
+Node *RemoveNode(Node *node);
/* remove node from markup tree and discard it */
Node *DiscardElement( TidyDocImpl* doc, Node *element);
@@ -40,14 +40,16 @@ void InsertNodeAtStart(Node *element, Node *node);
/* insert node into markup tree */
void InsertNodeAtEnd(Node *element, Node *node);
-
-
/* insert node into markup tree before element */
void InsertNodeBeforeElement(Node *element, Node *node);
/* insert node into markup tree after element */
void InsertNodeAfterElement(Node *element, Node *node);
+Node *TrimEmptyElement( TidyDocImpl* doc, Node *element );
+
+
+
/* assumes node is a text node */
Bool IsBlank(Lexer *lexer, Node *node);
diff --git a/tidylib-src/src/pprint.c b/tidylib-src/src/pprint.c
old mode 100755
new mode 100644
index 552bb788..2d1568a5
--- a/tidylib-src/src/pprint.c
+++ b/tidylib-src/src/pprint.c
@@ -6,9 +6,9 @@
CVS Info :
- $Author: lpassey $
- $Date: 2002/10/29 23:53:51 $
- $Revision: 1.45.2.8 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.50 $
*/
@@ -1103,7 +1103,7 @@ static void PPrintAttrs( TidyDocImpl* doc, uint indent, Node *node )
{
if ( av->attribute != null )
{
- Attribute *dict = av->dict;
+ const Attribute *dict = av->dict;
if ( !cfgBool(doc, TidyDropPropAttrs) ||
( dict != null && !(dict->versions & VERS_PROPRIETARY) ) )
PPrintAttribute( doc, indent, node, av );
@@ -1327,13 +1327,23 @@ static void PPrintPI( TidyDocImpl* doc, uint indent, Node *node )
static void PPrintXmlDecl( TidyDocImpl* doc, uint indent, Node *node )
{
+ AttVal* att;
uint saveWrap;
TidyPrintImpl* pprint = &doc->pprint;
SetWrap( doc, indent );
saveWrap = WrapOff( doc );
AddString( pprint, "end <= 0 || doc->lexer->lexbuf[node->end - 1] != '?' )
AddChar( pprint, '?' );
AddChar( pprint, '>' );
@@ -1437,6 +1447,7 @@ static void PPrintSection( TidyDocImpl* doc, uint indent, Node *node )
#if 0
/*
** Print script and style elements. For XHTML, wrap the content as follows:
+**
** JavaScript:
** //
** CSS:
-** /**/
+** / *]]>* /
** other:
** content && nodeHasCM(node, CM_NO_INDENT) )
@@ -1847,13 +1861,16 @@ void PPrintTree( TidyDocImpl* doc, uint mode, uint indent, Node *node )
if ( node->type == StartEndTag )
node->type = StartTag;
- if ( node->tag && node->tag->parser == ParsePre )
+ if ( node->tag &&
+ (node->tag->parser == ParsePre || nodeIsTEXTAREA(node)) )
{
+ uint indprev = indent;
PCondFlushLine( doc, indent );
- indent = 0;
PCondFlushLine( doc, indent );
PPrintTag( doc, mode, indent, node );
+
+ indent = 0;
PFlushLine( doc, indent);
for ( content = node->content; content; content = content->next )
@@ -1861,9 +1878,9 @@ void PPrintTree( TidyDocImpl* doc, uint mode, uint indent, Node *node )
PPrintTree( doc, (mode | PREFORMATTED | NOWRAP),
indent, content );
}
+ indent = indprev;
PCondFlushLine( doc, indent );
PPrintEndTag( doc, mode, indent, node );
- PFlushLine( doc, indent );
if ( !cfg(doc, TidyIndentContent) && node->next != null )
PFlushLine( doc, indent );
diff --git a/tidylib-src/src/pprint.h b/tidylib-src/src/pprint.h
old mode 100755
new mode 100644
index c2f1fce9..96318346
--- a/tidylib-src/src/pprint.h
+++ b/tidylib-src/src/pprint.h
@@ -8,8 +8,8 @@
CVS Info:
$Author: creitzel $
- $Date: 2002/08/08 21:58:19 $
- $Revision: 1.1.2.3 $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
*/
diff --git a/tidylib-src/src/streamio.c b/tidylib-src/src/streamio.c
old mode 100755
new mode 100644
index 4a0c343e..4b4fa6a4
--- a/tidylib-src/src/streamio.c
+++ b/tidylib-src/src/streamio.c
@@ -6,8 +6,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/10/20 18:59:40 $
- $Revision: 1.1.2.9 $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
Wrapper around Tidy input source and output sink
that calls appropriate interfaces, and applies
@@ -37,8 +37,10 @@ void UngetByte( StreamIn* in, uint byteValue );
void PutByte( uint byteValue, StreamOut* out );
-extern uint Mac2Unicode[];
-extern uint Win2Unicode[];
+void EncodeWin1252( uint c, StreamOut* out );
+void EncodeMacRoman( uint c, StreamOut* out );
+void EncodeIbm858( uint c, StreamOut* out );
+void EncodeLatin0( uint c, StreamOut* out );
void outcUTF8Bytes( StreamOut *out, byte* buf, int* count );
void outBOM( StreamOut *out );
@@ -51,6 +53,7 @@ static StreamOut stderrStreamOut =
{
ASCII,
FSM_ASCII,
+ DEFAULT_NL_CONFIG,
FileIO,
{ 0, filesink_putByte }
};
@@ -59,6 +62,7 @@ static StreamOut stdoutStreamOut =
{
ASCII,
FSM_ASCII,
+ DEFAULT_NL_CONFIG,
FileIO,
{ 0, filesink_putByte }
};
@@ -197,7 +201,7 @@ uint ReadChar( StreamIn *in )
if ( c == '\015' && !cfgBool(in->doc, TidyXmlTags) )
break;
- if (0 <= c && c < 32)
+ if ( c < 32 )
continue; /* discard control char */
/* watch out for chars that have already been decoded such as */
@@ -252,8 +256,18 @@ uint ReadChar( StreamIn *in )
#endif
/* Do first: acts on range 128 - 255 */
- if ( in->encoding == MACROMAN )
+ switch ( in->encoding )
+ {
+ case MACROMAN:
c = DecodeMacRoman( c );
+ break;
+ case IBM858:
+ c = DecodeIbm850( c );
+ break;
+ case LATIN0:
+ c = DecodeLatin0( c );
+ break;
+ }
/* produced e.g. as a side-effect of smart quotes in Word */
/* but can't happen if using MACROMAN encoding */
@@ -346,32 +360,33 @@ void UngetChar( uint c, StreamIn *in )
** Sink
************************/
-static StreamOut* initStreamOut( int encoding )
+static StreamOut* initStreamOut( int encoding, uint nl )
{
StreamOut* out = (StreamOut*) MemAlloc( sizeof(StreamOut) );
ClearMemory( out, sizeof(StreamOut) );
out->encoding = encoding;
out->state = FSM_ASCII;
+ out->nl = nl;
return out;
}
-StreamOut* FileOutput( FILE* fp, int encoding )
+StreamOut* FileOutput( FILE* fp, int encoding, uint nl )
{
- StreamOut* out = initStreamOut( encoding );
+ StreamOut* out = initStreamOut( encoding, nl );
initFileSink( &out->sink, fp );
out->iotype = FileIO;
return out;
}
-StreamOut* BufferOutput( TidyBuffer* buf, int encoding )
+StreamOut* BufferOutput( TidyBuffer* buf, int encoding, uint nl )
{
- StreamOut* out = initStreamOut( encoding );
+ StreamOut* out = initStreamOut( encoding, nl );
initOutputBuffer( &out->sink, buf );
out->iotype = BufferIO;
return out;
}
-StreamOut* UserOutput( TidyOutputSink* sink, int encoding )
+StreamOut* UserOutput( TidyOutputSink* sink, int encoding, uint nl )
{
- StreamOut* out = initStreamOut( encoding );
+ StreamOut* out = initStreamOut( encoding, nl );
memcpy( &out->sink, sink, sizeof(TidyOutputSink) );
out->iotype = UserIO;
return out;
@@ -379,52 +394,40 @@ StreamOut* UserOutput( TidyOutputSink* sink, int encoding )
void WriteChar( uint c, StreamOut* out )
{
+ /* Translate outgoing newlines */
+ if ( LF == c )
+ {
+ if ( out->nl == TidyCRLF )
+ WriteChar( CR, out );
+ else if ( out->nl == TidyCR )
+ c = CR;
+ }
+
if (out->encoding == MACROMAN)
{
- if (c < 128)
- PutByte(c, out);
- else
- {
- /* For mac users, map Unicode back to MacRoman. */
- int i;
- for (i = 128; i < 256; i++)
- {
- if (Mac2Unicode[i - 128] == c)
- {
- PutByte(i, out);
- break;
- }
- }
- }
+ EncodeMacRoman( c, out );
}
- else
-
- if (out->encoding == WIN1252)
+ else if (out->encoding == WIN1252)
{
- if (c < 128 || (c > 159 && c < 256))
- PutByte(c, out);
- else
- {
- int i;
-
- for (i = 128; i < 160; i++)
- if (Win2Unicode[i - 128] == c)
- {
- PutByte(i, out);
- break;
- }
- }
+ EncodeWin1252( c, out );
}
- else
-
- if (out->encoding == UTF8)
+ else if (out->encoding == IBM858)
+ {
+ EncodeIbm858( c, out );
+ }
+ else if (out->encoding == LATIN0)
+ {
+ EncodeLatin0( c, out );
+ }
+
+ else if (out->encoding == UTF8)
{
int count = 0;
EncodeCharToUTF8Bytes( c, null, &out->sink, &count );
if (count <= 0)
{
- /* ReportEncodingError(in->lexer, INVALID_UTF8 | REPLACED_CHAR, c); */
+ /* ReportEncodingError(in->lexer, INVALID_UTF8 | REPLACED_CHAR, c); */
/* replacement char 0xFFFD encoded as UTF-8 */
PutByte(0xEF, out); PutByte(0xBF, out); PutByte(0xBF, out);
}
@@ -471,7 +474,9 @@ void WriteChar( uint c, StreamOut* out )
}
#if SUPPORT_UTF16_ENCODINGS
- else if (out->encoding == UTF16LE || out->encoding == UTF16BE || out->encoding == UTF16)
+ else if ( out->encoding == UTF16LE ||
+ out->encoding == UTF16BE ||
+ out->encoding == UTF16 )
{
int i, numChars = 1;
uint theChars[2];
@@ -497,7 +502,7 @@ void WriteChar( uint c, StreamOut* out )
else
{
/* just put the char out */
- theChars[0] = (byte) c;
+ theChars[0] = c;
}
for (i = 0; i < numChars; i++)
@@ -546,13 +551,13 @@ void WriteChar( uint c, StreamOut* out )
** regardless of specified encoding. Set at compile time
** to either Windows or Mac.
*/
-int ReplacementCharEncoding = DFLT_REPL_CHARENC;
+const int ReplacementCharEncoding = DFLT_REPL_CHARENC;
/* Mapping for Windows Western character set CP 1252
** (chars 128-159/U+0080-U+009F) to Unicode.
*/
-uint Win2Unicode[32] =
+static const uint Win2Unicode[32] =
{
0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,
@@ -569,13 +574,30 @@ uint DecodeWin1252(uint c)
return c;
}
+void EncodeWin1252( uint c, StreamOut* out ) /* write Unicode char c to out as one Windows-1252 byte */
+{
+ if (c < 128 || (c > 159 && c < 256)) /* these values are emitted as-is */
+ PutByte(c, out);
+ else
+ {
+ int i;
+
+ for (i = 128; i < 160; i++) /* reverse lookup: find the byte whose table entry equals c */
+ if (Win2Unicode[i - 128] == c)
+ {
+ PutByte(i, out);
+ break;
+ }
+ } /* no table match: nothing is written for c */
+}
+
/*
John Love-Jensen contributed this table for mapping MacRoman
character set to Unicode
*/
/* modified to only need chars 128-255/U+0080-U+00FF - Terry Teague 19 Aug 01 */
-uint Mac2Unicode[128] =
+static const uint Mac2Unicode[128] =
{
/* x7F = DEL */
@@ -615,13 +637,121 @@ uint DecodeMacRoman(uint c)
return c;
}
+void EncodeMacRoman( uint c, StreamOut* out ) /* write Unicode char c to out as one MacRoman byte */
+{
+ if (c < 128) /* values below 128 are emitted as-is */
+ PutByte(c, out);
+ else
+ {
+ /* For Mac users, map Unicode back to MacRoman by reverse lookup in Mac2Unicode. */
+ int i;
+ for (i = 128; i < 256; i++)
+ {
+ if (Mac2Unicode[i - 128] == c)
+ {
+ PutByte(i, out);
+ break;
+ }
+ }
+ } /* no table match: nothing is written for c */
+}
+
+/* Mapping for OS/2 Western character set CP 850 (chars 128-255) to Unicode.
+** Byte 0xD5 maps to the Euro sign U+20AC, i.e. this is the IBM858 (IBM850+Euro) variant.
+*/
+static const uint IBM2Unicode[128] =
+{
+ 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
+ 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+ 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
+ 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
+ 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
+ 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
+ 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
+ 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
+ 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
+ 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x20AC, 0x00cd, 0x00ce,
+ 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
+ 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe,
+ 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4,
+ 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
+ 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0
+};
+
+/* Convert a byte value c in 128-255 to Unicode via IBM2Unicode; any other value is returned unchanged. */
+uint DecodeIbm850(uint c)
+{
+ if (127 < c && c < 256)
+ c = IBM2Unicode[c - 128]; /* table is indexed from byte 128 */
+
+ return c;
+}
+
+/* For OS/2 and Java users, map Unicode char c back to one IBM858 (IBM850+Euro) byte and write it to out. */
+void EncodeIbm858( uint c, StreamOut* out )
+{
+ if (c < 128) /* values below 128 are emitted as-is */
+ PutByte(c, out);
+ else
+ {
+ int i;
+ for (i = 128; i < 256; i++) /* reverse lookup: find the byte whose table entry equals c */
+ {
+ if (IBM2Unicode[i - 128] == c)
+ {
+ PutByte(i, out);
+ break;
+ }
+ }
+ } /* no table match: nothing is written for c */
+}
+
+
+/* Convert from Latin0 (aka Latin9, ISO-8859-15) to Unicode; only the eight bytes listed below are remapped. */
+uint DecodeLatin0(uint c)
+{
+ if (159 < c && c < 191) /* all remapped bytes lie in 0xA0-0xBE */
+ {
+ switch (c)
+ {
+ case 0xA4: c = 0x20AC; break;
+ case 0xA6: c = 0x0160; break;
+ case 0xA8: c = 0x0161; break;
+ case 0xB4: c = 0x017D; break;
+ case 0xB8: c = 0x017E; break;
+ case 0xBC: c = 0x0152; break;
+ case 0xBD: c = 0x0153; break;
+ case 0xBE: c = 0x0178; break;
+ }
+ }
+ return c; /* every other value is returned unchanged */
+}
+
+/* Map Unicode back to ISO-8859-15: the inverse of the eight DecodeLatin0 remappings, then write the result to out. */
+void EncodeLatin0( uint c, StreamOut* out )
+{
+ switch (c)
+ {
+ case 0x20AC: c = 0xA4; break;
+ case 0x0160: c = 0xA6; break;
+ case 0x0161: c = 0xA8; break;
+ case 0x017D: c = 0xB4; break;
+ case 0x017E: c = 0xB8; break;
+ case 0x0152: c = 0xBC; break;
+ case 0x0153: c = 0xBD; break;
+ case 0x0178: c = 0xBE; break;
+ }
+ PutByte(c, out); /* NOTE(review): every unmapped value, including 0xA4 etc. and code points above 255, reaches PutByte unchanged - confirm callers filter these */
+}
+
/*
Table to map symbol font characters to Unicode; undefined
characters are mapped to 0x0000 and characters without any
Unicode equivalent are mapped to '?'. Is this appropriate?
*/
-static uint Symbol2Unicode[] =
+static const uint Symbol2Unicode[] =
{
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
diff --git a/tidylib-src/src/streamio.h b/tidylib-src/src/streamio.h
old mode 100755
new mode 100644
index 620afc17..d1af1a0a
--- a/tidylib-src/src/streamio.h
+++ b/tidylib-src/src/streamio.h
@@ -3,14 +3,14 @@
/* streamio.h -- handles character stream I/O
- (c) 1998-2002 (W3C) MIT, INRIA, Keio University
+ (c) 1998-2003 (W3C) MIT, INRIA, Keio University
See tidy.h for the copyright notice.
CVS Info :
- $Author: creitzel $
- $Date: 2002/07/28 18:10:16 $
- $Revision: 1.1.2.5 $
+ $Author: lpassey $
+ $Date: 2003/02/25 21:12:03 $
+ $Revision: 1.3 $
Wrapper around Tidy input source and output sink
that calls appropriate interfaces, and applies
@@ -20,7 +20,6 @@
*/
#include "forward.h"
-#include "tidy.h"
#include "buffio.h"
#include "fileio.h"
@@ -74,16 +73,17 @@ Bool IsEOF( StreamIn* in );
struct _StreamOut
{
- int encoding;
- int state; /* for ISO 2022 */
+ int encoding;
+ int state; /* for ISO 2022 */
+ uint nl;
IOType iotype;
TidyOutputSink sink;
};
-StreamOut* FileOutput( FILE* fp, int encoding );
-StreamOut* BufferOutput( TidyBuffer* buf, int encoding );
-StreamOut* UserOutput( TidyOutputSink* sink, int encoding );
+StreamOut* FileOutput( FILE* fp, int encoding, uint newln );
+StreamOut* BufferOutput( TidyBuffer* buf, int encoding, uint newln );
+StreamOut* UserOutput( TidyOutputSink* sink, int encoding, uint newln );
StreamOut* StdErrOutput();
StreamOut* StdOutOutput();
@@ -100,16 +100,18 @@ void outBOM( StreamOut *out );
*/
#define RAW 0
#define ASCII 1
-#define LATIN1 2
-#define UTF8 3
-#define ISO2022 4
-#define MACROMAN 5
-#define WIN1252 6
+#define LATIN0 2
+#define LATIN1 3
+#define UTF8 4
+#define ISO2022 5
+#define MACROMAN 6
+#define WIN1252 7
+#define IBM858 8
#if SUPPORT_UTF16_ENCODINGS
-#define UTF16LE 7
-#define UTF16BE 8
-#define UTF16 9
+#define UTF16LE 9
+#define UTF16BE 10
+#define UTF16 11
#endif
/* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints
@@ -118,11 +120,11 @@ void outBOM( StreamOut *out );
*/
#if SUPPORT_ASIAN_ENCODINGS
#if SUPPORT_UTF16_ENCODINGS
-#define BIG5 10
-#define SHIFTJIS 11
+#define BIG5 12
+#define SHIFTJIS 13
#else
-#define BIG5 7
-#define SHIFTJIS 8
+#define BIG5 9
+#define SHIFTJIS 10
#endif
#endif
@@ -150,7 +152,7 @@ void outBOM( StreamOut *out );
** regardless of specified encoding. Set at compile time
** to either Windows or Mac.
*/
-extern int ReplacementCharEncoding;
+extern const int ReplacementCharEncoding;
/* Function for conversion from Windows-1252 to Unicode */
uint DecodeWin1252(uint c);
@@ -158,8 +160,29 @@ uint DecodeWin1252(uint c);
/* Function to convert from MacRoman to Unicode */
uint DecodeMacRoman(uint c);
+/* Function for conversion from OS/2-850 to Unicode */
+uint DecodeIbm850(uint c);
+
+/* Function for conversion from Latin0 to Unicode */
+uint DecodeLatin0(uint c);
+
/* Function to convert from Symbol Font chars to Unicode */
uint DecodeSymbolFont(uint c);
+/* Use numeric constants as opposed to escape chars (\r, \n)
+** to avoid conflicts with Mac compilers that may re-define these.
+*/
+#define CR 0xD
+#define LF 0xA
+
+#if defined(MAC_OS_CLASSIC)
+#define DEFAULT_NL_CONFIG TidyCR
+#elif defined(_WIN32) || defined(OS2_OS)
+#define DEFAULT_NL_CONFIG TidyCRLF
+#else
+#define DEFAULT_NL_CONFIG TidyLF
+#endif
+
+
#endif /* __STREAMIO_H__ */
diff --git a/tidylib-src/src/tagask.c b/tidylib-src/src/tagask.c
old mode 100755
new mode 100644
index 79d0cf6b..ad864f02
--- a/tidylib-src/src/tagask.c
+++ b/tidylib-src/src/tagask.c
@@ -6,8 +6,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/07/08 18:03:19 $
- $Revision: 1.1.2.3 $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
*/
diff --git a/tidylib-src/src/tags.c b/tidylib-src/src/tags.c
old mode 100755
new mode 100644
index c1731398..1cdcd98e
--- a/tidylib-src/src/tags.c
+++ b/tidylib-src/src/tags.c
@@ -1,16 +1,15 @@
/* tags.c -- recognize HTML tags
- (c) 1998-2002 (W3C) MIT, INRIA, Keio University
+ (c) 1998-2003 (W3C) MIT, INRIA, Keio University
See tidy.h for the copyright notice.
CVS Info :
- $Author: creitzel $
- $Date: 2002/10/15 19:54:31 $
- $Revision: 1.20.2.5 $
+ $Author: terry_teague $
+ $Date: 2003/03/02 04:29:12 $
+ $Revision: 1.22 $
The HTML tags are stored as 8 bit ASCII strings.
- Use lookupw() to find a tag given a wide char string.
*/
@@ -20,7 +19,7 @@
#include "tmbstr.h"
#include "parser.h" /* For FixId() */
-static Dict tag_defs[] =
+static const Dict tag_defs[] =
{
{ TidyTag_UNKNOWN, "unknown!", 0, 0, null, null},
{ TidyTag_A, "a", VERS_ALL, CM_INLINE, ParseInline, CheckAnchor},
@@ -148,13 +147,18 @@ static Dict tag_defs[] =
/* choose what version to use for new doctype */
int HTMLVersion( TidyDocImpl* doc )
{
+ int dtver = doc->lexer->doctype;
uint versions = doc->lexer->versions;
- Bool isXml = cfgBool(doc, TidyXmlOut)
- | cfgBool(doc, TidyXmlTags)
- | doc->lexer->isvoyager;
+ TidyDoctypeModes dtmode = cfg(doc, TidyDoctypeMode);
+
+ Bool wantXhtml = !cfgBool(doc, TidyHtmlOut) &&
+ ( cfgBool(doc, TidyXmlOut) || doc->lexer->isvoyager );
+
+ Bool wantHtml4 = dtmode==TidyDoctypeStrict || dtmode==TidyDoctypeLoose ||
+ dtver==VERS_HTML40_STRICT || dtver==VERS_HTML40_LOOSE;
/* Prefer HTML 4.x for XHTML */
- if ( !isXml )
+ if ( !wantXhtml && !wantHtml4 )
{
if ( versions & VERS_HTML32 ) /* Prefer 3.2 over 2.0 */
return VERS_HTML32;
@@ -163,7 +167,7 @@ int HTMLVersion( TidyDocImpl* doc )
return VERS_HTML20;
}
- if ( versions & VERS_XHTML11 )
+ if ( wantXhtml && (versions & VERS_XHTML11) )
return VERS_XHTML11;
if ( versions & VERS_HTML40_STRICT )
@@ -176,24 +180,21 @@ int HTMLVersion( TidyDocImpl* doc )
return VERS_FRAMESET;
/* Still here? Try these again. */
- if ( isXml )
- {
- if ( versions & VERS_HTML32 ) /* Prefer 3.2 over 2.0 */
- return VERS_HTML32;
+ if ( versions & VERS_HTML32 ) /* Prefer 3.2 over 2.0 */
+ return VERS_HTML32;
- if ( versions & VERS_HTML20 )
- return VERS_HTML20;
- }
+ if ( versions & VERS_HTML20 )
+ return VERS_HTML20;
return VERS_UNKNOWN;
}
-static Dict *lookup( TidyTagImpl* tags, ctmbstr s )
+static const Dict* lookup( TidyTagImpl* tags, ctmbstr s )
{
Dict *np = null;
if ( s )
{
- Dict *np = tag_defs + 1; /* Skip Unknown */
+ const Dict *np = tag_defs + 1; /* Skip Unknown */
for ( /**/; np < tag_defs + N_TIDY_TAGS; ++np )
if ( tmbstrcmp(s, np->name) == 0 )
return np;
@@ -206,32 +207,38 @@ static Dict *lookup( TidyTagImpl* tags, ctmbstr s )
}
-static Dict* declare( TidyTagImpl* tags,
- ctmbstr name, uint versions, uint model,
- Parser *parser, CheckAttribs *chkattrs )
+static void declare( TidyTagImpl* tags,
+ ctmbstr name, uint versions, uint model,
+ Parser *parser, CheckAttribs *chkattrs )
{
- Dict* np = lookup( tags, name );
- if ( np == null )
+ if ( name )
{
- np = (Dict*) MemAlloc( sizeof(Dict) );
- ClearMemory( np, sizeof(Dict) );
+ Dict* np = (Dict*) lookup( tags, name );
+ if ( np == null )
+ {
+ np = (Dict*) MemAlloc( sizeof(Dict) );
+ ClearMemory( np, sizeof(Dict) );
- np->name = tmbstrdup( name );
- np->next = tags->declared_tag_list;
- tags->declared_tag_list = np;
- }
+ np->name = tmbstrdup( name );
+ np->next = tags->declared_tag_list;
+ tags->declared_tag_list = np;
+ }
- np->versions = versions;
- np->model |= model;
- np->parser = parser;
- np->chkattrs = chkattrs;
- return np;
+ /* Make sure we are not over-writing predefined tags */
+ if ( np->id == TidyTag_UNKNOWN )
+ {
+ np->versions = versions;
+ np->model |= model;
+ np->parser = parser;
+ np->chkattrs = chkattrs;
+ }
+ }
}
/* public interface for finding tag by name */
Bool FindTag( TidyDocImpl* doc, Node *node )
{
- Dict *np = null;
+ const Dict *np = null;
if ( cfgBool(doc, TidyXmlTags) )
{
node->tag = doc->tags.xml_tags;
@@ -247,7 +254,7 @@ Bool FindTag( TidyDocImpl* doc, Node *node )
return no;
}
-Dict* LookupTagDef( TidyTagId tid )
+const Dict* LookupTagDef( TidyTagId tid )
{
if ( tid > TidyTag_UNKNOWN && tid < N_TIDY_TAGS )
return tag_defs + tid;
@@ -255,9 +262,9 @@ Dict* LookupTagDef( TidyTagId tid )
}
-Parser *FindParser( TidyDocImpl* doc, Node *node )
+Parser* FindParser( TidyDocImpl* doc, Node *node )
{
- Dict* np = lookup( &doc->tags, node->element );
+ const Dict* np = lookup( &doc->tags, node->element );
if ( np )
return np->parser;
return null;
@@ -332,35 +339,78 @@ ctmbstr GetNextDeclaredTag( TidyDocImpl* doc, int tagType,
break;
}
}
- *iter = (TidyIterator) ( curr ? curr->next : null );
+ *iter = (TidyIterator) ( curr ? curr : null );
return name;
}
void InitTags( TidyDocImpl* doc )
{
+ Dict* xml;
TidyTagImpl* tags = &doc->tags;
ClearMemory( tags, sizeof(TidyTagImpl) );
/* create dummy entry for all xml tags */
- tags->xml_tags = (Dict*) MemAlloc( sizeof(Dict) );
- tags->xml_tags->name = null;
- tags->xml_tags->versions = VERS_XML;
- tags->xml_tags->model = CM_BLOCK;
- tags->xml_tags->parser = null;
- tags->xml_tags->chkattrs = null;
-}
-
-void FreeTags( TidyDocImpl* doc )
+ xml = (Dict*) MemAlloc( sizeof(Dict) );
+ ClearMemory( xml, sizeof(Dict) );
+ xml->name = null;
+ xml->versions = VERS_XML;
+ xml->model = CM_BLOCK;
+ xml->parser = null;
+ xml->chkattrs = null;
+ tags->xml_tags = xml;
+}
+
+/* By default, zap all of them. But allow
+** a single type to be specified.
+*/
+void FreeDeclaredTags( TidyDocImpl* doc, int tagType )
{
TidyTagImpl* tags = &doc->tags;
- Dict* curr;
+ Dict *curr, *next = null, *prev = null; /* prev: last entry that was kept */
- while ( curr = tags->declared_tag_list )
+ for ( curr=tags->declared_tag_list; curr; curr = next )
{
- tags->declared_tag_list = curr->next;
- MemFree( curr->name );
- MemFree( curr );
+ Bool deleteIt = yes; /* a tagType not listed below deletes every entry */
+ next = curr->next; /* saved now because curr may be freed below */
+ switch ( tagType )
+ {
+ case tagtype_empty:
+ deleteIt = ( curr->model & CM_EMPTY );
+ break;
+
+ case tagtype_inline:
+ deleteIt = ( curr->model & CM_INLINE );
+ break;
+
+ case tagtype_block:
+ deleteIt = ( (curr->model & CM_BLOCK) &&
+ curr->parser == ParseBlock );
+ break;
+
+ case tagtype_pre:
+ deleteIt = ( (curr->model & CM_BLOCK) &&
+ curr->parser == ParsePre );
+ break;
+ }
+
+ if ( deleteIt )
+ {
+ MemFree( curr->name );
+ MemFree( curr );
+ if ( prev )
+ prev->next = next; /* unlink after the last kept entry */
+ else
+ tags->declared_tag_list = next; /* unlink at the head of the list */
+ }
+ else
+ prev = curr;
}
+}
+
+void FreeTags( TidyDocImpl* doc )
+{
+ TidyTagImpl* tags = &doc->tags;
+ FreeDeclaredTags( doc, 0 );
MemFree( tags->xml_tags );
@@ -398,7 +448,7 @@ void CheckIMG( TidyDocImpl* doc, Node *node )
AttVal *attval;
for ( attval = node->attributes; attval != null; attval = attval->next )
{
- Attribute* dict = CheckAttribute( doc, node, attval );
+ const Attribute* dict = CheckAttribute( doc, node, attval );
if ( dict )
{
TidyAttrId id = dict->id;
@@ -529,7 +579,7 @@ void CheckAREA( TidyDocImpl* doc, Node *node )
for (attval = node->attributes; attval != null; attval = attval->next)
{
- Attribute* dict = CheckAttribute( doc, node, attval );
+ const Attribute* dict = CheckAttribute( doc, node, attval );
if ( dict )
{
if ( dict->id == TidyAttr_ALT )
@@ -559,7 +609,7 @@ void CheckTABLE( TidyDocImpl* doc, Node *node )
for (attval = node->attributes; attval != null; attval = attval->next)
{
- Attribute* dict = CheckAttribute( doc, node, attval );
+ const Attribute* dict = CheckAttribute( doc, node, attval );
if ( dict && dict->id == TidyAttr_SUMMARY )
HasSummary = yes;
}
@@ -605,7 +655,7 @@ void CheckSCRIPT( TidyDocImpl* doc, Node *node )
if ( !type )
{
- ReportMissingAttr( doc, node, "type" );
+ /* ReportMissingAttr( doc, node, "type" ); */
/* check for javascript */
if ( lang )
@@ -626,6 +676,8 @@ void CheckSCRIPT( TidyDocImpl* doc, Node *node )
}
else
AddAttribute( doc, node, "type", "text/javascript" );
+ type = GetAttrByName( node, "type" );
+ ReportAttrError( doc, node, type, INSERTING_ATTRIBUTE );
}
}
@@ -639,8 +691,9 @@ void CheckSTYLE( TidyDocImpl* doc, Node *node )
if ( !type )
{
- ReportMissingAttr( doc, node, "type");
AddAttribute( doc, node, "type", "text/css" );
+ type = GetAttrByName( node, "type" );
+ ReportAttrError( doc, node, type, INSERTING_ATTRIBUTE );
}
}
@@ -657,8 +710,9 @@ void CheckLINK( TidyDocImpl* doc, Node *node )
AttVal *type = GetAttrByName(node, "type");
if (!type)
{
- ReportMissingAttr( doc, node, "type" );
AddAttribute( doc, node, "type", "text/css" );
+ type = GetAttrByName( node, "type" );
+ ReportAttrError( doc, node, type, INSERTING_ATTRIBUTE );
}
}
}
@@ -690,8 +744,7 @@ Bool nodeIsText( Node* node )
Bool nodeHasText( TidyDocImpl* doc, Node* node )
{
- assert( doc != null );
- if ( nodeIsText(node) )
+ if ( doc && node )
{
uint ix;
Lexer* lexer = doc->lexer;
diff --git a/tidylib-src/src/tags.h b/tidylib-src/src/tags.h
old mode 100755
new mode 100644
index ad8e0b40..1d287994
--- a/tidylib-src/src/tags.h
+++ b/tidylib-src/src/tags.h
@@ -8,9 +8,9 @@
CVS Info :
- $Author: lpassey $
- $Date: 2002/10/22 15:02:23 $
- $Revision: 1.1.2.7 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
The HTML tags are stored as 8 bit ASCII strings.
Use lookupw() to find a tag given a wide char string.
@@ -58,10 +58,11 @@ int HTMLVersion( TidyDocImpl* doc );
/* interface for finding tag by name */
-Dict* LookupTagDef( TidyTagId tid );
+const Dict* LookupTagDef( TidyTagId tid );
Bool FindTag( TidyDocImpl* doc, Node *node );
Parser* FindParser( TidyDocImpl* doc, Node *node );
void DefineTag( TidyDocImpl* doc, int tagType, ctmbstr name );
+void FreeDeclaredTags( TidyDocImpl* doc, int tagType ); /* 0 to free all */
TidyIterator GetDeclaredTagList( TidyDocImpl* doc );
Dict* GetNextDeclaredDict( TidyDocImpl* doc, TidyIterator* iter );
@@ -118,7 +119,7 @@ CheckAttribs CheckMETA;
/* 0 == TidyTag_UNKNOWN */
-#define TagId(node) ((node) ? ((node)->tag ? (node)->tag->id : 0) : 0)
+#define TagId(node) ((node) && (node)->tag ? (node)->tag->id : TidyTag_UNKNOWN)
#define TagIsId(node, tid) ((node) && (node)->tag && (node)->tag->id == tid)
Bool nodeIsText( Node* node );
diff --git a/tidylib-src/src/tidy-int.h b/tidylib-src/src/tidy-int.h
old mode 100755
new mode 100644
index 00d21722..0f60d36a
--- a/tidylib-src/src/tidy-int.h
+++ b/tidylib-src/src/tidy-int.h
@@ -9,8 +9,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/08/11 16:44:31 $
- $Revision: 1.1.2.8 $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
*/
@@ -36,8 +36,10 @@ struct _TidyDocImpl
TidyTagImpl tags;
TidyAttribImpl attribs;
+#if SUPPORT_ACCESSIBILITY_CHECKS
/* Accessibility Checks state */
TidyAccessImpl access;
+#endif
/* The Pretty Print buffer */
TidyPrintImpl pprint;
@@ -51,6 +53,7 @@ struct _TidyDocImpl
StreamOut* docOut;
StreamOut* errout;
TidyReportFilter mssgFilt;
+ TidyOptCallback pOptCallback;
/* Parse + Repair Results */
uint optionErrors;
@@ -74,6 +77,7 @@ struct _TidyDocImpl
#if PRESERVE_FILE_TIMES
struct utimbuf filetimes;
#endif
+ Node* givenDoctype;
};
diff --git a/tidylib-src/src/tidylib.c b/tidylib-src/src/tidylib.c
old mode 100755
new mode 100644
index fffe1737..3849822f
--- a/tidylib-src/src/tidylib.c
+++ b/tidylib-src/src/tidylib.c
@@ -1,13 +1,13 @@
/* tidylib.c -- internal library definitions
- (c) 1998-2002 (W3C) MIT, INRIA, Keio University
+ (c) 1998-2003 (W3C) MIT, INRIA, Keio University
See tidy.h for the copyright notice.
CVS Info :
- $Author: creitzel $
- $Date: 2002/10/15 19:54:30 $
- $Revision: 1.1.2.9 $
+ $Author: terry_teague $
+ $Date: 2003/03/02 04:31:13 $
+ $Revision: 1.4 $
Defines HTML Tidy API implemented by tidy library.
@@ -163,6 +163,18 @@ ctmbstr tidyReleaseDate()
/* Get/set configuration options
*/
+Bool tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback )
+{
+ TidyDocImpl* impl = tidyDocToImpl( tdoc );
+ if ( impl )
+ {
+ impl->pOptCallback = pOptCallback;
+ return yes;
+ }
+ return no; /* Bool result: -EINVAL is non-zero and would read as yes */
+}
+
+
int tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil )
{
TidyDocImpl* impl = tidyDocToImpl( tdoc );
@@ -381,7 +393,7 @@ ctmbstr tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId )
if ( option && option->pickList )
{
uint ix, pick = tidyOptGetInt( tdoc, optId );
- ctmbstr* pL = option->pickList;
+ const ctmbstr* pL = option->pickList;
for ( ix=0; *pL && ix < pick; ++ix )
++pL;
if ( *pL )
@@ -574,11 +586,13 @@ FILE* tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam )
TidyDocImpl* impl = tidyDocToImpl( tdoc );
if ( impl )
{
- FILE* errout = fopen( errfilnam, "w" );
+ FILE* errout = fopen( errfilnam, "wb" );
if ( errout )
{
+ uint outenc = cfg( impl, TidyOutCharEncoding );
+ uint nl = cfg( impl, TidyNewline );
ReleaseStreamOut( impl->errout );
- impl->errout = FileOutput( errout, ASCII );
+ impl->errout = FileOutput( errout, outenc, nl );
return errout;
}
}
@@ -590,8 +604,10 @@ int tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf )
TidyDocImpl* impl = tidyDocToImpl( tdoc );
if ( impl )
{
+ uint outenc = cfg( impl, TidyOutCharEncoding );
+ uint nl = cfg( impl, TidyNewline );
ReleaseStreamOut( impl->errout );
- impl->errout = BufferOutput( errbuf, ASCII );
+ impl->errout = BufferOutput( errbuf, outenc, nl );
return ( impl->errout ? 0 : -ENOMEM );
}
return -EINVAL;
@@ -602,8 +618,10 @@ int tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink )
TidyDocImpl* impl = tidyDocToImpl( tdoc );
if ( impl )
{
+ uint outenc = cfg( impl, TidyOutCharEncoding );
+ uint nl = cfg( impl, TidyNewline );
ReleaseStreamOut( impl->errout );
- impl->errout = UserOutput( sink, ASCII );
+ impl->errout = UserOutput( sink, outenc, nl );
return ( impl->errout ? 0 : -ENOMEM );
}
return -EINVAL;
@@ -726,7 +744,7 @@ int tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam )
{
int status = -ENOENT;
uint inenc = cfg( doc, TidyInCharEncoding );
- FILE* fin = fopen( filnam, "r" );
+ FILE* fin = fopen( filnam, "rb" );
#if PRESERVE_FILE_TIMES
struct stat sbuf = {0};
@@ -831,38 +849,78 @@ int tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink )
int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
{
int status = -ENOENT;
- StreamOut* out = null;
- uint outenc = cfg( doc, TidyOutCharEncoding );
- FILE* fout = fopen( filnam, "w" );
+ FILE* fout = null;
+
+ /* Don't zap input file if no output */
+ if ( doc->errors > 0 &&
+ cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) )
+ status = tidyDocStatus( doc );
+ else
+ fout = fopen( filnam, "wb" );
+
if ( fout )
{
- out = FileOutput( fout, outenc );
+ uint outenc = cfg( doc, TidyOutCharEncoding );
+ uint nl = cfg( doc, TidyNewline );
+ StreamOut* out = FileOutput( fout, outenc, nl );
+
status = tidyDocSaveStream( doc, out );
+ fclose( fout );
+ MemFree( out );
+
#if PRESERVE_FILE_TIMES
- if ( !doc->filetimes.actime )
- fclose( fout );
- else
+ if ( doc->filetimes.actime )
{
/* set file last accessed/modified times to original values */
- fclose( fout );
utime( filnam, &doc->filetimes );
ClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
}
-#else
- fclose( fout );
#endif /* PRESERVFILETIMES */
- MemFree( out );
}
return status;
}
+
+/* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3.
+** The code has been left in in case it works w/ other compilers
+** or operating systems. If stdout is in Text mode, be aware that
+** it will garble UTF16 documents. In text mode, when it encounters
+** a single byte of value 10 (0xA), it will insert a single byte
+** value 13 (0xD) just before it. This has the effect of garbling
+** the entire document.
+*/
+
+#if defined(_WIN32) || defined(OS2_OS)
+#include <fcntl.h> /* _O_BINARY */
+#include <io.h> /* _setmode() */
+#endif
+
int tidyDocSaveStdout( TidyDocImpl* doc )
{
+ int oldmode = -1, status = 0; /* oldmode stays -1 unless _setmode() below returns a previous mode */
uint outenc = cfg( doc, TidyOutCharEncoding );
- StreamOut* out = FileOutput( stdout, outenc );
- int status = tidyDocSaveStream( doc, out );
+ uint nl = cfg( doc, TidyNewline );
+ StreamOut* out = FileOutput( stdout, outenc, nl );
+
+#if defined(_WIN32) || defined(OS2_OS)
+ oldmode = _setmode( _fileno(stdout), _O_BINARY ); /* request binary stdout; see the _setmode() note preceding this function */
+ if ( out->encoding == UTF16 || /* NOTE(review): streamio.h defines UTF16* only #if SUPPORT_UTF16_ENCODINGS - confirm this builds without it */
+ out->encoding == UTF16LE ||
+ out->encoding == UTF16BE )
+ {
+ ReportWarning( doc, NULL, doc->root, ENCODING_IO_CONFLICT );
+ }
+#endif
+
+ if ( 0 == status ) /* status still holds its initial 0 here */
+ status = tidyDocSaveStream( doc, out );
+
+#if defined(_WIN32) || defined(OS2_OS)
+ if ( oldmode != -1 )
+ oldmode = _setmode( _fileno(stdout), oldmode ); /* put stdout back in its previous mode */
+#endif
MemFree( out );
return status;
}
@@ -870,9 +928,10 @@ int tidyDocSaveStdout( TidyDocImpl* doc )
int tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
{
uint outenc = cfg( doc, TidyOutCharEncoding );
+ uint nl = cfg( doc, TidyNewline );
TidyBuffer outbuf = {0};
- StreamOut* out = BufferOutput( &outbuf, outenc );
+ StreamOut* out = BufferOutput( &outbuf, outenc, nl );
int status = tidyDocSaveStream( doc, out );
if ( outbuf.size > *buflen )
@@ -892,7 +951,8 @@ int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
if ( outbuf )
{
uint outenc = cfg( doc, TidyOutCharEncoding );
- StreamOut* out = BufferOutput( outbuf, outenc );
+ uint nl = cfg( doc, TidyNewline );
+ StreamOut* out = BufferOutput( outbuf, outenc, nl );
status = tidyDocSaveStream( doc, out );
MemFree( out );
@@ -903,7 +963,8 @@ int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
int tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink )
{
uint outenc = cfg( doc, TidyOutCharEncoding );
- StreamOut* out = UserOutput( sink, outenc );
+ uint nl = cfg( doc, TidyNewline );
+ StreamOut* out = UserOutput( sink, outenc, nl );
int status = tidyDocSaveStream( doc, out );
MemFree( out );
return status;
@@ -958,6 +1019,7 @@ int tidyDocParseStream( TidyDocImpl* doc, StreamIn* in )
TakeConfigSnapshot( doc ); /* Save config state */
FreeLexer( doc );
+ FreeAnchors( doc );
doc->lexer = NewLexer();
doc->inputHadBOM = no;
@@ -1002,10 +1064,13 @@ int tidyDocRunDiagnostics( TidyDocImpl* doc )
Bool quiet = cfgBool( doc, TidyQuiet );
Bool force = cfgBool( doc, TidyForceOutput );
- ReportMarkupVersion( doc );
+ HTMLVersionCompliance( doc );
if ( !quiet )
+ {
+ ReportMarkupVersion( doc );
ReportNumWarnings( doc );
+ }
if ( doc->errors > 0 && !force )
NeedsAuthorIntervention( doc );
@@ -1024,11 +1089,11 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
Bool logical = cfgBool( doc, TidyLogicalEmphasis );
Bool clean = cfgBool( doc, TidyMakeClean );
Bool dropFont = cfgBool( doc, TidyDropFontTags );
+ Bool htmlOut = cfgBool( doc, TidyHtmlOut );
Bool xmlOut = cfgBool( doc, TidyXmlOut );
Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
Bool xmlDecl = cfgBool( doc, TidyXmlDecl );
Bool tidyMark = cfgBool( doc, TidyMark );
- Node* doctype;
/* simplifies ... ... etc. */
NestedEmphasis( doc, doc->root );
@@ -1054,15 +1119,38 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
if ( clean || dropFont )
CleanDocument( doc );
+ /* Move terminating tags from out of paragraphs */
+ /*! Do we want to do this for all block-level elements? */
+ FixBrakes( doc, FindBody( doc ));
+
+ /* Reconcile http-equiv meta element with output encoding */
+ if (RAW != cfg( doc, TidyOutCharEncoding))
+ VerifyHTTPEquiv( doc, FindHEAD( doc ));
+
if ( !CheckNodeIntegrity(doc->root) )
FatalError( integrity );
- /* remember given doctype */
- doctype = CloneNodeEx( doc, FindDocType(doc) );
+ /* remember given doctype for reporting */
+ doc->givenDoctype = CloneNodeEx( doc, FindDocType(doc) );
if ( doc->root->content )
{
- if ( xhtmlOut )
+ /* If we had XHTML input but want HTML output */
+ if ( htmlOut && doc->lexer->isvoyager )
+ {
+ Node* node = FindDocType( doc );
+ /* Remove reference, but do not free */
+ if ( node )
+ RemoveNode( node );
+ if ( node = FindHTML(doc) )
+ {
+ AttVal* av = AttrGetById( node, TidyAttr_XMLNS );
+ if ( av )
+ RemoveAttribute( node, av );
+ }
+ }
+
+ if ( xhtmlOut && !htmlOut )
SetXHTMLDocType( doc );
else
FixDocType( doc );
@@ -1082,20 +1170,28 @@ int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
{
Bool showMarkup = cfgBool( doc, TidyShowMarkup );
Bool forceOutput = cfgBool( doc, TidyForceOutput );
+#if SUPPORT_UTF16_ENCODINGS
Bool outputBOM = ( cfg(doc, TidyOutputBOM) == yes );
Bool smartBOM = ( cfg(doc, TidyOutputBOM) == TidyAutoState );
+#endif
Bool xmlOut = cfgBool( doc, TidyXmlOut );
Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
Bool bodyOnly = cfgBool( doc, TidyBodyOnly );
if ( showMarkup && (doc->errors == 0 || forceOutput) )
{
+#if SUPPORT_UTF16_ENCODINGS
/* Output a Byte Order Mark if required */
if ( outputBOM || (doc->inputHadBOM && smartBOM) )
outBOM( out );
+#endif
- if ( !FindDocType(doc) )
- SetOptionBool( doc, TidyNumEntities, yes );
+ /* No longer necessary. No DOCTYPE == HTML 3.2,
+ ** which gives you only the basic character entities,
+ ** which are safe in any browser.
+ ** if ( !FindDocType(doc) )
+ ** SetOptionBool( doc, TidyNumEntities, yes );
+ */
doc->docOut = out;
if ( xmlOut && !xhtmlOut )
@@ -1207,14 +1303,17 @@ Bool tidyNodeHasText( TidyDoc tdoc, TidyNode tnod )
return nodeHasText( doc, tidyNodeToImpl(tnod) );
return no;
}
+
+
Bool tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf )
{
TidyDocImpl* doc = tidyDocToImpl( tdoc );
Node* nimp = tidyNodeToImpl( tnod );
- if ( doc && nodeHasText(doc, nimp) && outbuf )
+ if ( doc && nimp && outbuf )
{
uint outenc = cfg( doc, TidyOutCharEncoding );
- StreamOut* out = BufferOutput( outbuf, outenc );
+ uint nl = cfg( doc, TidyNewline );
+ StreamOut* out = BufferOutput( outbuf, outenc, nl );
Bool xmlOut = cfgBool( doc, TidyXmlOut );
Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
diff --git a/tidylib-src/src/tmbstr.c b/tidylib-src/src/tmbstr.c
old mode 100755
new mode 100644
index 41280979..a2d36302
--- a/tidylib-src/src/tmbstr.c
+++ b/tidylib-src/src/tmbstr.c
@@ -5,9 +5,9 @@
CVS Info :
- $Author: lpassey $
- $Date: 2002/10/29 23:52:26 $
- $Revision: 1.1.2.5 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
*/
diff --git a/tidylib-src/src/tmbstr.h b/tidylib-src/src/tmbstr.h
old mode 100755
new mode 100644
index e0c56314..b0ad2fcb
--- a/tidylib-src/src/tmbstr.h
+++ b/tidylib-src/src/tmbstr.h
@@ -8,9 +8,9 @@
CVS Info :
- $Author: lpassey $
- $Date: 2002/10/29 23:52:03 $
- $Revision: 1.1.2.3 $
+ $Author: creitzel $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
*/
diff --git a/tidylib-src/src/utf8.c b/tidylib-src/src/utf8.c
old mode 100755
new mode 100644
index d01db5b4..7c829d1c
--- a/tidylib-src/src/utf8.c
+++ b/tidylib-src/src/utf8.c
@@ -6,8 +6,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/10/15 19:50:30 $
- $Revision: 1.1.2.6 $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
Uses public interfaces to abstract input source and output
sink, which may be user supplied or either FILE* or memory
@@ -139,7 +139,7 @@ DBFF DFF* F4 8F BF B* 0010FFF*
/* offsets into validUTF8 table below */
-static int offsetUTF8Sequences[kMaxUTF8Bytes + 1] =
+static const int offsetUTF8Sequences[kMaxUTF8Bytes + 1] =
{
0, /* 1 byte */
1, /* 2 bytes */
@@ -148,7 +148,7 @@ static int offsetUTF8Sequences[kMaxUTF8Bytes + 1] =
kNumUTF8Sequences /* must be last */
};
-static struct validUTF8Sequence
+static const struct validUTF8Sequence
{
uint lowChar;
uint highChar;
diff --git a/tidylib-src/src/utf8.h b/tidylib-src/src/utf8.h
old mode 100755
new mode 100644
index f9895e9a..e74eaafd
--- a/tidylib-src/src/utf8.h
+++ b/tidylib-src/src/utf8.h
@@ -9,8 +9,8 @@
CVS Info :
$Author: creitzel $
- $Date: 2002/07/08 18:03:19 $
- $Revision: 1.1.2.3 $
+ $Date: 2003/02/16 19:33:11 $
+ $Revision: 1.2 $
*/
diff --git a/tidylib-src/test/input/cfg_427812.txt b/tidylib-src/test/input/cfg_427812.txt
deleted file mode 100755
index 3670e37c..00000000
--- a/tidylib-src/test/input/cfg_427812.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #427812
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_427821.txt b/tidylib-src/test/input/cfg_427821.txt
deleted file mode 100755
index c17f0b27..00000000
--- a/tidylib-src/test/input/cfg_427821.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #427821
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_427825.txt b/tidylib-src/test/input/cfg_427825.txt
deleted file mode 100755
index 045aa062..00000000
--- a/tidylib-src/test/input/cfg_427825.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #427825
-new-inline-tags: lm:xcode
-
diff --git a/tidylib-src/test/input/cfg_427826.txt b/tidylib-src/test/input/cfg_427826.txt
deleted file mode 100755
index 0fc9caee..00000000
--- a/tidylib-src/test/input/cfg_427826.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-// Tidy configuration file for bug #427826
-indent: auto
-char-encoding: latin1
-tidy-mark: no
-clean: yes
-drop-font-tags: yes
-logical-emphasis: yes
-indent-attributes: yes
-output-xhtml: yes
-
diff --git a/tidylib-src/test/input/cfg_427835.txt b/tidylib-src/test/input/cfg_427835.txt
deleted file mode 100755
index 547e5244..00000000
--- a/tidylib-src/test/input/cfg_427835.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #427835
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_427837.txt b/tidylib-src/test/input/cfg_427837.txt
deleted file mode 100755
index d0a3ff97..00000000
--- a/tidylib-src/test/input/cfg_427837.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-// Tidy configuration file for bug #427837
-// add-xml-decl: yes
-input-xml: yes
-output-xml: yes
-char-encoding: latin1
diff --git a/tidylib-src/test/input/cfg_427839.txt b/tidylib-src/test/input/cfg_427839.txt
deleted file mode 100755
index e5ec6e5e..00000000
--- a/tidylib-src/test/input/cfg_427839.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #427839
-output-xhtml: yes
-doctype: omit
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_427845.txt b/tidylib-src/test/input/cfg_427845.txt
deleted file mode 100755
index 2169624d..00000000
--- a/tidylib-src/test/input/cfg_427845.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #427845
-wrap: 60
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_431716.txt b/tidylib-src/test/input/cfg_431716.txt
deleted file mode 100755
index afc3a5ad..00000000
--- a/tidylib-src/test/input/cfg_431716.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #431716
-split: yes
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_431721.txt b/tidylib-src/test/input/cfg_431721.txt
deleted file mode 100755
index ab04c701..00000000
--- a/tidylib-src/test/input/cfg_431721.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-// Tidy configuration file for bug #431721
-indent: auto
-new-inline-tags: o:p
-char-encoding: latin1
-tidy-mark: no
-clean: yes
-drop-font-tags: yes
-logical-emphasis: yes
-word-2000: yes
-indent-attributes: yes
-
diff --git a/tidylib-src/test/input/cfg_431736.txt b/tidylib-src/test/input/cfg_431736.txt
deleted file mode 100755
index e7643211..00000000
--- a/tidylib-src/test/input/cfg_431736.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #431736
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_431889.txt b/tidylib-src/test/input/cfg_431889.txt
deleted file mode 100755
index 3dc21a66..00000000
--- a/tidylib-src/test/input/cfg_431889.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Config file for bug [ #431889 ] Config file options w/"param" don't work
-doctype: "-//ACME//DTD HTML 3.14159//EN"
-alt-text: "Alternate"
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_431895.txt b/tidylib-src/test/input/cfg_431895.txt
deleted file mode 100755
index 41584939..00000000
--- a/tidylib-src/test/input/cfg_431895.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-// Tidy configuration file for bug #431895
-quiet: yes
-markup: no
-gnu-emacs: yes
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_431956.txt b/tidylib-src/test/input/cfg_431956.txt
deleted file mode 100755
index 15c73145..00000000
--- a/tidylib-src/test/input/cfg_431956.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #431956
-input-xml: yes
-output-xml: yes
diff --git a/tidylib-src/test/input/cfg_431958.txt b/tidylib-src/test/input/cfg_431958.txt
deleted file mode 100755
index 990c0130..00000000
--- a/tidylib-src/test/input/cfg_431958.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-// Tidy configuration file for bug #431958
-// Warning - this will modify the INPUT file (each time it is run)
-indent: auto
-write-back: yes
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_432677.txt b/tidylib-src/test/input/cfg_432677.txt
deleted file mode 100755
index 4e73e6dd..00000000
--- a/tidylib-src/test/input/cfg_432677.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #432677
-output-xml: yes
diff --git a/tidylib-src/test/input/cfg_433604.txt b/tidylib-src/test/input/cfg_433604.txt
deleted file mode 100755
index 2a03e4c9..00000000
--- a/tidylib-src/test/input/cfg_433604.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #433604
-input-xml: yes
diff --git a/tidylib-src/test/input/cfg_433607.txt b/tidylib-src/test/input/cfg_433607.txt
deleted file mode 100755
index e4478c3f..00000000
--- a/tidylib-src/test/input/cfg_433607.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #433607
-input-xml: yes
diff --git a/tidylib-src/test/input/cfg_433670.txt b/tidylib-src/test/input/cfg_433670.txt
deleted file mode 100755
index 9cc682f5..00000000
--- a/tidylib-src/test/input/cfg_433670.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #433670
-input-xml: yes
diff --git a/tidylib-src/test/input/cfg_433856.txt b/tidylib-src/test/input/cfg_433856.txt
deleted file mode 100755
index c61c8293..00000000
--- a/tidylib-src/test/input/cfg_433856.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #433856
-drop-font-tags: yes
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_434100.txt b/tidylib-src/test/input/cfg_434100.txt
deleted file mode 100755
index 713d5d5d..00000000
--- a/tidylib-src/test/input/cfg_434100.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #434100
-input-xml: yes
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_434940.txt b/tidylib-src/test/input/cfg_434940.txt
deleted file mode 100755
index 1e174070..00000000
--- a/tidylib-src/test/input/cfg_434940.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #434940
-show-body-only: yes
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_438954.txt b/tidylib-src/test/input/cfg_438954.txt
deleted file mode 100755
index bb4e33b0..00000000
--- a/tidylib-src/test/input/cfg_438954.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #438954
-hide-endtags: yes
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_444394.txt b/tidylib-src/test/input/cfg_444394.txt
deleted file mode 100755
index 0048a266..00000000
--- a/tidylib-src/test/input/cfg_444394.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-// Tidy configuration file for bug #444394
-indent: auto
-new-inline-tags: o:p
-char-encoding: latin1
-tidy-mark: no
-clean: yes
-drop-font-tags: yes
-logical-emphasis: yes
-word-2000: yes
-indent-attributes: yes
-
diff --git a/tidylib-src/test/input/cfg_449348.txt b/tidylib-src/test/input/cfg_449348.txt
deleted file mode 100755
index 8717b2cf..00000000
--- a/tidylib-src/test/input/cfg_449348.txt
+++ /dev/null
@@ -1 +0,0 @@
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_463066.txt b/tidylib-src/test/input/cfg_463066.txt
deleted file mode 100755
index 682eb3f8..00000000
--- a/tidylib-src/test/input/cfg_463066.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #463066
-word-2000: yes
-
diff --git a/tidylib-src/test/input/cfg_470663.txt b/tidylib-src/test/input/cfg_470663.txt
deleted file mode 100755
index f00aace1..00000000
--- a/tidylib-src/test/input/cfg_470663.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #470663
-word-2000: yes
-
diff --git a/tidylib-src/test/input/cfg_473490.txt b/tidylib-src/test/input/cfg_473490.txt
deleted file mode 100755
index 55679514..00000000
--- a/tidylib-src/test/input/cfg_473490.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-// Tidy configuration file for bug #473490
-tidy-mark: no
-wrap: 0
-output-xhtml: yes
-doctype: auto
-quote-nbsp: yes
-uppercase-tags: yes
-quote-ampersand: yes
-add-xml-space: no
-show-warnings:no
-quiet: yes
diff --git a/tidylib-src/test/input/cfg_480406.txt b/tidylib-src/test/input/cfg_480406.txt
deleted file mode 100755
index 50bc5f57..00000000
--- a/tidylib-src/test/input/cfg_480406.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #480406
-input-xml: yes
-output-xml: yes
diff --git a/tidylib-src/test/input/cfg_480701.txt b/tidylib-src/test/input/cfg_480701.txt
deleted file mode 100755
index 9d95a808..00000000
--- a/tidylib-src/test/input/cfg_480701.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #480701
-input-xml: yes
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_480843.txt b/tidylib-src/test/input/cfg_480843.txt
deleted file mode 100755
index b49a0b45..00000000
--- a/tidylib-src/test/input/cfg_480843.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #480843
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_500236.txt b/tidylib-src/test/input/cfg_500236.txt
deleted file mode 100755
index 410fadfb..00000000
--- a/tidylib-src/test/input/cfg_500236.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-// Tidy configuration file for bug #500236
-word-2000: yes
-input-xml: yes
-output-xml: yes
-
diff --git a/tidylib-src/test/input/cfg_503436.txt b/tidylib-src/test/input/cfg_503436.txt
deleted file mode 100755
index 8a380d20..00000000
--- a/tidylib-src/test/input/cfg_503436.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #503436
-input-xml: yes
diff --git a/tidylib-src/test/input/cfg_508936.txt b/tidylib-src/test/input/cfg_508936.txt
deleted file mode 100755
index ba9c0e97..00000000
--- a/tidylib-src/test/input/cfg_508936.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-clean: yes
-
-# Error 1: escaped number too long. Max 4 hex digits
-# css-prefix: \77777abc
-
-# Error 2: class prefix starts with digit
-# css-prefix: 77abc
-
-# Error 3: Unescaped invalid character
-# css-prefix: abc
-
-# OK 1: Plain old name
-# css-prefix: abc123
-
-# OK 2: Begin w/ escaped number
-# css-prefix: \77abc
-
-# OK 3: escaped number
-css-prefix: abc\8
diff --git a/tidylib-src/test/input/cfg_511243.txt b/tidylib-src/test/input/cfg_511243.txt
deleted file mode 100755
index c305be86..00000000
--- a/tidylib-src/test/input/cfg_511243.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #511243
-char-encoding: utf8
diff --git a/tidylib-src/test/input/cfg_514348.txt b/tidylib-src/test/input/cfg_514348.txt
deleted file mode 100755
index a0afb0c6..00000000
--- a/tidylib-src/test/input/cfg_514348.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-// Tidy configuration file for bug #514348
-uppercase-tags: true
-indent: auto
-indent-spaces: 2
\ No newline at end of file
diff --git a/tidylib-src/test/input/cfg_517550.txt b/tidylib-src/test/input/cfg_517550.txt
deleted file mode 100755
index b94453ec..00000000
--- a/tidylib-src/test/input/cfg_517550.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #517550
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_531964.txt b/tidylib-src/test/input/cfg_531964.txt
deleted file mode 100755
index 2d18ab5f..00000000
--- a/tidylib-src/test/input/cfg_531964.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug 531964
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_532535.txt b/tidylib-src/test/input/cfg_532535.txt
deleted file mode 100755
index 27188213..00000000
--- a/tidylib-src/test/input/cfg_532535.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #532535
-word-2000: yes
diff --git a/tidylib-src/test/input/cfg_533233.txt b/tidylib-src/test/input/cfg_533233.txt
deleted file mode 100755
index 1de4c82c..00000000
--- a/tidylib-src/test/input/cfg_533233.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-output-xhtml: yes
-indent: auto
diff --git a/tidylib-src/test/input/cfg_537604.txt b/tidylib-src/test/input/cfg_537604.txt
deleted file mode 100755
index c41660fb..00000000
--- a/tidylib-src/test/input/cfg_537604.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #537604
-input-xml: yes
-clean: no
diff --git a/tidylib-src/test/input/cfg_540045.txt b/tidylib-src/test/input/cfg_540045.txt
deleted file mode 100755
index d0a1e859..00000000
--- a/tidylib-src/test/input/cfg_540045.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-// Tidy configuration file for bug #540045
-wrap: 64
-indent: no
-indent-spaces: 4
-add-xml-decl: yes
-#output-xhtml: yes
-break-before-br: yes
-clean: yes
-logical-emphasis: yes
-enclose-text: yes
-enclose-block-text: yes
diff --git a/tidylib-src/test/input/cfg_540571.txt b/tidylib-src/test/input/cfg_540571.txt
deleted file mode 100755
index 48a1c073..00000000
--- a/tidylib-src/test/input/cfg_540571.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-// Tidy configuration file for bug #540571 Inconsistent behaviour with span inline element
-output-xml: yes
-wrap: 255
-clean: no
diff --git a/tidylib-src/test/input/cfg_542029.txt b/tidylib-src/test/input/cfg_542029.txt
deleted file mode 100755
index 5b27d6f1..00000000
--- a/tidylib-src/test/input/cfg_542029.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-// Tidy configuration file for bug #542029
-add-xml-decl: yes
-output-xml: yes
diff --git a/tidylib-src/test/input/cfg_543262.txt b/tidylib-src/test/input/cfg_543262.txt
deleted file mode 100755
index 02ead1fa..00000000
--- a/tidylib-src/test/input/cfg_543262.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-doctype: omit
-output-xhtml: yes
-char-encoding: latin1
-numeric-entities: yes
-quiet: yes
diff --git a/tidylib-src/test/input/cfg_545772.txt b/tidylib-src/test/input/cfg_545772.txt
deleted file mode 100755
index dcbc2ade..00000000
--- a/tidylib-src/test/input/cfg_545772.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-// Tidy configuration file for bug #547057
-output-xhtml: yes
diff --git a/tidylib-src/test/input/cfg_570027.txt b/tidylib-src/test/input/cfg_570027.txt
deleted file mode 100755
index 82c04c95..00000000
--- a/tidylib-src/test/input/cfg_570027.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-// Tidy configuration file for bug 570027
-clean: yes
-word-2000: yes
-
diff --git a/tidylib-src/test/input/cfg_586555.txt b/tidylib-src/test/input/cfg_586555.txt
deleted file mode 100755
index f458aea1..00000000
--- a/tidylib-src/test/input/cfg_586555.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-wrap: 68
-tab-size: 4
-repeated-attributes: keep-last
-alt-text: None, says tidy
-show-warnings: no
-quiet: yes
-indent: auto
-indent-attributes: yes
-output-xml: yes
-output-xhtml: yes
-add-xml-decl: yes
-bare: yes
-logical-emphasis: yes
-drop-proprietary-attributes: yes
-break-before-br: yes
-quote-nbsp: no
-assume-xml-procins: yes
-keep-time: no
-word-2000: yes
-tidy-mark: no
-literal-attributes: yes
-hide-comments: yes
-ascii-chars: no
-join-styles: no
-output-bom: no
diff --git a/tidylib-src/test/input/cfg_default.txt b/tidylib-src/test/input/cfg_default.txt
deleted file mode 100755
index 5fd52cf6..00000000
--- a/tidylib-src/test/input/cfg_default.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-// HTML Tidy configuration file created by TidyGUI
-indent: auto
-char-encoding: latin1
-tidy-mark: no
-clean: yes
-drop-font-tags: yes
-logical-emphasis: yes
-indent-attributes: yes
-// output-xhtml: yes
-
diff --git a/tidylib-src/test/input/in_426885.html b/tidylib-src/test/input/in_426885.html
deleted file mode 100755
index be3296b9..00000000
--- a/tidylib-src/test/input/in_426885.html
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-[ #426885 ] Definition list w/Center crashes
-
-
-
Heading 1
-
Term 1
-
Term 2
-
Heading 2
-
-
Term 3
-
Term 4
-
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_427633.html b/tidylib-src/test/input/in_427633.html
deleted file mode 100755
index d3637f23..00000000
--- a/tidylib-src/test/input/in_427633.html
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
- [#427663] Line endings not supported correctly
-
-
-
This is a carriage return
This is a Unix line-ending
-This is a DOS line ending
-
-
-
diff --git a/tidylib-src/test/input/in_427662.html b/tidylib-src/test/input/in_427662.html
deleted file mode 100755
index cbaebe97..00000000
--- a/tidylib-src/test/input/in_427662.html
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
- [#427662] BLOCK/INLINE before TABLE parsed wrong
-
-
-
Problem is spec want "HTML 3.2 Final", but everyone
- in the world, including Tidy, uses "HTML 3.2". So the
- software has to recognize both FPI's as equivalent.
-
-
Bryan
-Joe-Bob LLP is a leading national and international corporate, litigation and
-private client law firm.We represent a
-wide variety of business, institutional and individual clients for whom our
-lawyers handle a wide range of matters.
-As a result, our lawyers are well prepared to meet the needs of clients
-whether large or small, public or private, for-profit or not-for-profit.
-
-
-
-
Joe-Bob
-Briggs has more offices than you can shake a stick at.
-These locations give Joe-Bob the geographic reach to assist his
-clients where their needs are most pressing.
-
-
·
-Estate Planning
-
-
·
-Closely-Held Business Practice
-
-
·
-Estate, Gift, Income and Other Tax Advice
-
-
-
-
Joe-Bob
-joined the Firm in 1995 after 15 years with the Kansas City firm of Fish,
-Gill, Smoker & Butts, where he was a Shareholder/Director.John is a past Chair of the Estate Planning,
-Probate and Trust Committee of the Kansas City Metropolitan Bar Association and
-co-authored the Drinking Procedures Manual for County
-Practitioners.Currently, JB is a
-member of the Missouri Bar Probate and Trust Committee, the Estate Planning Society
-and the Mid-America Planned Giving Council.
-A fellow of the American College of Trust and Estate Counsel, JB
-lectures frequently on Estate Planning topics for both legal and lay
-organizations.
-
-
-
-
-
-
-
-
-
diff --git a/tidylib-src/test/input/in_431731.html b/tidylib-src/test/input/in_431731.html
deleted file mode 100755
index 16850537..00000000
--- a/tidylib-src/test/input/in_431731.html
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-[ #431731 ] Inline emphasis inconsistent propagation
-
-
-OUTSIDE
-OUTSIDE
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_431736.html b/tidylib-src/test/input/in_431736.html
deleted file mode 100755
index b8b46c3f..00000000
--- a/tidylib-src/test/input/in_431736.html
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
- [#431736] Doctype decl added before XML decl
-
-
-
Run tidy w/ -asxhtml or -asxml options...
-
-
diff --git a/tidylib-src/test/input/in_431739.html b/tidylib-src/test/input/in_431739.html
deleted file mode 100755
index 86b2a1f5..00000000
--- a/tidylib-src/test/input/in_431739.html
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
- [#431739] Spaces carried into empty block tags
-
-
-This is a test
-Example
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_431874.html b/tidylib-src/test/input/in_431874.html
deleted file mode 100755
index 3af169c3..00000000
--- a/tidylib-src/test/input/in_431874.html
+++ /dev/null
@@ -1,6 +0,0 @@
-
-Test for bug #431874
-
-Test for bug #431874
-What does "org.xml.sax.SAXException:
-Please, fix your system identifier (URI) in the DOCTYPE rule." mean?
-
-
-
Your XHTML document contains a document type
-declaration but the system identifier points at some
-non-W3C URI. Your document probably contains something
-like this:
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_443381.xhtml b/tidylib-src/test/input/in_443381.xhtml
deleted file mode 100755
index b5c6b9ac..00000000
--- a/tidylib-src/test/input/in_443381.xhtml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-[ #443381 ] end tags for empty elements in XHTML
-
-
-
TestTest
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_443576.html b/tidylib-src/test/input/in_443576.html
deleted file mode 100755
index f3485c76..00000000
--- a/tidylib-src/test/input/in_443576.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-[ #443576 ] End script tag inside scripts problem
-
-
-');
-// -->
-
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_443678.html b/tidylib-src/test/input/in_443678.html
deleted file mode 100755
index 2fe2cc9d..00000000
--- a/tidylib-src/test/input/in_443678.html
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-[ #443678 ] Unclosed <script> in <head> messes Tidy
-
-
-
-
diff --git a/tidylib-src/test/input/in_444394.html b/tidylib-src/test/input/in_444394.html
deleted file mode 100755
index da104694..00000000
--- a/tidylib-src/test/input/in_444394.html
+++ /dev/null
@@ -1,149 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-Hello
-
-
-
-
-
-
-
-
-
Hello
-
-
-
-
This is a nice document
-
-
-
-
With a nice picture
-
-
-
-
-
-
-
-
diff --git a/tidylib-src/test/input/in_445074.html b/tidylib-src/test/input/in_445074.html
deleted file mode 100755
index 761d0d39..00000000
--- a/tidylib-src/test/input/in_445074.html
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-[ #445074 ] XHTML requires form method="post"
-
-
-
-
-
-
diff --git a/tidylib-src/test/input/in_445394.html b/tidylib-src/test/input/in_445394.html
deleted file mode 100755
index 626aa648..00000000
--- a/tidylib-src/test/input/in_445394.html
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-[ #445394 ] Improve handling of missing trailing "
-
-
-link
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_445557.html b/tidylib-src/test/input/in_445557.html
deleted file mode 100755
index f1be0b87..00000000
--- a/tidylib-src/test/input/in_445557.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-[ #445557 ] Convert Symbol font chars to Unicode
-
-
-
The predicate calculus has a number of theorems and axioms for proving logical statements. Here are the main symbols used in predicate calculus:
-
P(x) proposition a logical statement in the condition x.
-
x any condition in the set of possible conditions.
-
c a particular condition in the set of possible conditions.
-Test black
-Test green
-Test silver
-Test lime
-Test gray
-Test olive
-Test white
-Test yellow
-Test maroon
-Test navy
-Test red
-Test blue
-Test purple
-Test teal
-Test fuchsia
-Test aqua
-
-
-
-Test Red
-Test RED
-
-
-
-Test invalid reddish
-
-
-
-Test black #000000
-Test green #008000
-Test silver #C0C0C0
-Test lime #00FF00
-Test gray #808080
-Test olive #808000
-Test white #FFFFFF
-Test yellow #FFFF00
-Test maroon #800000
-Test navy #000080
-Test red #FF0000
-Test blue #0000FF
-Test purple #800080
-Test teal #008080
-Test fuchsia #FF00FF
-Test aqua #00FFFF
-
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_540555.html b/tidylib-src/test/input/in_540555.html
deleted file mode 100755
index 2976f135..00000000
--- a/tidylib-src/test/input/in_540555.html
+++ /dev/null
@@ -1,5 +0,0 @@
-
-
-
-
#540555 Empty title tag is trimmed
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_540571.html b/tidylib-src/test/input/in_540571.html
deleted file mode 100755
index 03bdf6f9..00000000
--- a/tidylib-src/test/input/in_540571.html
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-#540571 Inconsistent behaviour with span inline element
-
-
-
Hello World
-
-The font inline is moved so it becomes a child of the h1 element.
-
-
Hello World
-
-The span inline is not moved so it becomes a child of the h1 element, which is inconsistent and does not correspond with current browser behaviour any more.
-
-
-
\ No newline at end of file
diff --git a/tidylib-src/test/input/in_542029.html b/tidylib-src/test/input/in_542029.html
deleted file mode 100755
index 2d2aff51..00000000
--- a/tidylib-src/test/input/in_542029.html
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
-[ 542029 ] PPrintXmlDecl reads outside array range
-
-
-Test
-
-
diff --git a/tidylib-src/test/input/in_543262.html b/tidylib-src/test/input/in_543262.html
deleted file mode 100755
index b6cf95af..00000000
--- a/tidylib-src/test/input/in_543262.html
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
- Preferences
-
-
-
-
Test
-
-
diff --git a/tidylib-src/test/input/in_545067.html b/tidylib-src/test/input/in_545067.html
deleted file mode 100755
index 7b28f1c1..00000000
--- a/tidylib-src/test/input/in_545067.html
+++ /dev/null
@@ -1,3 +0,0 @@
-
-[ 545067 ] Implicit closing of head broken
-
-**We have R-A-M mounts now in stock for most Garmin units....Call or email us for prices and availability.**
-
-
-
-
-
-
-
-
Total Video became an authorized Garmin dealer in January 1999. We sold 300+ GPS units prior to becoming a Garmin direct dealer, picking them up from various distributors and individuals to sell. By becoming a Garmin direct dealer we now are able to sell for less! Total Video prides itself with *very quick shipping and a strong history of customer satisfaction. Comments from customers.
-
-
-
-Want to learn more about GPS? Click here for further GPS information.
-
-
-
-*Fishing Hot Spots MapSource (includes one coverage area unlock)....$85.00 IN STOCK!
-
-
-
-*U.S. Waterways & Lights MapSource....$60.00 IN STOCK!
-
-
-
-StreetPilot with dash mount, cigarette power cable and PC interface cable....$385.00 IN STOCK!
-
-
-
-StreetPilot ColorMap with dash mount, cigarette power cable and PC interface cable....$540.00 IN STOCK!
-
-
-
-
-
-Stock status subject to change. We try to update the stock status continuously but we sometimes don't get it changed immediately. Check with us for current stock status.
-We charge a flat $10.00 shipping and handling charge (via UPS ground) per GPS order (not per item) in the 48/US.
$5.00 shipping and handling for accessories in the 48/US.
An additional $10.00 charge for COD orders (COD s/h must be credit card secured).
Faster shipping available.
*3 day select (usually arrives in 2 days!)--addtl. $3.00. *2nd day air--addtl. $5.00. *Next day air saver--addtl. $20.00. *Next day air-Saturday delivery--addtl. $35.00 *More shipping may be required on larger packages for 3 day, 2nd day and next day air packages.
Click here to get UPS Ground delivery times. Our zip code is 67601 (Hays, Kansas).
-Add $10 to UPS charges for FedEx shipping. (minimum FedEx s/h is $18)
-
-
-
-$20 for Priority Mail s/h on GPS units and $15 for accessories in the US.
-
-
-
-Email us for requirements/costs for out of 48/US sales or click here.
-
-
-
-* Most orders received by 2:00 p.m. central time for in stock items will ship the same day (business days only).
-
-
-
-All orders in Kansas must pay a 6.8% sales tax.
-
-
-
-We accept payment by Discover/MasterCard/Visa/Pre-pay (orders paid by personal/company check orders held for 10 business days for check clearing. Cashier checks/money orders ship same day.). COD orders welcome (cashiers check or money order).
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Toll Free Order Line (877) 625-3546 (US only)
FAX (413) 383-8800
Information /International Order Line (785) 625-3546
Bryan Joe-Bob LLP is a leading national and international
- corporate, litigation and private client law firm. We
- represent a wide variety of business, institutional and
- individual clients for whom our lawyers handle a wide range
- of matters. As a result, our lawyers are well prepared
- to meet the needs of clients whether large or small, public
- or private, for-profit or not-for-profit.
-
-
-
-
Joe-Bob Briggs has more offices than you can shake a stick
- at. These locations give Joe-Bob the geographic reach
- to assist his clients where their needs are most
- pressing.
-
-
-
Estate Planning
-
-
Closely-Held Business Practice
-
-
Estate, Gift, Income and Other Tax Advice
-
-
-
-
-
Joe-Bob joined the Firm in 1995 after 15 years with the
- Kansas City firm of Fish, Gill, Smoker & Butts, where he
- was a Shareholder/Director. John is a past Chair of the
- Estate Planning, Probate and Trust Committee of the Kansas
- City Metropolitan Bar Association and co-authored the
- Drinking Procedures Manual for County Practitioners.
- Currently, JB is a member of the Missouri Bar Probate and
- Trust Committee, the Estate Planning Society and the
- Mid-America Planned Giving Council. A fellow of the
- American College of Trust and Estate Counsel, JB lectures
- frequently on Estate Planning topics for both legal and lay
- organizations.
The font inline is moved so it becomes a child of the h1 element.
-
-
- Hello World
-
-
-
The span inline is not moved so it becomes a child of the h1 element, which is inconsistent and does not correspond with current browser behaviour any more.