root/feedmelinks/include/tidy/tidy.h
| Revision 1362, 35.9 kB (checked in by jm3, 2 years ago) |
|---|
| Line | |
|---|---|
| 1 | #ifndef __TIDY_H__ |
| 2 | #define __TIDY_H__ |
| 3 | |
| 4 | /** @file tidy.h - Defines HTML Tidy API implemented by tidy library. |
| 5 | |
| 6 | Public interface is const-correct and doesn't explicitly depend |
| 7 | on any globals. Thus, thread-safety may be introduced w/out |
| 8 | changing the interface. |
| 9 | |
| 10 | Looking ahead to a C++ wrapper, C functions always pass |
| 11 | this-equivalent as 1st arg. |
| 12 | |
| 13 | |
| 14 | Copyright (c) 1998-2005 World Wide Web Consortium |
| 15 | (Massachusetts Institute of Technology, European Research |
| 16 | Consortium for Informatics and Mathematics, Keio University). |
| 17 | All Rights Reserved. |
| 18 | |
| 19 | CVS Info : |
| 20 | |
| 21 | $Author: arnaud02 $ |
| 22 | $Date: 2005/04/08 09:11:12 $ |
| 23 | $Revision: 1.13 $ |
| 24 | |
| 25 | Contributing Author(s): |
| 26 | |
| 27 | Dave Raggett <dsr@w3.org> |
| 28 | |
| 29 | The contributing author(s) would like to thank all those who |
| 30 | helped with testing, bug fixes and suggestions for improvements. |
| 31 | This wouldn't have been possible without your help. |
| 32 | |
| 33 | COPYRIGHT NOTICE: |
| 34 | |
| 35 | This software and documentation is provided "as is," and |
| 36 | the copyright holders and contributing author(s) make no |
| 37 | representations or warranties, express or implied, including |
| 38 | but not limited to, warranties of merchantability or fitness |
| 39 | for any particular purpose or that the use of the software or |
| 40 | documentation will not infringe any third party patents, |
| 41 | copyrights, trademarks or other rights. |
| 42 | |
| 43 | The copyright holders and contributing author(s) will not be held |
| 44 | liable for any direct, indirect, special or consequential damages |
| 45 | arising out of any use of the software or documentation, even if |
| 46 | advised of the possibility of such damage. |
| 47 | |
| 48 | Permission is hereby granted to use, copy, modify, and distribute |
| 49 | this source code, or portions hereof, documentation and executables, |
| 50 | for any purpose, without fee, subject to the following restrictions: |
| 51 | |
| 52 | 1. The origin of this source code must not be misrepresented. |
| 53 | 2. Altered versions must be plainly marked as such and must |
| 54 | not be misrepresented as being the original source. |
| 55 | 3. This Copyright notice may not be removed or altered from any |
| 56 | source or altered source distribution. |
| 57 | |
| 58 | The copyright holders and contributing author(s) specifically |
| 59 | permit, without fee, and encourage the use of this source code |
| 60 | as a component for supporting the Hypertext Markup Language in |
| 61 | commercial products. If you use this source code in a product, |
| 62 | acknowledgment is not required but would be appreciated. |
| 63 | |
| 64 | |
| 65 | Created 2001-05-20 by Charles Reitzel |
| 66 | Updated 2002-07-01 by Charles Reitzel - 1st Implementation |
| 67 | |
| 68 | */ |
| 69 | |
| 70 | #include "platform.h" |
| 71 | #include "tidyenum.h" |
| 72 | |
| 73 | #ifdef __cplusplus |
| 74 | extern "C" { |
| 75 | #endif |
| 76 | |
| 77 | /** @defgroup Opaque Opaque Types |
| 78 | ** |
| 79 | ** Cast to implementation types within lib. |
| 80 | ** Reduces inter-dependencies/conflicts w/ application code. |
| 81 | ** @{ |
| 82 | */ |
| 83 | |
| 84 | /** @struct TidyDoc |
| 85 | ** Opaque document datatype |
| 86 | */ |
| 87 | opaque_type( TidyDoc ); |
| 88 | |
| 89 | /** @struct TidyOption |
| 90 | ** Opaque option datatype |
| 91 | */ |
| 92 | opaque_type( TidyOption ); |
| 93 | |
| 94 | /** @struct TidyNode |
| 95 | ** Opaque node datatype |
| 96 | */ |
| 97 | opaque_type( TidyNode ); |
| 98 | |
| 99 | /** @struct TidyAttr |
| 100 | ** Opaque attribute datatype |
| 101 | */ |
| 102 | opaque_type( TidyAttr ); |
| 103 | |
| 104 | /** @} */ |
| 105 | |
| 106 | TIDY_STRUCT struct _TidyBuffer; |
| 107 | typedef struct _TidyBuffer TidyBuffer; |
| 108 | |
| 109 | |
| 110 | /** @defgroup Basic Basic Operations |
| 111 | ** |
| 112 | ** Tidy public interface |
| 113 | ** |
| 114 | ** Several functions return an integer document status: |
| 115 | ** |
| 116 | ** <pre> |
| 117 | ** 0 -> SUCCESS |
| 118 | ** >0 -> 1 == TIDY WARNING, 2 == TIDY ERROR |
| 119 | ** <0 -> SEVERE ERROR |
| 120 | ** </pre> |
| 121 | ** |
| 122 | The following is a short example program. |
| 123 | |
| 124 | <pre> |
| 125 | #include <tidy.h> |
| 126 | #include <buffio.h> |
| 127 | #include <stdio.h> |
| 128 | #include <errno.h> |
| 129 | |
| 130 | |
| 131 | int main(int argc, char **argv ) |
| 132 | { |
| 133 | const char* input = "<title>Foo</title><p>Foo!"; |
| 134 | TidyBuffer output = {0}; |
| 135 | TidyBuffer errbuf = {0}; |
| 136 | int rc = -1; |
| 137 | Bool ok; |
| 138 | |
| 139 | TidyDoc tdoc = tidyCreate(); // Initialize "document" |
| 140 | printf( "Tidying:\t\%s\\n", input ); |
| 141 | |
| 142 | ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes ); // Convert to XHTML |
| 143 | if ( ok ) |
| 144 | rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics |
| 145 | if ( rc >= 0 ) |
| 146 | rc = tidyParseString( tdoc, input ); // Parse the input |
| 147 | if ( rc >= 0 ) |
| 148 | rc = tidyCleanAndRepair( tdoc ); // Tidy it up! |
| 149 | if ( rc >= 0 ) |
| 150 | rc = tidyRunDiagnostics( tdoc ); // Kvetch |
| 151 | if ( rc > 1 ) // If error, force output. |
| 152 | rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 ); |
| 153 | if ( rc >= 0 ) |
| 154 | rc = tidySaveBuffer( tdoc, &output ); // Pretty Print |
| 155 | |
| 156 | if ( rc >= 0 ) |
| 157 | { |
| 158 | if ( rc > 0 ) |
| 159 | printf( "\\nDiagnostics:\\n\\n\%s", errbuf.bp ); |
| 160 | printf( "\\nAnd here is the result:\\n\\n\%s", output.bp ); |
| 161 | } |
| 162 | else |
| 163 | printf( "A severe error (\%d) occurred.\\n", rc ); |
| 164 | |
| 165 | tidyBufFree( &output ); |
| 166 | tidyBufFree( &errbuf ); |
| 167 | tidyRelease( tdoc ); |
| 168 | return rc; |
| 169 | } |
| 170 | </pre> |
| 171 | ** @{ |
| 172 | */ |
| 173 | |
| 174 | TIDY_EXPORT TidyDoc TIDY_CALL tidyCreate(void); |
| 175 | TIDY_EXPORT void TIDY_CALL tidyRelease( TidyDoc tdoc ); |
| 176 | |
| 177 | /** Let application store a chunk of data w/ each Tidy instance. |
| 178 | ** Useful for callbacks. |
| 179 | */ |
| 180 | TIDY_EXPORT void TIDY_CALL tidySetAppData( TidyDoc tdoc, ulong appData ); |
| 181 | |
| 182 | /** Get application data set previously */ |
| 183 | TIDY_EXPORT ulong TIDY_CALL tidyGetAppData( TidyDoc tdoc ); |
| 184 | |
| 185 | /** Get release date (version) for current library */ |
| 186 | TIDY_EXPORT ctmbstr TIDY_CALL tidyReleaseDate(void); |
| 187 | |
| 188 | /* Diagnostics and Repair |
| 189 | */ |
| 190 | |
| 191 | /** Get status of current document. */ |
| 192 | TIDY_EXPORT int TIDY_CALL tidyStatus( TidyDoc tdoc ); |
| 193 | |
| 194 | /** Detected HTML version: 0, 2, 3 or 4 */ |
| 195 | TIDY_EXPORT int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc tdoc ); |
| 196 | |
| 197 | /** Input is XHTML? */ |
| 198 | TIDY_EXPORT Bool TIDY_CALL tidyDetectedXhtml( TidyDoc tdoc ); |
| 199 | |
| 200 | /** Input is generic XML (not HTML or XHTML)? */ |
| 201 | TIDY_EXPORT Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc tdoc ); |
| 202 | |
| 203 | /** Number of Tidy errors encountered. If > 0, output is suppressed |
| 204 | ** unless TidyForceOutput is set. |
| 205 | */ |
| 206 | TIDY_EXPORT uint TIDY_CALL tidyErrorCount( TidyDoc tdoc ); |
| 207 | |
| 208 | /** Number of Tidy warnings encountered. */ |
| 209 | TIDY_EXPORT uint TIDY_CALL tidyWarningCount( TidyDoc tdoc ); |
| 210 | |
| 211 | /** Number of Tidy accessibility warnings encountered. */ |
| 212 | TIDY_EXPORT uint TIDY_CALL tidyAccessWarningCount( TidyDoc tdoc ); |
| 213 | |
| 214 | /** Number of Tidy configuration errors encountered. */ |
| 215 | TIDY_EXPORT uint TIDY_CALL tidyConfigErrorCount( TidyDoc tdoc ); |
| 216 | |
| 217 | /* Get/Set configuration options |
| 218 | */ |
| 219 | /** Load an ASCII Tidy configuration file */ |
| 220 | TIDY_EXPORT int TIDY_CALL tidyLoadConfig( TidyDoc tdoc, ctmbstr configFile ); |
| 221 | |
| 222 | /** Load a Tidy configuration file with the specified character encoding */ |
| 223 | TIDY_EXPORT int TIDY_CALL tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr configFile, |
| 224 | ctmbstr charenc ); |
| 225 | |
| 226 | TIDY_EXPORT Bool TIDY_CALL tidyFileExists( ctmbstr filename ); |
| 227 | |
| 228 | |
| 229 | /** Set the input/output character encoding for parsing markup. |
| 230 | ** Values include: ascii, latin1, raw, utf8, iso2022, mac, |
| 231 | ** win1252, utf16le, utf16be, utf16, big5 and shiftjis. Case-insensitive. |
| 232 | */ |
| 233 | TIDY_EXPORT int TIDY_CALL tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam ); |
| 234 | |
| 235 | /** Set the input encoding for parsing markup. |
| 236 | ** As for tidySetCharEncoding but only affects the input encoding |
| 237 | **/ |
| 238 | TIDY_EXPORT int TIDY_CALL tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam ); |
| 239 | |
| 240 | /** Set the output encoding. |
| 241 | **/ |
| 242 | TIDY_EXPORT int TIDY_CALL tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam ); |
| 243 | |
| 244 | /** @} end Basic group */ |
| 245 | |
| 246 | |
| 247 | /** @defgroup Configuration Configuration Options |
| 248 | ** |
| 249 | ** Functions for getting and setting Tidy configuration options. |
| 250 | ** @{ |
| 251 | */ |
| 252 | |
| 253 | /** Applications using TidyLib may want to augment command-line and |
| 254 | ** configuration file options. Setting this callback allows an application |
| 255 | ** developer to examine command-line and configuration file options after |
| 256 | ** TidyLib has examined them and failed to recognize them. |
| 257 | **/ |
| 258 | |
| 259 | typedef Bool (TIDY_CALL *TidyOptCallback)( ctmbstr option, ctmbstr value ); |
| 260 | |
| 261 | TIDY_EXPORT Bool TIDY_CALL tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback ); |
| 262 | |
| 263 | /** Get option ID by name */ |
| 264 | TIDY_EXPORT TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam ); |
| 265 | |
| 266 | /** Get iterator for list of options */ |
| 267 | /** |
| 268 | Example: |
| 269 | <pre> |
| 270 | TidyIterator itOpt = tidyGetOptionList( tdoc ); |
| 271 | while ( itOpt ) |
| 272 | { |
| 273 | TidyOption opt = tidyGetNextOption( tdoc, &itOpt ); |
| 274 | .. get/set option values .. |
| 275 | } |
| 276 | </pre> |
| 277 | */ |
| 278 | |
| 279 | TIDY_EXPORT TidyIterator TIDY_CALL tidyGetOptionList( TidyDoc tdoc ); |
| 280 | /** Get next Option */ |
| 281 | TIDY_EXPORT TidyOption TIDY_CALL tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos ); |
| 282 | |
| 283 | /** Lookup option by ID */ |
| 284 | TIDY_EXPORT TidyOption TIDY_CALL tidyGetOption( TidyDoc tdoc, TidyOptionId optId ); |
| 285 | /** Lookup option by name */ |
| 286 | TIDY_EXPORT TidyOption TIDY_CALL tidyGetOptionByName( TidyDoc tdoc, ctmbstr optnam ); |
| 287 | |
| 288 | /** Get ID of given Option */ |
| 289 | TIDY_EXPORT TidyOptionId TIDY_CALL tidyOptGetId( TidyOption opt ); |
| 290 | |
| 291 | /** Get name of given Option */ |
| 292 | TIDY_EXPORT ctmbstr TIDY_CALL tidyOptGetName( TidyOption opt ); |
| 293 | |
| 294 | /** Get datatype of given Option */ |
| 295 | TIDY_EXPORT TidyOptionType TIDY_CALL tidyOptGetType( TidyOption opt ); |
| 296 | |
| 297 | /** Is Option read-only? */ |
| 298 | TIDY_EXPORT Bool TIDY_CALL tidyOptIsReadOnly( TidyOption opt ); |
| 299 | |
| 300 | /** Get category of given Option */ |
| 301 | TIDY_EXPORT TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption opt ); |
| 302 | |
| 303 | /** Get default value of given Option as a string */ |
| 304 | TIDY_EXPORT ctmbstr TIDY_CALL tidyOptGetDefault( TidyOption opt ); |
| 305 | |
| 306 | /** Get default value of given Option as an unsigned integer */ |
| 307 | TIDY_EXPORT ulong TIDY_CALL tidyOptGetDefaultInt( TidyOption opt ); |
| 308 | |
| 309 | /** Get default value of given Option as a Boolean value */ |
| 310 | TIDY_EXPORT Bool TIDY_CALL tidyOptGetDefaultBool( TidyOption opt ); |
| 311 | |
| 312 | /** Iterate over Option "pick list" */ |
| 313 | TIDY_EXPORT TidyIterator TIDY_CALL tidyOptGetPickList( TidyOption opt ); |
| 314 | /** Get next string value of Option "pick list" */ |
| 315 | TIDY_EXPORT ctmbstr TIDY_CALL tidyOptGetNextPick( TidyOption opt, TidyIterator* pos ); |
| 316 | |
| 317 | /** Get current Option value as a string */ |
| 318 | TIDY_EXPORT ctmbstr TIDY_CALL tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId ); |
| 319 | /** Set Option value as a string */ |
| 320 | TIDY_EXPORT Bool TIDY_CALL tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val ); |
| 321 | /** Set named Option value as a string. Good if not sure of type. */ |
| 322 | TIDY_EXPORT Bool TIDY_CALL tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val ); |
| 323 | |
| 324 | /** Get current Option value as an integer */ |
| 325 | TIDY_EXPORT ulong TIDY_CALL tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId ); |
| 326 | /** Set Option value as an integer */ |
| 327 | TIDY_EXPORT Bool TIDY_CALL tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val ); |
| 328 | |
| 329 | /** Get current Option value as a Boolean flag */ |
| 330 | TIDY_EXPORT Bool TIDY_CALL tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId ); |
| 331 | /** Set Option value as a Boolean flag */ |
| 332 | TIDY_EXPORT Bool TIDY_CALL tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val ); |
| 333 | |
| 334 | /** Reset option to default value by ID */ |
| 335 | TIDY_EXPORT Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId opt ); |
| 336 | /** Reset all options to their default values */ |
| 337 | TIDY_EXPORT Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc ); |
| 338 | |
| 339 | /** Take a snapshot of current config settings */ |
| 340 | TIDY_EXPORT Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc ); |
| 341 | /** Reset config settings to snapshot (after document processing) */ |
| 342 | TIDY_EXPORT Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc ); |
| 343 | |
| 344 | /** Any settings different than default? */ |
| 345 | TIDY_EXPORT Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc ); |
| 346 | /** Any settings different than snapshot? */ |
| 347 | TIDY_EXPORT Bool TIDY_CALL tidyOptDiffThanSnapshot( TidyDoc tdoc ); |
| 348 | |
| 349 | /** Copy current configuration settings from one document to another */ |
| 350 | TIDY_EXPORT Bool TIDY_CALL tidyOptCopyConfig( TidyDoc tdocTo, TidyDoc tdocFrom ); |
| 351 | |
| 352 | /** Get character encoding name. Used with TidyCharEncoding, |
| 353 | ** TidyOutCharEncoding, TidyInCharEncoding */ |
| 354 | TIDY_EXPORT ctmbstr TIDY_CALL tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId ); |
| 355 | |
| 356 | /** Get current pick list value for option by ID. Useful for enum types. */ |
| 357 | TIDY_EXPORT ctmbstr TIDY_CALL tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId); |
| 358 | |
| 359 | /** Iterate over user declared tags */ |
| 360 | TIDY_EXPORT TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc ); |
| 361 | /** Get next declared tag of specified type: TidyInlineTags, TidyBlockTags, |
| 362 | ** TidyEmptyTags, TidyPreTags */ |
| 363 | TIDY_EXPORT ctmbstr TIDY_CALL tidyOptGetNextDeclTag( TidyDoc tdoc, |
| 364 | TidyOptionId optId, |
| 365 | TidyIterator* iter ); |
| 366 | /** Get option description */ |
| 367 | TIDY_EXPORT ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc tdoc, TidyOption opt ); |
| 368 | |
| 369 | /** Iterate over a list of related options */ |
| 370 | TIDY_EXPORT TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc tdoc, |
| 371 | TidyOption opt ); |
| 372 | /** Get next related option */ |
| 373 | TIDY_EXPORT TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, |
| 374 | TidyIterator* pos ); |
| 375 | |
| 376 | /** @} end Configuration group */ |
| 377 | |
| 378 | /** @defgroup IO I/O and Messages |
| 379 | ** |
| 380 | ** By default, Tidy will define, create and use |
| 381 | ** instances of input and output handlers for |
| 382 | ** standard C buffered I/O (i.e. FILE* stdin, |
| 383 | ** FILE* stdout and FILE* stderr for content |
| 384 | ** input, content output and diagnostic output, |
| 385 | ** respectively). A FILE* cfgFile input handler |
| 386 | ** will be used for config files. Command line |
| 387 | ** options will just be set directly. |
| 388 | ** |
| 389 | ** @{ |
| 390 | */ |
| 391 | |
| 392 | /***************** |
| 393 | Input Source |
| 394 | *****************/ |
| 395 | /** Input Callback: get next byte of input */ |
| 396 | typedef int (TIDY_CALL *TidyGetByteFunc)( ulong sourceData ); |
| 397 | |
| 398 | /** Input Callback: unget a byte of input */ |
| 399 | typedef void (TIDY_CALL *TidyUngetByteFunc)( ulong sourceData, byte bt ); |
| 400 | |
| 401 | /** Input Callback: is end of input? */ |
| 402 | typedef Bool (TIDY_CALL *TidyEOFFunc)( ulong sourceData ); |
| 403 | |
| 404 | /** End of input "character" */ |
| 405 | #define EndOfStream (~0u) |
| 406 | |
| 407 | /** TidyInputSource - Delivers raw bytes of input. Bundles one context |
| 408 | ** value with the three input callbacks that receive it. */ |
| 409 | TIDY_STRUCT |
| 410 | typedef struct _TidyInputSource |
| 411 | { |
| 412 | /* Instance data */ |
| 413 | ulong sourceData; /**< Input context value; handed to each callback below as its first argument */ |
| 414 | |
| 415 | /* Methods (callback pointers) */ |
| 416 | TidyGetByteFunc getByte; /**< "Get byte" callback: get next byte of input */ |
| 417 | TidyUngetByteFunc ungetByte; /**< "Unget" callback: unget a byte of input */ |
| 418 | TidyEOFFunc eof; /**< "EOF" callback: is end of input? */ |
| 419 | } TidyInputSource; |
| 420 | |
| 421 | /** Facilitates user-defined sources by providing |
| 422 | ** an entry point to marshal pointers-to-functions. |
| 423 | ** Needed by .NET and possibly other language bindings. |
| 424 | */ |
| 425 | TIDY_EXPORT Bool TIDY_CALL tidyInitSource( TidyInputSource* source, |
| 426 | void* srcData, |
| 427 | TidyGetByteFunc gbFunc, |
| 428 | TidyUngetByteFunc ugbFunc, |
| 429 | TidyEOFFunc endFunc ); |
| 430 | |
| 431 | /** Helper: get next byte from input source */ |
| 432 | TIDY_EXPORT uint TIDY_CALL tidyGetByte( TidyInputSource* source ); |
| 433 | |
| 434 | /** Helper: unget byte back to input source */ |
| 435 | TIDY_EXPORT void TIDY_CALL tidyUngetByte( TidyInputSource* source, uint byteValue ); |
| 436 | |
| 437 | /** Helper: check if input source at end */ |
| 438 | TIDY_EXPORT Bool TIDY_CALL tidyIsEOF( TidyInputSource* source ); |
| 439 | |
| 440 | |
| 441 | /**************** |
| 442 | Output Sink |
| 443 | ****************/ |
| 444 | /** Output callback: send a byte to output */ |
| 445 | typedef void (TIDY_CALL *TidyPutByteFunc)( ulong sinkData, byte bt ); |
| 446 | |
| 447 | |
| 448 | /** TidyOutputSink - Accepts raw bytes of output. Bundles one context |
| 449 | ** value with the output callback that receives it. */ |
| 450 | TIDY_STRUCT |
| 451 | typedef struct _TidyOutputSink |
| 452 | { |
| 453 | /* Instance data */ |
| 454 | ulong sinkData; /**< Output context value; handed to the callback below as its first argument */ |
| 455 | |
| 456 | /* Methods (callback pointers) */ |
| 457 | TidyPutByteFunc putByte; /**< "Put byte" callback: send a byte to output */ |
| 458 | } TidyOutputSink; |
| 459 | |
| 460 | /** Facilitates user defined sinks by providing |
| 461 | ** an entry point to marshal pointers-to-functions. |
| 462 | ** Needed by .NET and possibly other language bindings. |
| 463 | */ |
| 464 | TIDY_EXPORT Bool TIDY_CALL tidyInitSink( TidyOutputSink* sink, |
| 465 | void* snkData, |
| 466 | TidyPutByteFunc pbFunc ); |
| 467 | |
| 468 | /** Helper: send a byte to output */ |
| 469 | TIDY_EXPORT void TIDY_CALL tidyPutByte( TidyOutputSink* sink, uint byteValue ); |
| 470 | |
| 471 | |
| 472 | /** Callback to filter messages by diagnostic level: |
| 473 | ** info, warning, etc. To redirect all diagnostic output |
| 474 | ** instead, just set the diagnostic output handler. Return true |
| 475 | ** to proceed with output, false to cancel. |
| 476 | */ |
| 477 | typedef Bool (TIDY_CALL *TidyReportFilter)( TidyDoc tdoc, TidyReportLevel lvl, |
| 478 | uint line, uint col, ctmbstr mssg ); |
| 479 | |
| 480 | /** Give Tidy a filter callback to use */ |
| 481 | TIDY_EXPORT Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, |
| 482 | TidyReportFilter filtCallback ); |
| 483 | |
| 484 | /** Set error sink to named file */ |
| 485 | TIDY_EXPORT FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam ); |
| 486 | /** Set error sink to given buffer */ |
| 487 | TIDY_EXPORT int TIDY_CALL tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf ); |
| 488 | /** Set error sink to given generic sink */ |
| 489 | TIDY_EXPORT int TIDY_CALL tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink ); |
| 490 | |
| 491 | /** @} end IO group */ |
| 492 | |
| 493 | |
| 494 | /** @defgroup Memory Memory Allocation |
| 495 | ** |
| 496 | ** By default, Tidy will use its own wrappers |
| 497 | ** around standard C malloc/free calls. |
| 498 | ** These wrappers will abort upon any failures. |
| 499 | ** If any replacement callback is set, all must be set. |
| 500 | ** Pass NULL to clear previous setting. |
| 501 | ** |
| 502 | ** May be used to set environment-specific allocators |
| 503 | ** such as used by web server plugins, etc. |
| 504 | ** |
| 505 | ** @{ |
| 506 | */ |
| 507 | |
| 508 | /** Callback for "malloc" replacement */ |
| 509 | typedef void* (TIDY_CALL *TidyMalloc)( size_t len ); |
| 510 | /** Callback for "realloc" replacement */ |
| 511 | typedef void* (TIDY_CALL *TidyRealloc)( void* buf, size_t len ); |
| 512 | /** Callback for "free" replacement */ |
| 513 | typedef void (TIDY_CALL *TidyFree)( void* buf ); |
| 514 | /** Callback for "out of memory" panic state */ |
| 515 | typedef void (TIDY_CALL *TidyPanic)( ctmbstr mssg ); |
| 516 | |
| 517 | /** Give Tidy a malloc() replacement */ |
| 518 | TIDY_EXPORT Bool TIDY_CALL tidySetMallocCall( TidyMalloc fmalloc ); |
| 519 | /** Give Tidy a realloc() replacement */ |
| 520 | TIDY_EXPORT Bool TIDY_CALL tidySetReallocCall( TidyRealloc frealloc ); |
| 521 | /** Give Tidy a free() replacement */ |
| 522 | TIDY_EXPORT Bool TIDY_CALL tidySetFreeCall( TidyFree ffree ); |
| 523 | /** Give Tidy an "out of memory" handler */ |
| 524 | TIDY_EXPORT Bool TIDY_CALL tidySetPanicCall( TidyPanic fpanic ); |
| 525 | |
| 526 | /** @} end Memory group */ |
| 527 | |
| 528 | /* TODO: Catalog all messages for easy translation |
| 529 | TIDY_EXPORT ctmbstr tidyLookupMessage( int errorNo ); |
| 530 | */ |
| 531 | |
| 532 | |
| 533 | |
| 534 | /** @defgroup Parse Document Parse |
| 535 | ** |
| 536 | ** Parse markup from a given input source. String and filename |
| 537 | ** functions added for convenience. HTML/XHTML version determined |
| 538 | ** from input. |
| 539 | ** @{ |
| 540 | */ |
| 541 | |
| 542 | /** Parse markup in named file */ |
| 543 | TIDY_EXPORT int TIDY_CALL tidyParseFile( TidyDoc tdoc, ctmbstr filename ); |
| 544 | |
| 545 | /** Parse markup from the standard input */ |
| 546 | TIDY_EXPORT int TIDY_CALL tidyParseStdin( TidyDoc tdoc ); |
| 547 | |
| 548 | /** Parse markup in given string */ |
| 549 | TIDY_EXPORT int TIDY_CALL tidyParseString( TidyDoc tdoc, ctmbstr content ); |
| 550 | |
| 551 | /** Parse markup in given buffer */ |
| 552 | TIDY_EXPORT int TIDY_CALL tidyParseBuffer( TidyDoc tdoc, TidyBuffer* buf ); |
| 553 | |
| 554 | /** Parse markup in given generic input source */ |
| 555 | TIDY_EXPORT int TIDY_CALL tidyParseSource( TidyDoc tdoc, TidyInputSource* source); |
| 556 | |
| 557 | /** @} End Parse group */ |
| 558 | |
| 559 | |
| 560 | /** @defgroup Clean Diagnostics and Repair |
| 561 | ** |
| 562 | ** @{ |
| 563 | */ |
| 564 | /** Execute configured cleanup and repair operations on parsed markup */ |
| 565 | TIDY_EXPORT int TIDY_CALL tidyCleanAndRepair( TidyDoc tdoc ); |
| 566 | |
| 567 | /** Run configured diagnostics on parsed and repaired markup. |
| 568 | ** Must call tidyCleanAndRepair() first. |
| 569 | */ |
| 570 | TIDY_EXPORT int TIDY_CALL tidyRunDiagnostics( TidyDoc tdoc ); |
| 571 | |
| 572 | /** @} end Clean group */ |
| 573 | |
| 574 | |
| 575 | /** @defgroup Save Document Save Functions |
| 576 | ** |
| 577 | ** Save currently parsed document to the given output sink. File name |
| 578 | ** and string/buffer functions provided for convenience. |
| 579 | ** @{ |
| 580 | */ |
| 581 | |
| 582 | /** Save to named file */ |
| 583 | TIDY_EXPORT int TIDY_CALL tidySaveFile( TidyDoc tdoc, ctmbstr filename ); |
| 584 | |
| 585 | /** Save to standard output (FILE*) */ |
| 586 | TIDY_EXPORT int TIDY_CALL tidySaveStdout( TidyDoc tdoc ); |
| 587 | |
| 588 | /** Save to given TidyBuffer object */ |
| 589 | TIDY_EXPORT int TIDY_CALL tidySaveBuffer( TidyDoc tdoc, TidyBuffer* buf ); |
| 590 | |
| 591 | /** Save document to application buffer. If buffer is not big enough, |
| 592 | ** ENOMEM will be returned and the necessary buffer size will be placed |
| 593 | ** in *buflen. |
| 594 | */ |
| 595 | TIDY_EXPORT int TIDY_CALL tidySaveString( TidyDoc tdoc, |
| 596 | tmbstr buffer, uint* buflen ); |
| 597 | |
| 598 | /** Save to given generic output sink */ |
| 599 | TIDY_EXPORT int TIDY_CALL tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink ); |
| 600 | |
| 601 | /** @} end Save group */ |
| 602 | |
| 603 | |
| 604 | /** @addtogroup Basic |
| 605 | ** @{ |
| 606 | */ |
| 607 | /** Save current settings to named file. |
| 608 | Only non-default values are written. */ |
| 609 | TIDY_EXPORT int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil ); |
| 610 | |
| 611 | /** Save current settings to given output sink. |
| 612 | Only non-default values are written. */ |
| 613 | TIDY_EXPORT int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink ); |
| 614 | |
| 615 | |
| 616 | /* Error reporting functions |
| 617 | */ |
| 618 | |
| 619 | /** Write more complete information about errors to current error sink. */ |
| 620 | TIDY_EXPORT void TIDY_CALL tidyErrorSummary( TidyDoc tdoc ); |
| 621 | |
| 622 | /** Write more general information about markup to current error sink. */ |
| 623 | TIDY_EXPORT void TIDY_CALL tidyGeneralInfo( TidyDoc tdoc ); |
| 624 | |
| 625 | /** @} end Basic group (again) */ |
| 626 | |
| 627 | |
| 628 | /** @defgroup Tree Document Tree |
| 629 | ** |
| 630 | ** A parsed and, optionally, repaired document is |
| 631 | ** represented by Tidy as a Tree, much like a W3C DOM. |
| 632 | ** This tree may be traversed using these functions. |
| 633 | ** The following snippet gives a basic idea how these |
| 634 | ** functions can be used. |
| 635 | ** |
| 636 | <pre> |
| 637 | void dumpNode( TidyNode tnod, int indent ) |
| 638 | { |
| 639 | TidyNode child; |
| 640 | |
| 641 | for ( child = tidyGetChild(tnod); child; child = tidyGetNext(child) ) |
| 642 | { |
| 643 | ctmbstr name; |
| 644 | switch ( tidyNodeGetType(child) ) |
| 645 | { |
| 646 | case TidyNode_Root: name = "Root"; break; |
| 647 | case TidyNode_DocType: name = "DOCTYPE"; break; |
| 648 | case TidyNode_Comment: name = "Comment"; break; |
| 649 | case TidyNode_ProcIns: name = "Processing Instruction"; break; |
| 650 | case TidyNode_Text: name = "Text"; break; |
| 651 | case TidyNode_CDATA: name = "CDATA"; break; |
| 652 | case TidyNode_Section: name = "XML Section"; break; |
| 653 | case TidyNode_Asp: name = "ASP"; break; |
| 654 | case TidyNode_Jste: name = "JSTE"; break; |
| 655 | case TidyNode_Php: name = "PHP"; break; |
| 656 | case TidyNode_XmlDecl: name = "XML Declaration"; break; |
| 657 | |
| 658 | case TidyNode_Start: |
| 659 | case TidyNode_End: |
| 660 | case TidyNode_StartEnd: |
| 661 | default: |
| 662 | name = tidyNodeGetName( child ); |
| 663 | break; |
| 664 | } |
| 665 | assert( name != NULL ); |
| 666 | printf( "\%*.*sNode: \%s\\n", indent, indent, " ", name ); |
| 667 | dumpNode( child, indent + 4 ); |
| 668 | } |
| 669 | } |
| 670 | |
| 671 | void dumpDoc( TidyDoc tdoc ) |
| 672 | { |
| 673 | dumpNode( tidyGetRoot(tdoc), 0 ); |
| 674 | } |
| 675 | |
| 676 | void dumpBody( TidyDoc tdoc ) |
| 677 | { |
| 678 | dumpNode( tidyGetBody(tdoc), 0 ); |
| 679 | } |
| 680 | </pre> |
| 681 | |
| 682 | @{ |
| 683 | |
| 684 | */ |
| 685 | |
| 686 | TIDY_EXPORT TidyNode TIDY_CALL tidyGetRoot( TidyDoc tdoc ); |
| 687 | TIDY_EXPORT TidyNode TIDY_CALL tidyGetHtml( TidyDoc tdoc ); |
| 688 | TIDY_EXPORT TidyNode TIDY_CALL tidyGetHead( TidyDoc tdoc ); |
| 689 | TIDY_EXPORT TidyNode TIDY_CALL tidyGetBody( TidyDoc tdoc ); |
| 690 | |
| 691 | /* parent / child */ |
| 692 | TIDY_EXPORT TidyNode TIDY_CAL |