Main Page   Class Hierarchy   Compound List   File List   Compound Members  

SAXParser.hpp

00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: SAXParser.hpp,v $
00059  * Revision 1.1  2002/05/11 20:21:48  bhavani
00060  * CR#CR062582# adding xercesc 1.7 file
00061  *
00062  * Revision 1.2  2002/02/20 18:17:01  tng
00063  * [Bug 5977] Warnings on generating apiDocs.
00064  *
00065  * Revision 1.1.1.1  2002/02/01 22:22:07  peiyongz
00066  * sane_include
00067  *
00068  * Revision 1.22  2001/12/05 22:09:02  tng
00069  * Update documentation for setExternalSchemaLocation and setExternalNoNamespaceSchemaLocation.
00070  *
00071  * Revision 1.21  2001/11/20 18:51:44  tng
00072  * Schema: schemaLocation and noNamespaceSchemaLocation to be specified outside the instance document.  New methods setExternalSchemaLocation and setExternalNoNamespaceSchemaLocation are added (for SAX2, two new properties are added).
00073  *
00074  * Revision 1.20  2001/08/01 19:11:02  tng
00075  * Add full schema constraint checking flag to the samples and the parser.
00076  *
00077  * Revision 1.19  2001/07/27 20:24:21  tng
00078  * put getScanner() back as they were there before, not to break existing apps.
00079  *
00080  * Revision 1.18  2001/07/16 12:52:09  tng
00081  * APIDocs fix: default for schema processing in DOMParser, IDOMParser, and SAXParser should be false.
00082  *
00083  * Revision 1.17  2001/06/23 14:13:16  tng
00084  * Remove getScanner from the Parser headers as this is not needed and Scanner is not internal class.
00085  *
00086  * Revision 1.16  2001/06/03 19:26:20  jberry
00087  * Add support for querying error count following parse; enables simple parse without requiring error handler.
00088  *
00089  * Revision 1.15  2001/05/11 13:26:22  tng
00090  * Copyright update.
00091  *
00092  * Revision 1.14  2001/05/03 19:09:25  knoaman
00093  * Support Warning/Error/FatalError messaging.
00094  * Validity constraints errors are treated as errors, with the ability by user to set
00095  * validity constraints as fatal errors.
00096  *
00097  * Revision 1.13  2001/03/30 16:46:57  tng
00098  * Schema: Use setDoSchema instead of setSchemaValidation which makes more sense.
00099  *
00100  * Revision 1.12  2001/03/21 21:56:09  tng
00101  * Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
00102  *
00103  * Revision 1.11  2001/02/15 15:56:29  tng
00104  * Schema: Add setSchemaValidation and getSchemaValidation for DOMParser and SAXParser.
00105  * Add feature "http://apache.org/xml/features/validation/schema" for SAX2XMLReader.
00106  * New data field  fSchemaValidation in XMLScanner as the flag.
00107  *
00108  * Revision 1.10  2001/01/12 21:23:41  tng
00109  * Documentation Enhancement: explain values of Val_Scheme
00110  *
00111  * Revision 1.9  2000/08/02 18:05:15  jpolast
00112  * changes required for sax2
00113  * (changed private members to protected)
00114  *
00115  * Revision 1.8  2000/04/12 22:58:30  roddey
00116  * Added support for 'auto validate' mode.
00117  *
00118  * Revision 1.7  2000/03/03 01:29:34  roddey
00119  * Added a scanReset()/parseReset() method to the scanner and
00120  * parsers, to allow for reset after early exit from a progressive parse.
00121  * Added calls to new Terminate() call to all of the samples. Improved
00122  * documentation in SAX and DOM parsers.
00123  *
00124  * Revision 1.6  2000/02/17 03:54:27  rahulj
00125  * Added some new getters to query the parser state and
00126  * clarified the documentation.
00127  *
00128  * Revision 1.5  2000/02/16 03:42:58  rahulj
00129  * Finished documenting the SAX Driver implementation.
00130  *
00131  * Revision 1.4  2000/02/15 04:47:37  rahulj
00132  * Documenting the SAXParser framework. Not done yet.
00133  *
00134  * Revision 1.3  2000/02/06 07:47:56  rahulj
00135  * Year 2K copyright swat.
00136  *
00137  * Revision 1.2  1999/12/15 19:57:48  roddey
00138  * Got rid of redundant 'const' on boolean return value. Some compilers choke
00139  * on this and it's useless.
00140  *
00141  * Revision 1.1.1.1  1999/11/09 01:07:51  twl
00142  * Initial checkin
00143  *
00144  * Revision 1.6  1999/11/08 20:44:54  rahul
00145  * Swat for adding in Product name and CVS comment log variable.
00146  *
00147  */
00148 
00149 #if !defined(SAXPARSER_HPP)
00150 #define SAXPARSER_HPP
00151 
00152 #include <xercesc/sax/Parser.hpp>
00153 #include <xercesc/internal/VecAttrListImpl.hpp>
00154 #include <xercesc/framework/XMLDocumentHandler.hpp>
00155 #include <xercesc/framework/XMLElementDecl.hpp>
00156 #include <xercesc/framework/XMLEntityHandler.hpp>
00157 #include <xercesc/framework/XMLErrorReporter.hpp>
00158 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
00159 
// Forward declarations. Within this header these types are only used through
// pointers or references (handler getters/setters, the progressive-scan token,
// the scanner member and the validator), so complete definitions are not
// required here.
00160 class DocumentHandler;
00161 class EntityResolver;
00162 class XMLPScanToken;
00163 class XMLScanner;
00164 class XMLValidator;
00165 
00166 
// SAXParser declaration.
//
// The class derives from the SAX Parser interface and additionally from
// XMLDocumentHandler, XMLErrorReporter, XMLEntityHandler and DocTypeHandler;
// the overrides for each of those interfaces are grouped in the banner
// sections below. Only declarations appear in the class body; apart from the
// handful of inline getters defined after the class, implementations are
// out of line and not visible in this header.
00177 class PARSERS_EXPORT SAXParser :
00178 
00179     public Parser
00180     , public XMLDocumentHandler
00181     , public XMLErrorReporter
00182     , public XMLEntityHandler
00183     , public DocTypeHandler
00184 {
00185 public :
00186     // -----------------------------------------------------------------------
00187     //  Class types
00188     // -----------------------------------------------------------------------
          // Validation scheme selector, read and written through
          // getValidationScheme()/setValidationScheme(). Val_Auto corresponds
          // to the 'auto validate' mode mentioned in the revision log; the
          // precise meaning of each value is defined by the implementation,
          // which is not shown here.
00189     enum ValSchemes
00190     {
00191         Val_Never
00192         , Val_Always
00193         , Val_Auto
00194     };
00195 
00196 
00197     // -----------------------------------------------------------------------
00198     //  Constructors and Destructor
00199     // -----------------------------------------------------------------------
00200 
          // valToAdopt is optional (defaults to a null pointer).
          // NOTE(review): the parameter name suggests the parser takes
          // ownership of the validator - confirm against the implementation.
00208     SAXParser(XMLValidator* const valToAdopt = 0);
00209 
00213     ~SAXParser();
00215 
00216 
          // -----------------------------------------------------------------------
          //  Getter methods
          //
          //  The handler getters and getScanner() are defined inline after the
          //  class and simply return the corresponding member. The remaining
          //  getters are implemented out of line.
          // -----------------------------------------------------------------------
00225     DocumentHandler* getDocumentHandler();
00226 
00233     const DocumentHandler* getDocumentHandler() const;
00234 
00241     EntityResolver* getEntityResolver();
00242 
00249     const EntityResolver* getEntityResolver() const;
00250 
00257     ErrorHandler* getErrorHandler();
00258 
00265     const ErrorHandler* getErrorHandler() const;
00266 
          // Per the revision log, getScanner() was removed in 1.17 and restored
          // in 1.19 so that existing applications keep compiling.
00273     const XMLScanner& getScanner() const;
00274 
00281     const XMLValidator& getValidator() const;
00282 
00290     ValSchemes getValidationScheme() const;
00291 
          // Schema processing flag; the revision log (1.18) documents its
          // default as false.
00302     bool getDoSchema() const;
00303 
00314     bool getValidationSchemaFullChecking() const;
00315 
          // Added (revision 1.16) so that callers can query the error count
          // after a parse without having to install an error handler.
00326     int getErrorCount() const;
00327 
00337     bool getDoNamespaces() const;
00338 
00348     bool getExitOnFirstFatalError() const;
00349 
          // Relates to the option described in revision 1.14: validity
          // constraint errors are reported as errors unless the user asks for
          // them to be treated as fatal.
00360     bool getValidationConstraintFatal() const;
00361 
          // NOTE(review): these return a non-const XMLCh* from a const method;
          // ownership/lifetime of the returned buffer is not visible here -
          // confirm before freeing or modifying it.
00381     XMLCh* getExternalSchemaLocation() const;
00382 
00402     XMLCh* getExternalNoNamespaceSchemaLocation() const;
00403 
          // NOTE(review): the URL entity caching accessors are not described
          // anywhere in this listing; consult the implementation for semantics.
00413     bool getURLEntityCaching() const;
00414 
00424     XMLCh* getURLEntityCacheDir() const;
00425 
00427 
00428 
00429     // -----------------------------------------------------------------------
00430     //  Setter methods
00431     // -----------------------------------------------------------------------
00432 
00452     void setDoNamespaces(const bool newState);
00453 
00470     void setValidationScheme(const ValSchemes newScheme);
00471 
00485     void setDoSchema(const bool newState);
00486 
00503     void setValidationSchemaFullChecking(const bool schemaFullChecking);
00504 
00520     void setExitOnFirstFatalError(const bool newState);
00521 
00537     void setValidationConstraintFatal(const bool newState);
00538 
          // The schema-location and cache-directory setters each come as a
          // pair of overloads: one taking XMLCh text, one taking char text.
00559     void setExternalSchemaLocation(const XMLCh* const schemaLocation);
00560 
00569     void setExternalSchemaLocation(const char* const schemaLocation);
00570 
00585     void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation);
00586 
00595     void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation);
00596 
00610     void setURLEntityCaching(const bool useCache);
00611 
00626     void setURLEntityCacheDir(const XMLCh* cachedir);
00627 
00637     void setURLEntityCacheDir(const char* cachedir);
00638 
00640 
00641 
00642     // -----------------------------------------------------------------------
00643     //  Advanced document handler list maintenance methods
00644     // -----------------------------------------------------------------------
00645 
          // These maintain the fAdvDHList array described with the data
          // members at the end of the class.
          // NOTE(review): removeAdvDocHandler presumably returns whether the
          // handler was found - confirm against the implementation.
00661     void installAdvDocHandler(XMLDocumentHandler* const toInstall);
00662 
00672     bool removeAdvDocHandler(XMLDocumentHandler* const toRemove);
00674 
00675 
00676     // -----------------------------------------------------------------------
00677     //  Implementation of the SAXParser interface
00678     // -----------------------------------------------------------------------
00679 
          // Three parse() overloads: the input is identified by an InputSource,
          // by an XMLCh system id, or by a char system id. reuseGrammar
          // defaults to false in all three.
00694     virtual void parse(const InputSource& source, const bool reuseGrammar = false);
00695 
00708     virtual void parse(const XMLCh* const systemId, const bool reuseGrammar = false);
00709 
00720     virtual void parse(const char* const systemId, const bool reuseGrammar = false);
00721 
00732     virtual void setDocumentHandler(DocumentHandler* const handler);
00733 
00743     virtual void setDTDHandler(DTDHandler* const handler);
00744 
00755     virtual void setErrorHandler(ErrorHandler* const handler);
00756 
00768     virtual void setEntityResolver(EntityResolver* const resolver);
00770 
00771 
00772     // -----------------------------------------------------------------------
00773     //  Progressive scan methods
00774     // -----------------------------------------------------------------------
00775 
00778 
          // parseFirst() has the same three input forms as parse() and fills in
          // a scan token that is then handed to parseNext()/parseReset().
          // NOTE(review): the bool results presumably indicate whether the
          // scan can continue - confirm against the implementation.
00809     bool parseFirst
00810     (
00811         const   XMLCh* const    systemId
00812         ,       XMLPScanToken&  toFill
00813         , const bool            reuseGrammar = false
00814     );
00815 
00846     bool parseFirst
00847     (
00848         const   char* const     systemId
00849         ,       XMLPScanToken&  toFill
00850         , const bool            reuseGrammar = false
00851     );
00852 
00883     bool parseFirst
00884     (
00885         const   InputSource&    source
00886         ,       XMLPScanToken&  toFill
00887         , const bool            reuseGrammar = false
00888     );
00889 
00914     bool parseNext(XMLPScanToken& token);
00915 
          // Added in revision 1.7 to allow a reset after an early exit from a
          // progressive parse.
00937     void parseReset(XMLPScanToken& token);
00938 
00940 
00941 
00942 
00943     // -----------------------------------------------------------------------
00944     //  Implementation of the DocTypeHandler Interface
00945     // -----------------------------------------------------------------------
00946 
00963     virtual void attDef
00964     (
00965         const   DTDElementDecl& elemDecl
00966         , const DTDAttDef&      attDef
00967         , const bool            ignore
00968     );
00969 
00979     virtual void doctypeComment
00980     (
00981         const   XMLCh* const    comment
00982     );
00983 
01000     virtual void doctypeDecl
01001     (
01002         const   DTDElementDecl& elemDecl
01003         , const XMLCh* const    publicId
01004         , const XMLCh* const    systemId
01005         , const bool            hasIntSubset
01006     );
01007 
01021     virtual void doctypePI
01022     (
01023         const   XMLCh* const    target
01024         , const XMLCh* const    data
01025     );
01026 
01038     virtual void doctypeWhitespace
01039     (
01040         const   XMLCh* const    chars
01041         , const unsigned int    length
01042     );
01043 
01056     virtual void elementDecl
01057     (
01058         const   DTDElementDecl& decl
01059         , const bool            isIgnored
01060     );
01061 
01072     virtual void endAttList
01073     (
01074         const   DTDElementDecl& elemDecl
01075     );
01076 
01083     virtual void endIntSubset();
01084 
01091     virtual void endExtSubset();
01092 
01107     virtual void entityDecl
01108     (
01109         const   DTDEntityDecl&  entityDecl
01110         , const bool            isPEDecl
01111         , const bool            isIgnored
01112     );
01113 
01118     virtual void resetDocType();
01119 
01132     virtual void notationDecl
01133     (
01134         const   XMLNotationDecl&    notDecl
01135         , const bool                isIgnored
01136     );
01137 
01148     virtual void startAttList
01149     (
01150         const   DTDElementDecl& elemDecl
01151     );
01152 
01159     virtual void startIntSubset();
01160 
01167     virtual void startExtSubset();
01168 
01181     virtual void TextDecl
01182     (
01183         const   XMLCh* const    versionStr
01184         , const XMLCh* const    encodingStr
01185     );
01187 
01188 
01189     // -----------------------------------------------------------------------
01190     //  Implementation of the XMLDocumentHandler interface
01191     // -----------------------------------------------------------------------
01192 
01210     virtual void docCharacters
01211     (
01212         const   XMLCh* const    chars
01213         , const unsigned int    length
01214         , const bool            cdataSection
01215     );
01216 
01226     virtual void docComment
01227     (
01228         const   XMLCh* const    comment
01229     );
01230 
01250     virtual void docPI
01251     (
01252         const   XMLCh* const    target
01253         , const XMLCh* const    data
01254     );
01255 
01267     virtual void endDocument();
01268 
01285     virtual void endElement
01286     (
01287         const   XMLElementDecl& elemDecl
01288         , const unsigned int    urlId
01289         , const bool            isRoot
01290     );
01291 
01302     virtual void endEntityReference
01303     (
01304         const   XMLEntityDecl&  entDecl
01305     );
01306 
01326     virtual void ignorableWhitespace
01327     (
01328         const   XMLCh* const    chars
01329         , const unsigned int    length
01330         , const bool            cdataSection
01331     );
01332 
01337     virtual void resetDocument();
01338 
01349     virtual void startDocument();
01350 
01377     virtual void startElement
01378     (
01379         const   XMLElementDecl&         elemDecl
01380         , const unsigned int            urlId
01381         , const XMLCh* const            elemPrefix
01382         , const RefVectorOf<XMLAttr>&   attrList
01383         , const unsigned int            attrCount
01384         , const bool                    isEmpty
01385         , const bool                    isRoot
01386     );
01387 
01397     virtual void startEntityReference
01398     (
01399         const   XMLEntityDecl&  entDecl
01400     );
01401 
01419     virtual void XMLDecl
01420     (
01421         const   XMLCh* const    versionStr
01422         , const XMLCh* const    encodingStr
01423         , const XMLCh* const    standaloneStr
01424         , const XMLCh* const    actualEncodingStr
01425     );
01427 
01428 
01429     // -----------------------------------------------------------------------
01430     //  Implementation of the XMLErrorReporter interface
01431     // -----------------------------------------------------------------------
01432 
01458     virtual void error
01459     (
01460         const   unsigned int                errCode
01461         , const XMLCh* const                msgDomain
01462         , const XMLErrorReporter::ErrTypes  errType
01463         , const XMLCh* const                errorText
01464         , const XMLCh* const                systemId
01465         , const XMLCh* const                publicId
01466         , const unsigned int                lineNum
01467         , const unsigned int                colNum
01468     );
01469 
01478     virtual void resetErrors();
01480 
01481 
01482     // -----------------------------------------------------------------------
01483     //  Implementation of the XMLEntityHandler interface
01484     // -----------------------------------------------------------------------
01485 
01499     virtual void endInputSource(const InputSource& inputSource);
01500 
01515     virtual bool expandSystemId
01516     (
01517         const   XMLCh* const    systemId
01518         ,       XMLBuffer&      toFill
01519     );
01520 
01528     virtual void resetEntities();
01529 
01544     virtual InputSource* resolveEntity
01545     (
01546         const   XMLCh* const    publicId
01547         , const XMLCh* const    systemId
01548     );
01549 
01561     virtual void startInputSource(const InputSource& inputSource);
01563 
01564 
          // NOTE(review): boolean validation switch that sits alongside
          // get/setValidationScheme(); presumably the older form of that API -
          // confirm how the two interact before relying on either.
01577     bool getDoValidation() const;
01578 
01592     void setDoValidation(const bool newState);
01594 
01595 
01596 protected :
01597     // -----------------------------------------------------------------------
01598     //  Unimplemented constructors and operators
01599     // -----------------------------------------------------------------------
          // Copy construction and assignment are declared here without being
          // part of the public interface; per the banner they are not
          // implemented.
01600     SAXParser(const SAXParser&);
01601     void operator=(const SAXParser&);
01602 
01603 
01604     // -----------------------------------------------------------------------
01605     //  Protected data members (originally private; revision 1.9 made them
          //  protected for the SAX2 changes)
01606     //
01607     //  fAttrList
01608     //      A temporary implementation of the basic SAX attribute list
01609     //      interface. We use this one over and over on each startElement
01610     //      event to allow SAX-like access to the element attributes.
01611     //
01612     //  fDocHandler
01613     //      The installed SAX doc handler, if any. Null if none.
01614     //
01615     //  fDTDHandler
01616     //      The installed SAX DTD handler, if any. Null if none.
01617     //
01618     //  fElemDepth
01619     //      This is used to track the element nesting depth, so that we can
01620     //      know when we are inside content. This is so we can ignore char
01621     //      data outside of content.
01622     //
01623     //  fEntityResolver
01624     //      The installed SAX entity resolver, if any. Null if none.
01625     //
01626     //  fErrorHandler
01627     //      The installed SAX error handler, if any. Null if none.
01628     //
01629     //  fAdvDHCount
01630     //  fAdvDHList
01631     //  fAdvDHListSize
01632     //      This is an array of pointers to XMLDocumentHandlers, which is
01633     //      how we see installed advanced document handlers. There will
01634     //      usually not be very many at all, so a simple array is used
01635     //      instead of a collection, for performance. It will grow if needed,
01636     //      but that is unlikely.
01637     //
01638     //      The count is how many handlers are currently installed. The size
01639     //      is how big the array itself is (for expansion purposes.) When
01640     //      count == size, it is time to expand.
01641     //
01642     //  fParseInProgress
01643     //      This flag is set once a parse starts. It is used to prevent
01644     //      multiple entrance or reentrance of the parser.
01645     //
01646     //  fScanner
01647     //      The scanner being used by this parser. It is created internally
01648     //      during construction.
01649     //
01650     // -----------------------------------------------------------------------
01651     VecAttrListImpl         fAttrList;
01652     DocumentHandler*        fDocHandler;
01653     DTDHandler*             fDTDHandler;
01654     unsigned int            fElemDepth;
01655     EntityResolver*         fEntityResolver;
01656     ErrorHandler*           fErrorHandler;
01657     unsigned int            fAdvDHCount;
01658     XMLDocumentHandler**    fAdvDHList;
01659     unsigned int            fAdvDHListSize;
01660     bool                    fParseInProgress;
01661     XMLScanner*             fScanner;
01662 };
01663 
01664 
01665 // ---------------------------------------------------------------------------
01666 //  SAXParser: Getter methods
01667 // ---------------------------------------------------------------------------
// Returns the fDocHandler member unchanged (mutable access). The member
// notes in the class say it is null when no SAX document handler is installed.
01668 inline DocumentHandler* SAXParser::getDocumentHandler()
01669 {
01670     return fDocHandler;
01671 }
01672 
// Const overload: returns the fDocHandler member as a pointer-to-const.
// The member notes in the class say it is null when no handler is installed.
01673 inline const DocumentHandler* SAXParser::getDocumentHandler() const
01674 {
01675     return fDocHandler;
01676 }
01677 
// Returns the fEntityResolver member unchanged (mutable access). The member
// notes in the class say it is null when none is installed.
01678 inline EntityResolver* SAXParser::getEntityResolver()
01679 {
01680     return fEntityResolver;
01681 }
01682 
// Const overload: returns the fEntityResolver member as a pointer-to-const.
// The member notes in the class say it is null when none is installed.
01683 inline const EntityResolver* SAXParser::getEntityResolver() const
01684 {
01685     return fEntityResolver;
01686 }
01687 
// Returns the fErrorHandler member unchanged (mutable access). The member
// notes in the class say it is null when no SAX error handler is installed.
01688 inline ErrorHandler* SAXParser::getErrorHandler()
01689 {
01690     return fErrorHandler;
01691 }
01692 
// Const overload: returns the fErrorHandler member as a pointer-to-const.
// The member notes in the class say it is null when no handler is installed.
01693 inline const ErrorHandler* SAXParser::getErrorHandler() const
01694 {
01695     return fErrorHandler;
01696 }
01697 
// Returns a const reference to the scanner by dereferencing fScanner. No null
// check is made here; the member notes in the class state that the scanner is
// created internally during construction.
01698 inline const XMLScanner& SAXParser::getScanner() const
01699 {
01700     return *fScanner;
01701 }
01702 
01703 #endif

Generated on Tue Nov 19 09:36:33 2002 by doxygen1.3-rc1