Main Page   Class Hierarchy   Compound List   File List   Compound Members  

SAX2XMLReaderImpl.hpp

00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: SAX2XMLReaderImpl.hpp,v $
00059  * Revision 1.1  2002/05/11 20:21:34  bhavani
00060  * CR#CR062582# adding xercesc 1.7 file
00061  *
00062  * Revision 1.2  2002/02/13 16:09:24  knoaman
00063  * Move SAX2 features/properties names constants to XMLUni.
00064  *
00065  * Revision 1.1.1.1  2002/02/01 22:22:07  peiyongz
00066  * sane_include
00067  *
00068  * Revision 1.21  2002/01/28 18:45:40  knoaman
00069  * Update documentation for SAX2 feature 'namespace-prefixes'.
00070  *
00071  * Revision 1.20  2002/01/28 17:08:47  knoaman
00072  * SAX2-ext's DeclHandler support.
00073  *
00074  * Revision 1.19  2002/01/24 16:30:34  tng
00075  * [Bug 3111] Problem with LexicalHandler::startDTD() and LexicalHandler::endDTD() .
00076  *
00077  * Revision 1.18  2002/01/18 16:31:38  tng
00078  * Break program.xml which takes too long to load, into program-sax.xml, program-sax2.xml, program-dom.xml, program-idom.xml.
00079  *
00080  * Revision 1.17  2002/01/02 15:36:41  tng
00081  * Some documentation update.
00082  *
00083  * Revision 1.16  2001/11/20 18:51:44  tng
00084  * Schema: schemaLocation and noNamespaceSchemaLocation to be specified outside the instance document.  New methods setExternalSchemaLocation and setExternalNoNamespaceSchemaLocation are added (for SAX2, two new properties are added).
00085  *
00086  * Revision 1.15  2001/11/14 14:15:42  tng
00087  * Update SAX2 feature documentation.
00088  *
00089  * Revision 1.14  2001/09/12 13:03:43  tng
00090  * [Bug 3155] SAX2 does not offer progressive parse.
00091  *
00092  * Revision 1.13  2001/08/01 19:11:02  tng
00093  * Add full schema constraint checking flag to the samples and the parser.
00094  *
00095  * Revision 1.12  2001/06/27 17:39:52  knoaman
00096  * Fix for bug #2353.
00097  *
00098  * Revision 1.11  2001/06/19 16:45:08  tng
00099  * Add installAdvDocHandler to SAX2XMLReader as the code is there already.
00100  *
00101  * Revision 1.10  2001/06/04 21:01:49  jberry
00102  * getErrorCount is virtual in this class reflecting derivation from SAX2XMLReader.
00103  *
00104  * Revision 1.9  2001/06/03 19:26:19  jberry
00105  * Add support for querying error count following parse; enables simple parse without requiring error handler.
00106  *
00107  * Revision 1.8  2001/05/11 13:26:21  tng
00108  * Copyright update.
00109  *
00110  * Revision 1.7  2001/03/30 16:46:57  tng
00111  * Schema: Use setDoSchema instead of setSchemaValidation which makes more sense.
00112  *
00113  * Revision 1.6  2001/03/21 21:56:08  tng
00114  * Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
00115  *
00116  * Revision 1.5  2001/02/15 15:56:29  tng
00117  * Schema: Add setSchemaValidation and getSchemaValidation for DOMParser and SAXParser.
00118  * Add feature "http://apache.org/xml/features/validation/schema" for SAX2XMLReader.
00119  * New data field  fSchemaValidation in XMLScanner as the flag.
00120  *
00121  * Revision 1.4  2000/12/22 15:16:53  tng
00122  * SAX2-ext's LexicalHandler support added by David Bertoni.
00123  *
00124  * Revision 1.3  2000/08/09 22:16:13  jpolast
00125  * many conformance & stability changes:
00126  *   - ContentHandler::resetDocument() removed
00127  *   - attrs param of ContentHandler::startDocument() made const
00128  *   - SAXExceptions thrown now have msgs
00129  *   - removed duplicate function signatures that had 'const'
00130  *       [ eg: getContentHandler() ]
00131  *   - changed getFeature and getProperty to apply to const objs
00132  *   - setProperty now takes a void* instead of const void*
00133  *   - SAX2XMLReaderImpl does not inherit from SAXParser anymore
00134  *   - Reuse Validator (http://apache.org/xml/features/reuse-validator) implemented
00135  *   - Features & Properties now read-only during parse
00136  *
00137  * Revision 1.2  2000/08/02 20:46:32  aruna1
00138  * sax2 changes
00139  *
00140  * Revision 1.1  2000/08/02 18:04:41  jpolast
00141  * initial checkin of sax2 implementation
00142  * submitted by Simon Fell (simon@fell.com)
00143  * and Joe Polastre (jpolast@apache.org)
00144  *
00145  *
00146  */
00147 
00148 #if !defined(SAX2XMLReaderImpl_HPP)
00149 #define SAX2XMLReaderImpl_HPP
00150 
00151 #include <xercesc/parsers/SAXParser.hpp>
00152 #include <xercesc/sax/Parser.hpp>
00153 #include <xercesc/framework/XMLBuffer.hpp>
00154 #include <xercesc/internal/VecAttributesImpl.hpp>
00155 #include <xercesc/sax2/SAX2XMLReader.hpp>
00156 #include <xercesc/util/RefStackOf.hpp>
00157 #include <xercesc/util/ValueStackOf.hpp>
00158 #include <xercesc/framework/XMLBufferMgr.hpp>
00159 
00160 class ContentHandler;
00161 class LexicalHandler;
00162 class DeclHandler;
00163 
00179 class PARSERS_EXPORT SAX2XMLReaderImpl :
00180         public SAX2XMLReader
00181 //    , public Parser
00182     , public XMLDocumentHandler
00183     , public XMLErrorReporter
00184     , public XMLEntityHandler
00185     , public DocTypeHandler
00186 {
      //  Concrete implementation of the SAX2XMLReader interface. In addition to
      //  that interface the class derives from XMLDocumentHandler,
      //  XMLErrorReporter, XMLEntityHandler and DocTypeHandler and declares
      //  overrides for their callbacks; the declarations are grouped by
      //  interface in the sections below. Only declarations appear inside the
      //  class. A handful of getters are defined inline after the class body;
      //  everything else is defined outside this header.
00187 public :
00188         
              //  Default constructor and destructor. According to the member
              //  notes further down, the scanner (fScanner) is created
              //  internally during construction.
00189         SAX2XMLReaderImpl() ;
00190         ~SAX2XMLReaderImpl() ;
00191 
00192         // -----------------------------------------------------------------------
00193     //  Implementation of the XMLDocumentHandler interface
          //
          //  These are the document-level callbacks. Their bodies are not part
          //  of this header.
          //  NOTE(review): presumably each one translates a scanner event into
          //  calls on the installed SAX2 handlers - confirm against the .cpp.
00194     // -----------------------------------------------------------------------
00195 
          //  Character data callback: a buffer, its length, and a flag telling
          //  whether the text came from a CDATA section.
00213     virtual void docCharacters
00214     (
00215         const   XMLCh* const    chars
00216         , const unsigned int    length
00217         , const bool            cdataSection
00218     );
00219 
          //  Comment callback; receives the comment text.
00229     virtual void docComment
00230     (
00231         const   XMLCh* const    comment
00232     );
00233 
          //  Processing instruction callback; receives target and data.
00253     virtual void docPI
00254     (
00255         const   XMLCh* const    target
00256         , const XMLCh* const    data
00257     );
00258 
00270     virtual void endDocument();
00271 
          //  End-of-element callback. isRoot flags the root element; urlId is
          //  an unsigned id (NOTE(review): presumably the namespace URI id of
          //  the element - confirm).
00288     virtual void endElement
00289     (
00290         const   XMLElementDecl& elemDecl
00291         , const unsigned int    urlId
00292         , const bool            isRoot
00293     );
00294 
00305     virtual void endEntityReference
00306     (
00307         const   XMLEntityDecl&  entDecl
00308     );
00309 
          //  Same parameter list as docCharacters, for ignorable whitespace.
00329     virtual void ignorableWhitespace
00330     (
00331         const   XMLCh* const    chars
00332         , const unsigned int    length
00333         , const bool            cdataSection
00334     );
00335 
00340     virtual void resetDocument();
00341 
00352     virtual void startDocument();
00353 
          //  Start-of-element callback. attrList holds the attributes and
          //  attrCount says how many of them are in use; isEmpty and isRoot
          //  flag an empty element and the root element respectively.
00380     virtual void startElement
00381     (
00382         const   XMLElementDecl&         elemDecl
00383         , const unsigned int            urlId
00384         , const XMLCh* const            elemPrefix
00385         , const RefVectorOf<XMLAttr>&   attrList
00386         , const unsigned int            attrCount
00387         , const bool                    isEmpty
00388         , const bool                    isRoot
00389     );
00390 
00400     virtual void startEntityReference
00401     (
00402         const   XMLEntityDecl&  entDecl
00403     );
00404 
          //  XML declaration callback; all four values arrive as strings.
00422     virtual void XMLDecl
00423     (
00424         const   XMLCh* const    versionStr
00425         , const XMLCh* const    encodingStr
00426         , const XMLCh* const    standaloneStr
00427         , const XMLCh* const    actualEncodingStr
00428     );
00430 
00431         // -----------------------------------------------------------------------
00432     //  Implementation of the SAX2XMLReader interface
00433     // -----------------------------------------------------------------------
00434 
00437 
          //  Error count query. The revision log at the top of this file
          //  (revisions 1.9 and 1.10) describes it as a way to query the error
          //  count following a parse without requiring an error handler, and
          //  as virtual because of the derivation from SAX2XMLReader.
00448     virtual int getErrorCount() const;
00449 
          //  Handler getters. All six are defined inline after the class and
          //  return the corresponding handler member unchanged.
00455     virtual ContentHandler* getContentHandler() const;
00456 
00462     virtual DTDHandler* getDTDHandler() const ;
00463 
00469     virtual EntityResolver* getEntityResolver() const  ;
00470 
00476     virtual ErrorHandler* getErrorHandler() const ;
00477 
00483     virtual LexicalHandler* getLexicalHandler() const ;
00484 
00490     virtual DeclHandler* getDeclarationHandler() const ;
00491 
          //  The next two getters are also defined inline after the class;
          //  both simply forward to the scanner (fScanner).
00501     virtual bool getExitOnFirstFatalError() const;
00502 
00513     virtual bool getValidationConstraintFatal() const;
00514 
          //  Three parse overloads that differ only in how the input is named:
          //  an InputSource, an XMLCh system id, or a char system id.
00524     virtual void parse(const InputSource& source);
00525 
00533     virtual void parse(const XMLCh* const systemId);
00534 
00542     virtual void parse(const char* const systemId);
00543 
          //  Handler setters, one per handler getter above.
          //  NOTE(review): ownership of the passed handler is not visible in
          //  this header; the member notes only say that each handler member
          //  is null when nothing is installed - confirm before assuming the
          //  reader deletes or keeps handlers alive.
00552     virtual void setContentHandler(ContentHandler* const handler);
00553 
00569     virtual void setDTDHandler(DTDHandler* const handler) ;
00570 
00587     virtual void setEntityResolver(EntityResolver* const resolver) ;
00588 
00606     virtual void setErrorHandler(ErrorHandler* const handler) ;
00607 
00624     virtual void setLexicalHandler(LexicalHandler* const handler) ;
00625 
00642     virtual void setDeclarationHandler(DeclHandler* const handler);
00643 
00659     virtual void setExitOnFirstFatalError(const bool newState);
00660 
00676     virtual void setValidationConstraintFatal(const bool newState);
00677 
              //  Feature and property access, keyed by name. Per revision 1.3
              //  in the log at the top of this file: getFeature and getProperty
              //  apply to const objects, setProperty takes a void pointer
              //  rather than a const void pointer, and features and properties
              //  are read-only during a parse.
00698         virtual void setFeature(const XMLCh* const name, const bool value);
00699 
00707         virtual bool getFeature(const XMLCh* const name) const;
00708 
00732         virtual void setProperty(const XMLCh* const name, void* value);
00733 
00753         virtual void* getProperty(const XMLCh* const name) const;
00755 
00756     // -----------------------------------------------------------------------
00757     //  Implementation of the XMLErrorReporter interface
00758     // -----------------------------------------------------------------------
00759 
          //  Error callback. Carries the error code and message domain, the
          //  error type, the message text, and the location (system id, public
          //  id, line and column).
          //  NOTE(review): presumably routed to the installed ErrorHandler -
          //  confirm against the .cpp.
00785     virtual void error
00786     (
00787         const   unsigned int                errCode
00788         , const XMLCh* const                msgDomain
00789         , const XMLErrorReporter::ErrTypes  errType
00790         , const XMLCh* const                errorText
00791         , const XMLCh* const                systemId
00792         , const XMLCh* const                publicId
00793         , const unsigned int                lineNum
00794         , const unsigned int                colNum
00795     );
00796 
00805     virtual void resetErrors();
00807 
00808 
00809     // -----------------------------------------------------------------------
00810     //  Implementation of the XMLEntityHandler interface
00811     // -----------------------------------------------------------------------
00812 
00826     virtual void endInputSource(const InputSource& inputSource);
00827 
          //  Takes a system id and an output buffer (toFill) and returns bool.
          //  NOTE(review): presumably the return value says whether toFill was
          //  filled with an expanded id - confirm.
00842     virtual bool expandSystemId
00843     (
00844         const   XMLCh* const    systemId
00845         ,       XMLBuffer&      toFill
00846     );
00847 
00855     virtual void resetEntities();
00856 
          //  Returns an InputSource pointer for the given public and system id.
          //  NOTE(review): presumably delegates to the installed EntityResolver
          //  and may return null; ownership of the result is not visible here.
00871     virtual InputSource* resolveEntity
00872     (
00873         const   XMLCh* const    publicId
00874         , const XMLCh* const    systemId
00875     );
00876 
00888     virtual void startInputSource(const InputSource& inputSource);
00890 
00891     // -----------------------------------------------------------------------
00892     //  Implementation of the DocTypeHandler Interface
          //
          //  Callbacks for DTD events. Several carry an ignoring or isIgnored
          //  flag whose handling is decided in the .cpp, not here.
00893     // -----------------------------------------------------------------------
00894 
00911     virtual void attDef
00912     (
00913         const   DTDElementDecl& elemDecl
00914         , const DTDAttDef&      attDef
00915         , const bool            ignoring
00916     );
00917 
00927     virtual void doctypeComment
00928     (
00929         const   XMLCh* const    comment
00930     );
00931 
00948     virtual void doctypeDecl
00949     (
00950         const   DTDElementDecl& elemDecl
00951         , const XMLCh* const    publicId
00952         , const XMLCh* const    systemId
00953         , const bool            hasIntSubset
00954     );
00955 
00969     virtual void doctypePI
00970     (
00971         const   XMLCh* const    target
00972         , const XMLCh* const    data
00973     );
00974 
00986     virtual void doctypeWhitespace
00987     (
00988         const   XMLCh* const    chars
00989         , const unsigned int    length
00990     );
00991 
01004     virtual void elementDecl
01005     (
01006         const   DTDElementDecl& decl
01007         , const bool            isIgnored
01008     );
01009 
01020     virtual void endAttList
01021     (
01022         const   DTDElementDecl& elemDecl
01023     );
01024 
01031     virtual void endIntSubset();
01032 
01039     virtual void endExtSubset();
01040 
01055     virtual void entityDecl
01056     (
01057         const   DTDEntityDecl&  entityDecl
01058         , const bool            isPEDecl
01059         , const bool            isIgnored
01060     );
01061 
01066     virtual void resetDocType();
01067 
01080     virtual void notationDecl
01081     (
01082         const   XMLNotationDecl&    notDecl
01083         , const bool                isIgnored
01084     );
01085 
01096     virtual void startAttList
01097     (
01098         const   DTDElementDecl& elemDecl
01099     );
01100 
01107     virtual void startIntSubset();
01108 
01115     virtual void startExtSubset();
01116 
01129     virtual void TextDecl
01130     (
01131         const   XMLCh* const    versionStr
01132         , const XMLCh* const    encodingStr
01133     );
01135 
01136     // -----------------------------------------------------------------------
01137     //  Validator: setters and getters
01138     // -----------------------------------------------------------------------
              //  NOTE(review): the parameter name valueToAdopt suggests that the
              //  reader takes ownership of the validator - TODO confirm.
01150         virtual void setValidator(XMLValidator* valueToAdopt);
01151 
01162         virtual XMLValidator* getValidator() const;
01164 
01165     // -----------------------------------------------------------------------
01166     //  Advanced document handler list maintenance methods
          //
          //  Installed handlers are tracked through fAdvDHList, fAdvDHCount and
          //  fAdvDHListSize, described in the member notes below.
01167     // -----------------------------------------------------------------------
01168 
01184     virtual void installAdvDocHandler(XMLDocumentHandler* const toInstall);
01185 
          //  NOTE(review): the bool result presumably reports whether toRemove
          //  was found in the list - confirm.
01195     virtual bool removeAdvDocHandler(XMLDocumentHandler* const toRemove);
01197 
01198     // -----------------------------------------------------------------------
01199     //  Progressive scan methods
          //
          //  parseFirst comes in three overloads that mirror the three parse
          //  overloads (XMLCh system id, char system id, InputSource). Each one
          //  fills an XMLPScanToken that is then passed to parseNext and
          //  parseReset. reuseGrammar defaults to false in all three.
          //  NOTE(review): the meaning of the bool results is not visible in
          //  this header - confirm before relying on it.
01200     // -----------------------------------------------------------------------
01201 
01204 
01235     virtual bool parseFirst
01236     (
01237         const   XMLCh* const    systemId
01238         ,       XMLPScanToken&  toFill
01239         , const bool            reuseGrammar = false
01240     );
01241 
01272     virtual bool parseFirst
01273     (
01274         const   char* const     systemId
01275         ,       XMLPScanToken&  toFill
01276         , const bool            reuseGrammar = false
01277     );
01278 
01309     virtual bool parseFirst
01310     (
01311         const   InputSource&    source
01312         ,       XMLPScanToken&  toFill
01313         , const bool            reuseGrammar = false
01314     );
01315 
01340     virtual bool parseNext(XMLPScanToken& token);
01341 
01363     virtual void parseReset(XMLPScanToken& token);
01364 
01366 
01367 private :
01368     // -----------------------------------------------------------------------
01369     //  Unimplemented constructors and operators
          //
          //  The copy constructor and assignment operator are declared private
          //  and, as the heading says, left unimplemented, so objects of this
          //  class cannot be copied or assigned by client code.
01370     // -----------------------------------------------------------------------
01371     SAX2XMLReaderImpl(const SAX2XMLReaderImpl&);
01372     void operator=(const SAX2XMLReaderImpl&);
01373 
01374     // -----------------------------------------------------------------------
01375     //  Private data members
01376     //
01377     //  fAttrList
01378     //      A temporary implementation of the basic SAX2 Attributes
01379     //      interface. We use this one over and over on each startElement
01380     //      event to allow SAX-like access to the element attributes.
01381     //
01382     //  fDocHandler
01383     //      The installed SAX content handler, if any. Null if none.
01384     //
          //  tempAttrVec
          //      Not described by the original notes. NOTE(review): presumably a
          //      scratch attribute vector used while building fAttrList -
          //      confirm against the .cpp.
          //
01385     //  fnamespacePrefix
01386     //      Indicates whether the namespace-prefix feature is on or off.
01387     //
01388     //  fautoValidation
01389     //      Indicates whether automatic validation is on or off
01390     //
01391     //  fValidation
01392     //      Indicates whether the 'validation' core feature is on or off
01393     //
01394     //  fReuseGrammar
01395     //      Tells the parser whether it should reuse the grammar or not.
01396     //      If true, there cannot be any internal subset.
01397     //
01398     //  fStringBuffers
01399     //          Any temporary strings we need are pulled out of this pool
01400     //
01401     //  fPrefixes
01402     //          A Stack of the current namespace prefixes that need calls to
01403     //          endPrefixMapping
01404     //
01405     //  prefixCounts
01406     //          A Stack of the number of prefixes that need endPrefixMapping
01407     //          calls for that element
01408     //
01409     //  fDTDHandler
01410     //      The installed SAX DTD handler, if any. Null if none.
01411     //
01412     //  fElemDepth
01413     //      This is used to track the element nesting depth, so that we can
01414     //      know when we are inside content. This is so we can ignore char
01415     //      data outside of content.
01416     //
01417     //  fEntityResolver
01418     //      The installed SAX entity resolver, if any. Null if none.
01419     //
01420     //  fErrorHandler
01421     //      The installed SAX error handler, if any. Null if none.
01422     //
01423     //  fLexicalHandler
01424     //      The installed SAX lexical handler, if any.  Null if none.
01425     //
01426     //  fDeclHandler
01427     //      The installed SAX declaration handler, if any.  Null if none.
01428     //
01429     //  fAdvDHCount
01430     //  fAdvDHList
01431     //  fAdvDHListSize
01432     //      This is an array of pointers to XMLDocumentHandlers, which is
01433     //      how we see installed advanced document handlers. There will
01434     //      usually not be very many at all, so a simple array is used
01435     //      instead of a collection, for performance. It will grow if needed,
01436     //      but that is unlikely.
01437     //
01438     //      The count is how many handlers are currently installed. The size
01439     //      is how big the array itself is (for expansion purposes.) When
01440     //      count == size, it is time to expand.
01441     //
01442     //  fParseInProgress
01443     //      This flag is set once a parse starts. It is used to prevent
01444     //      multiple entrance or reentrance of the parser.
01445     //
01446     //  fScanner
01447     //      The scanner being used by this parser. It is created internally
01448     //      during construction.
01449     //
01450     //  fHasExternalSubset
01451     //      Indicate if the document has external DTD subset.
01452     //
01453     // -----------------------------------------------------------------------
01454         VecAttributesImpl                  fAttrList ;
01455         ContentHandler*                    fDocHandler ;
01456         RefVectorOf<XMLAttr>*      tempAttrVec ;
01457 
01458         bool                       fnamespacePrefix;
01459         bool                       fautoValidation;
01460         bool                       fValidation;
01461         bool                       fReuseGrammar;
01462 
01463         XMLBufferMgr                       fStringBuffers ;
01464         RefStackOf<XMLBuffer> *    fPrefixes ;
01465         ValueStackOf<unsigned int> * prefixCounts ;
01466 
01467     DTDHandler*                fDTDHandler;
01468     unsigned int               fElemDepth;
01469     EntityResolver*            fEntityResolver;
01470     ErrorHandler*              fErrorHandler;
01471     LexicalHandler*            fLexicalHandler;
01472     DeclHandler*               fDeclHandler;
01473     unsigned int               fAdvDHCount;
01474     XMLDocumentHandler**       fAdvDHList;
01475     unsigned int               fAdvDHListSize;
01476     bool                       fParseInProgress;
01477     XMLScanner*                fScanner;
01478     bool                       fHasExternalSubset;
01479         
01480     // -----------------------------------------------------------------------
01481     // internal function used to set the state of validation: always, never, or auto
          //
          // The four helpers declared after setValidationScheme are private
          // setter and getter pairs for namespace processing and schema
          // processing.
          // NOTE(review): presumably these back the matching SAX2 features
          // handled by setFeature and getFeature - confirm against the .cpp.
01482     // -----------------------------------------------------------------------
01483     void setValidationScheme(const ValSchemes newScheme);
01484     void setDoNamespaces(const bool newState);
01485     bool getDoNamespaces() const;
01486     void setDoSchema(const bool newState);
01487     bool getDoSchema() const;
01488 
01489 };
01490 
01491 
01492 // ---------------------------------------------------------------------------
01493 //  SAX2XMLReaderImpl: Getter methods
01494 // ---------------------------------------------------------------------------
01495 inline ContentHandler* SAX2XMLReaderImpl::getContentHandler() const
01496 {
          // Returns the installed SAX content handler exactly as stored in
          // fDocHandler; the pointer is null when no handler is installed.
01497     return fDocHandler;
01498 }
01499 
01500 inline DTDHandler* SAX2XMLReaderImpl::getDTDHandler() const
01501 {
              // Returns the installed SAX DTD handler exactly as stored in
              // fDTDHandler; the pointer is null when no handler is installed.
01502         return fDTDHandler ;
01503 }
01504 
01505 inline EntityResolver* SAX2XMLReaderImpl::getEntityResolver() const
01506 {
              // Returns the installed entity resolver exactly as stored in
              // fEntityResolver; the pointer is null when none is installed.
01507         return fEntityResolver;
01508 }
01509 
01510 inline ErrorHandler* SAX2XMLReaderImpl::getErrorHandler() const
01511 {
              // Returns the installed SAX error handler exactly as stored in
              // fErrorHandler; the pointer is null when none is installed.
01512         return fErrorHandler;
01513 }
01514 
01515 inline LexicalHandler* SAX2XMLReaderImpl::getLexicalHandler() const
01516 {
         // Returns the installed SAX lexical handler exactly as stored in
         // fLexicalHandler; the pointer is null when none is installed.
01517    return fLexicalHandler;
01518 }
01519 
01520 inline DeclHandler* SAX2XMLReaderImpl::getDeclarationHandler() const
01521 {
         // Returns the installed SAX declaration handler exactly as stored in
         // fDeclHandler; the pointer is null when none is installed.
01522    return fDeclHandler;
01523 }
01524 
01525 inline bool SAX2XMLReaderImpl::getExitOnFirstFatalError() const
01526 {
          // The flag is not stored in this class; the call forwards to the
          // scanner and returns whatever getExitOnFirstFatal() reports.
          // fScanner is dereferenced without a null check.
01527     return fScanner->getExitOnFirstFatal();
01528 }
01529 
01530 inline bool SAX2XMLReaderImpl::getValidationConstraintFatal() const
01531 {
          // The flag is not stored in this class; the call forwards to the
          // scanner method of the same name and returns its result.
          // fScanner is dereferenced without a null check.
01532     return fScanner->getValidationConstraintFatal();
01533 }
01534 
01535 #endif

Generated on Tue Nov 19 09:36:33 2002 by doxygen1.3-rc1