Main Page   Class Hierarchy   Compound List   File List   Compound Members  

ReaderMgr.hpp

00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: ReaderMgr.hpp,v $
00059  * Revision 1.1  2002/05/11 20:18:20  bhavani
00060  * CR#CR062582# adding xercesc 1.7 file
00061  *
00062  * Revision 1.1.1.1  2002/02/01 22:21:58  peiyongz
00063  * sane_include
00064  *
00065  * Revision 1.13  2001/07/12 18:50:08  tng
00066  * Some performance modification regarding standalone check and xml decl check.
00067  *
00068  * Revision 1.12  2000/09/09 00:18:18  andyh
00069  * Reordered member variables in ThrowEOEJanitor.  Patch submitted
00070  * by Kirk Wylie.
00071  *
00072  * Revision 1.11  2000/07/08 00:17:13  andyh
00073  * Cleanup of yesterday's speedup changes.  Merged new bit into the
00074  * scanner character properties table.
00075  *
00076  * Revision 1.10  2000/07/07 01:08:44  andyh
00077  * Parser speed up in scan of XML content.
00078  *
00079  * Revision 1.9  2000/03/02 19:54:29  roddey
00080  * This checkin includes many changes done while waiting for the
00081  * 1.1.0 code to be finished. I can't list them all here, but a list is
00082  * available elsewhere.
00083  *
00084  * Revision 1.8  2000/02/24 20:18:07  abagchi
00085  * Swat for removing Log from API docs
00086  *
00087  * Revision 1.7  2000/02/24 02:12:53  aruna1
00088  * ReaderMgr:;getReaderDepth() added
00089  *
00090  * Revision 1.6  2000/02/06 07:47:53  rahulj
00091  * Year 2K copyright swat.
00092  *
00093  * Revision 1.5  2000/01/25 01:04:21  roddey
00094  * Fixes a bogus error about ]]> in char data.
00095  *
00096  * Revision 1.4  2000/01/24 20:40:43  roddey
00097  * Exposed the APIs to get to the byte offset in the source XML buffer. This stuff
00098  * is not tested yet, but I wanted to get the API changes in now so that the API
00099  * can be stablized.
00100  *
00101  * Revision 1.3  2000/01/12 00:15:04  roddey
00102  * Changes to deal with multiply nested, relative pathed, entities and to deal
00103  * with the new URL class changes.
00104  *
00105  * Revision 1.2  1999/12/15 19:48:03  roddey
00106  * Changed to use new split of transcoder interfaces into XML transcoders and
00107  * LCP transcoders, and implementation of intrinsic transcoders as pluggable
00108  * transcoders, and addition of Latin1 intrinsic support.
00109  *
00110  * Revision 1.1.1.1  1999/11/09 01:08:13  twl
00111  * Initial checkin
00112  *
00113  * Revision 1.4  1999/11/08 20:56:54  droddey
00114  * If the main xml entity does not exist, we need to get the error handling for that
00115  * inside the main XMLScanner::scanDocument() try block so that it gets reported
00116  * in the normal way. We have to add a little extra safety code because, when this
00117  * happens, there is no reader on the reader stack to get position ino from.
00118  *
00119  * Revision 1.3  1999/11/08 20:44:43  rahul
00120  * Swat for adding in Product name and CVS comment log variable.
00121  *
00122  */
00123 
00124 #if !defined(READERMGR_HPP)
00125 #define READERMGR_HPP
00126 
00127 #include <xercesc/util/RefStackOf.hpp>
00128 #include <xercesc/util/XMLString.hpp>
00129 #include <xercesc/sax/Locator.hpp>
00130 #include <xercesc/framework/XMLBuffer.hpp>
00131 #include <xercesc/internal/XMLReader.hpp>
00132 
00133 class XMLBuffer;
00134 class XMLEntityDecl;
00135 class XMLEntityHandler;
00136 class XMLDocumentHandler;
00137 class XMLScanner;
00138 
00139 
00140 // ---------------------------------------------------------------------------
00141 //  This class is used by the scanner. The scanner must deal with expansion
00142 //  of entities, some of which are totally different files (external parsed
00143 //  entities.) It does so by pushing readers onto a stack. The top reader is
00144 //  the one it wants to read out of, but that one must be popped when it is
00145 //  empty. To keep that logic from being all over the place, the scanner
00146 //  talks to the reader manager, which handles the stack and popping off
00147 //  used up readers.
00148 // ---------------------------------------------------------------------------
00149 class XMLPARSER_EXPORT ReaderMgr : public Locator
00150 {
00151 public :
00152     // -----------------------------------------------------------------------
00153     //  Class specific types
00154     // -----------------------------------------------------------------------
00155     struct LastExtEntityInfo
00156     {
00157         const   XMLCh*          systemId;
00158         const   XMLCh*          publicId;
00159                 unsigned int    lineNumber;
00160                 unsigned int    colNumber;
00161     };
00162 
00163 
00164     // -----------------------------------------------------------------------
00165     //  Constructors and Destructor
00166     // -----------------------------------------------------------------------
00167     ReaderMgr();
00168     ~ReaderMgr();
00169 
00170 
00171     // -----------------------------------------------------------------------
00172     //  Convenience scanning methods
00173     //
00174     //  This are all convenience methods that work in terms of the core
00175     //  character spooling methods.
00176     // -----------------------------------------------------------------------
00177     bool atEOF() const;
00178     bool getName(XMLBuffer& toFill);
00179     bool getNameToken(XMLBuffer& toFill);
00180     XMLCh getNextChar();
00181     bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
00182     void movePlainContentChars(XMLBuffer &dest);
00183     void getSpaces(XMLBuffer& toFill);
00184     void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
00185     bool isEmpty() const;
00186     bool lookingAtChar(const XMLCh toCheck);
00187     bool lookingAtSpace();
00188     XMLCh peekNextChar();
00189     bool skipIfQuote(XMLCh& chGotten);
00190     void skipPastChar(const XMLCh toSkip);
00191     bool skipPastSpaces();
00192     void skipToChar(const XMLCh toSkipTo);
00193     bool skippedChar(const XMLCh toSkip);
00194     bool skippedSpace();
00195     bool skippedString(const XMLCh* const toSkip);
00196     void skipQuotedString(const XMLCh quoteCh);
00197     XMLCh skipUntilIn(const XMLCh* const listToSkip);
00198     XMLCh skipUntilInOrWS(const XMLCh* const listToSkip);
00199     bool peekString(const XMLCh* const toPeek);
00200 
00201 
00202     // -----------------------------------------------------------------------
00203     //  Control methods
00204     // -----------------------------------------------------------------------
00205     void cleanStackBackTo(const unsigned int readerNum);
00206     XMLReader* createReader
00207     (
00208         const   InputSource&        src
00209         , const bool                xmlDecl
00210         , const XMLReader::RefFrom  refFrom
00211         , const XMLReader::Types    type
00212         , const XMLReader::Sources  source
00213         , const XMLCh* const       diskCachePath = 0
00214     );
00215     XMLReader* createReader
00216     (
00217         const   XMLCh* const        sysId
00218         , const XMLCh* const        pubId
00219         , const bool                xmlDecl
00220         , const XMLReader::RefFrom  refFrom
00221         , const XMLReader::Types    type
00222         , const XMLReader::Sources  source
00223         ,       InputSource*&       srcToFill
00224     );
00225     XMLReader* createReader
00226     (
00227         const   XMLCh* const        sysId
00228         , const XMLCh* const        pubId
00229         , const bool                xmlDecl
00230         , const XMLReader::RefFrom  refFrom
00231         , const XMLReader::Types    type
00232         , const XMLReader::Sources  source
00233         ,       InputSource*&       srcToFill
00234         , const bool                urlCaching
00235         , const XMLCh* const       urlCacheDir
00236     );
00237     XMLReader* createIntEntReader
00238     (
00239         const   XMLCh* const        sysId
00240         , const XMLReader::RefFrom  refFrom
00241         , const XMLReader::Types    type
00242         , const XMLCh* const        dataBuf
00243         , const unsigned int        dataLen
00244         , const bool                copyBuf
00245     );
00246     bool isScanningPERefOutOfLiteral() const;
00247     bool pushReader
00248     (
00249                 XMLReader* const        reader
00250         ,       XMLEntityDecl* const    entity
00251     );
00252     void reset();
00253 
00254 
00255     // -----------------------------------------------------------------------
00256     //  Getter methods
00257     // -----------------------------------------------------------------------
00258     const XMLCh* getCurrentEncodingStr() const;
00259     const XMLEntityDecl* getCurrentEntity() const;
00260     XMLEntityDecl* getCurrentEntity();
00261     const XMLReader* getCurrentReader() const;
00262     XMLReader* getCurrentReader();
00263     unsigned int getCurrentReaderNum() const;
00264     unsigned int getReaderDepth() const;
00265     void getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const;
00266     unsigned int getSrcOffset() const;
00267     bool getThrowEOE() const;
00268 
00269 
00270     // -----------------------------------------------------------------------
00271     //  Setter methods
00272     // -----------------------------------------------------------------------
00273     void setEntityHandler(XMLEntityHandler* const newHandler);
00274     void setThrowEOE(const bool newValue);
00275 
00276 
00277     // -----------------------------------------------------------------------
00278     //  Implement the SAX Locator interface
00279     // -----------------------------------------------------------------------
00280     virtual const XMLCh* getPublicId() const;
00281     virtual const XMLCh* getSystemId() const;
00282     virtual int getLineNumber() const;
00283     virtual int getColumnNumber() const;
00284 
00285 
00286 private :
00287     // -----------------------------------------------------------------------
00288     //  Private helper methods
00289     // -----------------------------------------------------------------------
00290     const XMLReader* getLastExtEntity(const XMLEntityDecl*& itsEntity) const;
00291     bool popReader();
00292 
00293 
00294     // -----------------------------------------------------------------------
00295     //  Private data members
00296     //
00297     //  fCurEntity
00298     //      This is the current top of stack entity. We pull it off the stack
00299     //      and store it here for efficiency.
00300     //
00301     //  fCurReader
00302     //      This is the current top of stack reader. We pull it off the
00303     //      stack and store it here for efficiency.
00304     //
00305     //  fEntityHandler
00306     //      This is the installed entity handler. Its installed via the
00307     //      scanner but he passes it on to us since we need it the most, in
00308     //      process of creating external entity readers.
00309     //
00310     //  fEntityStack
00311     //      We need to keep up with which of the pushed readers are pushed
00312     //      entity values that are being spooled. This is done to avoid the
00313     //      problem of recursive definitions. This stack consists of refs to
00314     //      EntityDecl objects for the pushed entities.
00315     //
00316     //  fNextReaderNum
00317     //      This is the reader serial number value. Each new reader that is
00318     //      created from this reader is given a successive number. This lets
00319     //      us catch things like partial markup errors and such.
00320     //
00321     //  fReaderStack
00322     //      This is the stack of reader references. We own all the readers
00323     //      and destroy them when they are used up.
00324     //
00325     //  fThrowEOE
00326     //      This flag controls whether we throw an exception when we hit an
00327     //      end of entity. The scanner doesn't really need to know about ends
00328     //      of entities in the int/ext subsets, so it will turn this flag off
00329     //      until it gets into the content usually.
00330     //
00331     //  fURLEntityCaching
00332     //      This flag indicates if the external entities should be
00333     //      cached or not.
00334     //
00335     //  fURLEntityCacheDir
00336     //      The location where the external entities are cached.
00337     //
00338     // -----------------------------------------------------------------------
00339     XMLEntityDecl*              fCurEntity;
00340     XMLReader*                  fCurReader;
00341     XMLEntityHandler*           fEntityHandler;
00342     RefStackOf<XMLEntityDecl>*  fEntityStack;
00343     unsigned int                fNextReaderNum;
00344     RefStackOf<XMLReader>*      fReaderStack;
00345     bool                        fThrowEOE;
00346     bool                        fURLEntityCaching;
00347     XMLCh*                      fURLEntityCacheDir;
00348 };
00349 
00350 
00351 
00352 // ---------------------------------------------------------------------------
00353 //  ReaderMgr: Inlined methods
00354 //
00355 //  NOTE: We cannot put these in alphabetical and type order as we usually
00356 //  do because some of the compilers we have to support are too stupid to
00357 //  understand out of order inlines!
00358 // ---------------------------------------------------------------------------
00359 inline unsigned int ReaderMgr::getCurrentReaderNum() const
00360 {
00361     return fCurReader->getReaderNum();
00362 }
00363 
00364 inline bool ReaderMgr::getName(XMLBuffer& toFill)
00365 {
00366     toFill.reset();
00367     return fCurReader->getName(toFill, false);
00368 }
00369 
00370 inline bool ReaderMgr::getNameToken(XMLBuffer& toFill)
00371 {
00372     toFill.reset();
00373     return fCurReader->getName(toFill, true);
00374 }
00375 
00376 inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
00377 {
00378     return fCurReader->getNextCharIfNot(chNotToGet, chGotten);
00379 }
00380 
00381 inline void ReaderMgr::movePlainContentChars(XMLBuffer &dest)
00382 {
00383     fCurReader->movePlainContentChars(dest);
00384 }
00385 
00386 inline bool ReaderMgr::getThrowEOE() const
00387 {
00388     return fThrowEOE;
00389 }
00390 
00391 inline unsigned int ReaderMgr::getSrcOffset() const
00392 {
00393     return fCurReader->getSrcOffset();
00394 }
00395 
00396 inline bool ReaderMgr::lookingAtChar(const XMLCh chToCheck)
00397 {
00398     return (chToCheck == peekNextChar());
00399 }
00400 
00401 inline bool ReaderMgr::lookingAtSpace()
00402 {
00403     return XMLReader::isWhitespace(peekNextChar());
00404 }
00405 
00406 inline void ReaderMgr::setThrowEOE(const bool newValue)
00407 {
00408     fThrowEOE = newValue;
00409 }
00410 
00411 inline bool ReaderMgr::skippedString(const XMLCh* const toSkip)
00412 {
00413     return fCurReader->skippedString(toSkip);
00414 }
00415 
00416 inline void ReaderMgr::skipToChar(const XMLCh toSkipTo)
00417 {
00418     while (true)
00419     {
00420         // Get chars until we find the one to skip
00421         const XMLCh nextCh = getNextChar();
00422 
00423         // Break out at end of input or the char to skip
00424         if ((nextCh == toSkipTo) || !nextCh)
00425             break;
00426     }
00427 }
00428 
00429 inline void ReaderMgr::skipPastChar(const XMLCh toSkipPast)
00430 {
00431     while (true)
00432     {
00433         // Get chars until we find the one to skip
00434         const XMLCh nextCh = getNextChar();
00435 
00436         if ((nextCh == toSkipPast) || !nextCh)
00437             break;
00438     }
00439 }
00440 
00441 inline bool ReaderMgr::peekString(const XMLCh* const toPeek)
00442 {
00443     return fCurReader->peekString(toPeek);
00444 }
00445 
00446 inline void ReaderMgr::setEntityHandler(XMLEntityHandler* const newHandler)
00447 {
00448     fEntityHandler = newHandler;
00449 }
00450 
00451 
00452 //
00453 //  This is a simple class to temporarily change the 'throw at end of entity'
00454 //  flag of the reader manager. There are some places where we need to
00455 //  turn this on and off on a scoped basis.
00456 //
00457 class XMLPARSER_EXPORT ThrowEOEJanitor
00458 {
00459 public :
00460     // -----------------------------------------------------------------------
00461     //  Constructors and destructor
00462     // -----------------------------------------------------------------------
00463     ThrowEOEJanitor(ReaderMgr* mgrTarget, const bool newValue) :
00464 
00465         fOld(mgrTarget->getThrowEOE())
00466         , fMgr(mgrTarget)
00467     {
00468         mgrTarget->setThrowEOE(newValue);
00469     }
00470 
00471     ~ThrowEOEJanitor()
00472     {
00473         fMgr->setThrowEOE(fOld);
00474     };
00475 
00476 private :
00477     // -----------------------------------------------------------------------
00478     //  Private data members
00479     //
00480     //  fOld
00481     //      The previous value of the flag, which we replaced during ctor,
00482     //      and will replace during dtor.
00483     //
00484     //  fMgr
00485     //      A pointer to the reader manager we are going to set/reset the
00486     //      flag on.
00487     // -----------------------------------------------------------------------
00488     bool        fOld;
00489     ReaderMgr*  fMgr;
00490 };
00491 
00492 #endif

Generated on Tue Nov 19 09:36:32 2002 by doxygen1.3-rc1