00001 /* 00002 * The Apache Software License, Version 1.1 00003 * 00004 * Copyright (c) 1999-2000 The Apache Software Foundation. All rights 00005 * reserved. 00006 * 00007 * Redistribution and use in source and binary forms, with or without 00008 * modification, are permitted provided that the following conditions 00009 * are met: 00010 * 00011 * 1. Redistributions of source code must retain the above copyright 00012 * notice, this list of conditions and the following disclaimer. 00013 * 00014 * 2. Redistributions in binary form must reproduce the above copyright 00015 * notice, this list of conditions and the following disclaimer in 00016 * the documentation and/or other materials provided with the 00017 * distribution. 00018 * 00019 * 3. The end-user documentation included with the redistribution, 00020 * if any, must include the following acknowledgment: 00021 * "This product includes software developed by the 00022 * Apache Software Foundation (http://www.apache.org/)." 00023 * Alternately, this acknowledgment may appear in the software itself, 00024 * if and wherever such third-party acknowledgments normally appear. 00025 * 00026 * 4. The names "Xerces" and "Apache Software Foundation" must 00027 * not be used to endorse or promote products derived from this 00028 * software without prior written permission. For written 00029 * permission, please contact apache\@apache.org. 00030 * 00031 * 5. Products derived from this software may not be called "Apache", 00032 * nor may "Apache" appear in their name, without prior written 00033 * permission of the Apache Software Foundation. 00034 * 00035 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 00036 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00037 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00038 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 00039 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00040 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00041 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 00042 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00043 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 00044 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 00045 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00046 * SUCH DAMAGE. 00047 * ==================================================================== 00048 * 00049 * This software consists of voluntary contributions made by many 00050 * individuals on behalf of the Apache Software Foundation, and was 00051 * originally based on software copyright (c) 1999, International 00052 * Business Machines, Inc., http://www.ibm.com . For more information 00053 * on the Apache Software Foundation, please see 00054 * <http://www.apache.org/>. 00055 */ 00056 00057 /* 00058 * $Log: ReaderMgr.hpp,v $ 00059 * Revision 1.1 2002/05/11 20:18:20 bhavani 00060 * CR#CR062582# adding xercesc 1.7 file 00061 * 00062 * Revision 1.1.1.1 2002/02/01 22:21:58 peiyongz 00063 * sane_include 00064 * 00065 * Revision 1.13 2001/07/12 18:50:08 tng 00066 * Some performance modification regarding standalone check and xml decl check. 00067 * 00068 * Revision 1.12 2000/09/09 00:18:18 andyh 00069 * Reordered member variables in ThrowEOEJanitor. Patch submitted 00070 * by Kirk Wylie. 00071 * 00072 * Revision 1.11 2000/07/08 00:17:13 andyh 00073 * Cleanup of yesterday's speedup changes. Merged new bit into the 00074 * scanner character properties table. 00075 * 00076 * Revision 1.10 2000/07/07 01:08:44 andyh 00077 * Parser speed up in scan of XML content. 00078 * 00079 * Revision 1.9 2000/03/02 19:54:29 roddey 00080 * This checkin includes many changes done while waiting for the 00081 * 1.1.0 code to be finished. I can't list them all here, but a list is 00082 * available elsewhere. 00083 * 00084 * Revision 1.8 2000/02/24 20:18:07 abagchi 00085 * Swat for removing Log from API docs 00086 * 00087 * Revision 1.7 2000/02/24 02:12:53 aruna1 00088 * ReaderMgr:;getReaderDepth() added 00089 * 00090 * Revision 1.6 2000/02/06 07:47:53 rahulj 00091 * Year 2K copyright swat. 00092 * 00093 * Revision 1.5 2000/01/25 01:04:21 roddey 00094 * Fixes a bogus error about ]]> in char data. 00095 * 00096 * Revision 1.4 2000/01/24 20:40:43 roddey 00097 * Exposed the APIs to get to the byte offset in the source XML buffer. This stuff 00098 * is not tested yet, but I wanted to get the API changes in now so that the API 00099 * can be stablized. 00100 * 00101 * Revision 1.3 2000/01/12 00:15:04 roddey 00102 * Changes to deal with multiply nested, relative pathed, entities and to deal 00103 * with the new URL class changes. 00104 * 00105 * Revision 1.2 1999/12/15 19:48:03 roddey 00106 * Changed to use new split of transcoder interfaces into XML transcoders and 00107 * LCP transcoders, and implementation of intrinsic transcoders as pluggable 00108 * transcoders, and addition of Latin1 intrinsic support. 00109 * 00110 * Revision 1.1.1.1 1999/11/09 01:08:13 twl 00111 * Initial checkin 00112 * 00113 * Revision 1.4 1999/11/08 20:56:54 droddey 00114 * If the main xml entity does not exist, we need to get the error handling for that 00115 * inside the main XMLScanner::scanDocument() try block so that it gets reported 00116 * in the normal way. We have to add a little extra safety code because, when this 00117 * happens, there is no reader on the reader stack to get position ino from. 00118 * 00119 * Revision 1.3 1999/11/08 20:44:43 rahul 00120 * Swat for adding in Product name and CVS comment log variable. 00121 * 00122 */ 00123 00124 #if !defined(READERMGR_HPP) 00125 #define READERMGR_HPP 00126 00127 #include <xercesc/util/RefStackOf.hpp> 00128 #include <xercesc/util/XMLString.hpp> 00129 #include <xercesc/sax/Locator.hpp> 00130 #include <xercesc/framework/XMLBuffer.hpp> 00131 #include <xercesc/internal/XMLReader.hpp> 00132 00133 class XMLBuffer; 00134 class XMLEntityDecl; 00135 class XMLEntityHandler; 00136 class XMLDocumentHandler; 00137 class XMLScanner; 00138 00139 00140 // --------------------------------------------------------------------------- 00141 // This class is used by the scanner. The scanner must deal with expansion 00142 // of entities, some of which are totally different files (external parsed 00143 // entities.) It does so by pushing readers onto a stack. The top reader is 00144 // the one it wants to read out of, but that one must be popped when it is 00145 // empty. To keep that logic from being all over the place, the scanner 00146 // talks to the reader manager, which handles the stack and popping off 00147 // used up readers. 00148 // --------------------------------------------------------------------------- 00149 class XMLPARSER_EXPORT ReaderMgr : public Locator 00150 { 00151 public : 00152 // ----------------------------------------------------------------------- 00153 // Class specific types 00154 // ----------------------------------------------------------------------- 00155 struct LastExtEntityInfo 00156 { 00157 const XMLCh* systemId; 00158 const XMLCh* publicId; 00159 unsigned int lineNumber; 00160 unsigned int colNumber; 00161 }; 00162 00163 00164 // ----------------------------------------------------------------------- 00165 // Constructors and Destructor 00166 // ----------------------------------------------------------------------- 00167 ReaderMgr(); 00168 ~ReaderMgr(); 00169 00170 00171 // ----------------------------------------------------------------------- 00172 // Convenience scanning methods 00173 // 00174 // This are all convenience methods that work in terms of the core 00175 // character spooling methods. 00176 // ----------------------------------------------------------------------- 00177 bool atEOF() const; 00178 bool getName(XMLBuffer& toFill); 00179 bool getNameToken(XMLBuffer& toFill); 00180 XMLCh getNextChar(); 00181 bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten); 00182 void movePlainContentChars(XMLBuffer &dest); 00183 void getSpaces(XMLBuffer& toFill); 00184 void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck); 00185 bool isEmpty() const; 00186 bool lookingAtChar(const XMLCh toCheck); 00187 bool lookingAtSpace(); 00188 XMLCh peekNextChar(); 00189 bool skipIfQuote(XMLCh& chGotten); 00190 void skipPastChar(const XMLCh toSkip); 00191 bool skipPastSpaces(); 00192 void skipToChar(const XMLCh toSkipTo); 00193 bool skippedChar(const XMLCh toSkip); 00194 bool skippedSpace(); 00195 bool skippedString(const XMLCh* const toSkip); 00196 void skipQuotedString(const XMLCh quoteCh); 00197 XMLCh skipUntilIn(const XMLCh* const listToSkip); 00198 XMLCh skipUntilInOrWS(const XMLCh* const listToSkip); 00199 bool peekString(const XMLCh* const toPeek); 00200 00201 00202 // ----------------------------------------------------------------------- 00203 // Control methods 00204 // ----------------------------------------------------------------------- 00205 void cleanStackBackTo(const unsigned int readerNum); 00206 XMLReader* createReader 00207 ( 00208 const InputSource& src 00209 , const bool xmlDecl 00210 , const XMLReader::RefFrom refFrom 00211 , const XMLReader::Types type 00212 , const XMLReader::Sources source 00213 , const XMLCh* const diskCachePath = 0 00214 ); 00215 XMLReader* createReader 00216 ( 00217 const XMLCh* const sysId 00218 , const XMLCh* const pubId 00219 , const bool xmlDecl 00220 , const XMLReader::RefFrom refFrom 00221 , const XMLReader::Types type 00222 , const XMLReader::Sources source 00223 , InputSource*& srcToFill 00224 ); 00225 XMLReader* createReader 00226 ( 00227 const XMLCh* const sysId 00228 , const XMLCh* const pubId 00229 , const bool xmlDecl 00230 , const XMLReader::RefFrom refFrom 00231 , const XMLReader::Types type 00232 , const XMLReader::Sources source 00233 , InputSource*& srcToFill 00234 , const bool urlCaching 00235 , const XMLCh* const urlCacheDir 00236 ); 00237 XMLReader* createIntEntReader 00238 ( 00239 const XMLCh* const sysId 00240 , const XMLReader::RefFrom refFrom 00241 , const XMLReader::Types type 00242 , const XMLCh* const dataBuf 00243 , const unsigned int dataLen 00244 , const bool copyBuf 00245 ); 00246 bool isScanningPERefOutOfLiteral() const; 00247 bool pushReader 00248 ( 00249 XMLReader* const reader 00250 , XMLEntityDecl* const entity 00251 ); 00252 void reset(); 00253 00254 00255 // ----------------------------------------------------------------------- 00256 // Getter methods 00257 // ----------------------------------------------------------------------- 00258 const XMLCh* getCurrentEncodingStr() const; 00259 const XMLEntityDecl* getCurrentEntity() const; 00260 XMLEntityDecl* getCurrentEntity(); 00261 const XMLReader* getCurrentReader() const; 00262 XMLReader* getCurrentReader(); 00263 unsigned int getCurrentReaderNum() const; 00264 unsigned int getReaderDepth() const; 00265 void getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const; 00266 unsigned int getSrcOffset() const; 00267 bool getThrowEOE() const; 00268 00269 00270 // ----------------------------------------------------------------------- 00271 // Setter methods 00272 // ----------------------------------------------------------------------- 00273 void setEntityHandler(XMLEntityHandler* const newHandler); 00274 void setThrowEOE(const bool newValue); 00275 00276 00277 // ----------------------------------------------------------------------- 00278 // Implement the SAX Locator interface 00279 // ----------------------------------------------------------------------- 00280 virtual const XMLCh* getPublicId() const; 00281 virtual const XMLCh* getSystemId() const; 00282 virtual int getLineNumber() const; 00283 virtual int getColumnNumber() const; 00284 00285 00286 private : 00287 // ----------------------------------------------------------------------- 00288 // Private helper methods 00289 // ----------------------------------------------------------------------- 00290 const XMLReader* getLastExtEntity(const XMLEntityDecl*& itsEntity) const; 00291 bool popReader(); 00292 00293 00294 // ----------------------------------------------------------------------- 00295 // Private data members 00296 // 00297 // fCurEntity 00298 // This is the current top of stack entity. We pull it off the stack 00299 // and store it here for efficiency. 00300 // 00301 // fCurReader 00302 // This is the current top of stack reader. We pull it off the 00303 // stack and store it here for efficiency. 00304 // 00305 // fEntityHandler 00306 // This is the installed entity handler. Its installed via the 00307 // scanner but he passes it on to us since we need it the most, in 00308 // process of creating external entity readers. 00309 // 00310 // fEntityStack 00311 // We need to keep up with which of the pushed readers are pushed 00312 // entity values that are being spooled. This is done to avoid the 00313 // problem of recursive definitions. This stack consists of refs to 00314 // EntityDecl objects for the pushed entities. 00315 // 00316 // fNextReaderNum 00317 // This is the reader serial number value. Each new reader that is 00318 // created from this reader is given a successive number. This lets 00319 // us catch things like partial markup errors and such. 00320 // 00321 // fReaderStack 00322 // This is the stack of reader references. We own all the readers 00323 // and destroy them when they are used up. 00324 // 00325 // fThrowEOE 00326 // This flag controls whether we throw an exception when we hit an 00327 // end of entity. The scanner doesn't really need to know about ends 00328 // of entities in the int/ext subsets, so it will turn this flag off 00329 // until it gets into the content usually. 00330 // 00331 // fURLEntityCaching 00332 // This flag indicates if the external entities should be 00333 // cached or not. 00334 // 00335 // fURLEntityCacheDir 00336 // The location where the external entities are cached. 00337 // 00338 // ----------------------------------------------------------------------- 00339 XMLEntityDecl* fCurEntity; 00340 XMLReader* fCurReader; 00341 XMLEntityHandler* fEntityHandler; 00342 RefStackOf<XMLEntityDecl>* fEntityStack; 00343 unsigned int fNextReaderNum; 00344 RefStackOf<XMLReader>* fReaderStack; 00345 bool fThrowEOE; 00346 bool fURLEntityCaching; 00347 XMLCh* fURLEntityCacheDir; 00348 }; 00349 00350 00351 00352 // --------------------------------------------------------------------------- 00353 // ReaderMgr: Inlined methods 00354 // 00355 // NOTE: We cannot put these in alphabetical and type order as we usually 00356 // do because some of the compilers we have to support are too stupid to 00357 // understand out of order inlines! 00358 // --------------------------------------------------------------------------- 00359 inline unsigned int ReaderMgr::getCurrentReaderNum() const 00360 { 00361 return fCurReader->getReaderNum(); 00362 } 00363 00364 inline bool ReaderMgr::getName(XMLBuffer& toFill) 00365 { 00366 toFill.reset(); 00367 return fCurReader->getName(toFill, false); 00368 } 00369 00370 inline bool ReaderMgr::getNameToken(XMLBuffer& toFill) 00371 { 00372 toFill.reset(); 00373 return fCurReader->getName(toFill, true); 00374 } 00375 00376 inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten) 00377 { 00378 return fCurReader->getNextCharIfNot(chNotToGet, chGotten); 00379 } 00380 00381 inline void ReaderMgr::movePlainContentChars(XMLBuffer &dest) 00382 { 00383 fCurReader->movePlainContentChars(dest); 00384 } 00385 00386 inline bool ReaderMgr::getThrowEOE() const 00387 { 00388 return fThrowEOE; 00389 } 00390 00391 inline unsigned int ReaderMgr::getSrcOffset() const 00392 { 00393 return fCurReader->getSrcOffset(); 00394 } 00395 00396 inline bool ReaderMgr::lookingAtChar(const XMLCh chToCheck) 00397 { 00398 return (chToCheck == peekNextChar()); 00399 } 00400 00401 inline bool ReaderMgr::lookingAtSpace() 00402 { 00403 return XMLReader::isWhitespace(peekNextChar()); 00404 } 00405 00406 inline void ReaderMgr::setThrowEOE(const bool newValue) 00407 { 00408 fThrowEOE = newValue; 00409 } 00410 00411 inline bool ReaderMgr::skippedString(const XMLCh* const toSkip) 00412 { 00413 return fCurReader->skippedString(toSkip); 00414 } 00415 00416 inline void ReaderMgr::skipToChar(const XMLCh toSkipTo) 00417 { 00418 while (true) 00419 { 00420 // Get chars until we find the one to skip 00421 const XMLCh nextCh = getNextChar(); 00422 00423 // Break out at end of input or the char to skip 00424 if ((nextCh == toSkipTo) || !nextCh) 00425 break; 00426 } 00427 } 00428 00429 inline void ReaderMgr::skipPastChar(const XMLCh toSkipPast) 00430 { 00431 while (true) 00432 { 00433 // Get chars until we find the one to skip 00434 const XMLCh nextCh = getNextChar(); 00435 00436 if ((nextCh == toSkipPast) || !nextCh) 00437 break; 00438 } 00439 } 00440 00441 inline bool ReaderMgr::peekString(const XMLCh* const toPeek) 00442 { 00443 return fCurReader->peekString(toPeek); 00444 } 00445 00446 inline void ReaderMgr::setEntityHandler(XMLEntityHandler* const newHandler) 00447 { 00448 fEntityHandler = newHandler; 00449 } 00450 00451 00452 // 00453 // This is a simple class to temporarily change the 'throw at end of entity' 00454 // flag of the reader manager. There are some places where we need to 00455 // turn this on and off on a scoped basis. 00456 // 00457 class XMLPARSER_EXPORT ThrowEOEJanitor 00458 { 00459 public : 00460 // ----------------------------------------------------------------------- 00461 // Constructors and destructor 00462 // ----------------------------------------------------------------------- 00463 ThrowEOEJanitor(ReaderMgr* mgrTarget, const bool newValue) : 00464 00465 fOld(mgrTarget->getThrowEOE()) 00466 , fMgr(mgrTarget) 00467 { 00468 mgrTarget->setThrowEOE(newValue); 00469 } 00470 00471 ~ThrowEOEJanitor() 00472 { 00473 fMgr->setThrowEOE(fOld); 00474 }; 00475 00476 private : 00477 // ----------------------------------------------------------------------- 00478 // Private data members 00479 // 00480 // fOld 00481 // The previous value of the flag, which we replaced during ctor, 00482 // and will replace during dtor. 00483 // 00484 // fMgr 00485 // A pointer to the reader manager we are going to set/reset the 00486 // flag on. 00487 // ----------------------------------------------------------------------- 00488 bool fOld; 00489 ReaderMgr* fMgr; 00490 }; 00491 00492 #endif