Main Page   Class Hierarchy   Compound List   File List   Compound Members  

XMLString.hpp

00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: XMLString.hpp,v $
00059  * Revision 1.1  2002/05/11 21:20:17  bhavani
00060  * CR#CR062582# adding xercesc 1.7 file
00061  *
00062  * Revision 1.2  2002/02/20 18:17:02  tng
00063  * [Bug 5977] Warnings on generating apiDocs.
00064  *
00065  * Revision 1.1.1.1  2002/02/01 22:22:16  peiyongz
00066  * sane_include
00067  *
00068  * Revision 1.26  2001/08/10 16:23:06  peiyongz
00069  * isHex(), isAlphaNum(), isAllWhiteSpace() and patternMatch() Added
00070  *
00071  * Revision 1.25  2001/07/06 20:27:57  peiyongz
00072  * isValidQName()
00073  *
00074  * Revision 1.24  2001/07/04 14:38:20  peiyongz
00075  * IDDatatypeValidator: created
00076  * DatatypeValidatorFactory: IDDTV enabled
00077  * XMLString:isValidName(): to validate Name (XML [4][5])
00078  *
00079  * Revision 1.23  2001/06/13 14:07:55  peiyongz
00080  * isValidEncName() to validate an encoding name (EncName)
00081  *
00082  * Revision 1.22  2001/05/23 15:44:51  tng
00083  * Schema: NormalizedString fix.  By Pei Yong Zhang.
00084  *
00085  * Revision 1.21  2001/05/11 13:26:31  tng
00086  * Copyright update.
00087  *
00088  * Revision 1.20  2001/05/09 18:43:30  tng
00089  * Add StringDatatypeValidator and BooleanDatatypeValidator.  By Pei Yong Zhang.
00090  *
00091  * Revision 1.19  2001/05/03 20:34:35  tng
00092  * Schema: SchemaValidator update
00093  *
00094  * Revision 1.18  2001/05/03 19:17:35  knoaman
00095  * TraverseSchema Part II.
00096  *
00097  * Revision 1.17  2001/03/21 21:56:13  tng
00098  * Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
00099  *
00100  * Revision 1.16  2001/03/02 20:52:46  knoaman
00101  * Schema: Regular expression - misc. updates for error messages,
00102  * and additions of new functions to XMLString class.
00103  *
00104  * Revision 1.15  2001/01/15 21:26:34  tng
00105  * Performance Patches by David Bertoni.
00106  *
00107  * Details: (see xerces-c-dev mailing Jan 14)
00108  * XMLRecognizer.cpp: the internal encoding string XMLUni::fgXMLChEncodingString
00109  * was going through this function numerous times.  As a result, the top hot-spot
00110  * for the parse was _wcsicmp().  The real problem is that Microsoft's wide string
00111  * functions are unbelievably slow.  For things like encodings, it might be
00112  * better to use a special comparison function that only considers a-z and
00113  * A-Z as characters with case.  This works since the character set for
00114  * encodings is limited to printable ASCII characters.
00115  *
00116  *  XMLScanner2.cpp: This also has some case-sensitive vs. insensitive compares.
00117  * They are also much faster.  The other tweak is to only make a copy of an attribute
00118  * string if it needs to be split.  And then, the strategy is to try to use a
00119  * stack-based buffer, rather than a dynamically-allocated one.
00120  *
00121  * SAX2XMLReaderImpl.cpp: Again, more case-sensitive vs. insensitive comparisons.
00122  *
00123  * KVStringPair.cpp & hpp: By storing the size of the allocation, the storage can
00124  * likely be re-used many times, cutting down on dynamic memory allocations.
00125  *
00126  * XMLString.hpp: a more efficient implementation of stringLen().
00127  *
00128  * DTDValidator.cpp: another case of using a stack-based buffer when possible
00129  *
00130  * These patches made a big difference in parse time in some of our test
00131  * files, especially the ones that are very attribute-heavy.
00132  *
00133  * Revision 1.14  2000/10/13 22:47:57  andyh
00134  * Fix bug (failure to null-terminate result) in XMLString::trim().
00135  * Patch contributed by Nadav Aharoni
00136  *
00137  * Revision 1.13  2000/04/12 18:42:15  roddey
00138  * Improved docs in terms of what 'max chars' means in the method
00139  * parameters.
00140  *
00141  * Revision 1.12  2000/04/06 19:42:51  rahulj
00142  * Clarified how big the target buffer should be in the API
00143  * documentation.
00144  *
00145  * Revision 1.11  2000/03/23 01:02:38  roddey
00146  * Updates to the XMLURL class to correct a lot of parsing problems
00147  * and to add support for the port number. Updated the URL tests
00148  * to test some of this new stuff.
00149  *
00150  * Revision 1.10  2000/03/20 23:00:46  rahulj
00151  * Moved the inline definition of stringLen before the first
00152  * use. This satisfied the HP CC compiler.
00153  *
00154  * Revision 1.9  2000/03/02 19:54:49  roddey
00155  * This checkin includes many changes done while waiting for the
00156  * 1.1.0 code to be finished. I can't list them all here, but a list is
00157  * available elsewhere.
00158  *
00159  * Revision 1.8  2000/02/24 20:05:26  abagchi
00160  * Swat for removing Log from API docs
00161  *
00162  * Revision 1.7  2000/02/16 18:51:52  roddey
00163  * Fixed some facts in the docs and reformatted the docs to stay within
00164  * a reasonable line width.
00165  *
00166  * Revision 1.6  2000/02/16 17:07:07  abagchi
00167  * Added API docs
00168  *
00169  * Revision 1.5  2000/02/06 07:48:06  rahulj
00170  * Year 2K copyright swat.
00171  *
00172  * Revision 1.4  2000/01/12 00:16:23  roddey
00173  * Changes to deal with multiply nested, relative pathed, entities and to deal
00174  * with the new URL class changes.
00175  *
00176  * Revision 1.3  1999/12/18 00:18:10  roddey
00177  * More changes to support the new, completely orthogonal support for
00178  * intrinsic encodings.
00179  *
00180  * Revision 1.2  1999/12/15 19:41:28  roddey
00181  * Support for the new transcoder system, where even intrinsic encodings are
00182  * done via the same transcoder abstraction as external ones.
00183  *
00184  * Revision 1.1.1.1  1999/11/09 01:05:52  twl
00185  * Initial checkin
00186  *
00187  * Revision 1.2  1999/11/08 20:45:21  rahul
00188  * Swat for adding in Product name and CVS comment log variable.
00189  *
00190  */
00191 
00192 #if !defined(XMLSTRING_HPP)
00193 #define XMLSTRING_HPP
00194 
00195 #include <xercesc/util/XercesDefs.hpp>
00196 #include <xercesc/util/RefVectorOf.hpp>
00197 
00198 class XMLLCPTranscoder;
00199 
00211 class XMLUTIL_EXPORT XMLString
00212 {
00213 public:
00214     /* Static methods for native character mode string manipulation */
00217 
00228     static void binToText
00229     (
00230         const   unsigned int    toFormat
00231         ,       char* const     toFill
00232         , const unsigned int    maxChars
00233         , const unsigned int    radix
00234     );
00235 
00246     static void binToText
00247     (
00248         const   unsigned int    toFormat
00249         ,       XMLCh* const    toFill
00250         , const unsigned int    maxChars
00251         , const unsigned int    radix
00252     );
00253 
00264     static void binToText
00265     (
00266         const   unsigned long   toFormat
00267         ,       char* const     toFill
00268         , const unsigned int    maxChars
00269         , const unsigned int    radix
00270     );
00271 
00282     static void binToText
00283     (
00284         const   unsigned long   toFormat
00285         ,       XMLCh* const    toFill
00286         , const unsigned int    maxChars
00287         , const unsigned int    radix
00288     );
00289 
00300     static void binToText
00301     (
00302         const   long            toFormat
00303         ,       char* const     toFill
00304         , const unsigned int    maxChars
00305         , const unsigned int    radix
00306     );
00307 
00318     static void binToText
00319     (
00320         const   long            toFormat
00321         ,       XMLCh* const    toFill
00322         , const unsigned int    maxChars
00323         , const unsigned int    radix
00324     );
00325 
00336     static void binToText
00337     (
00338         const   int             toFormat
00339         ,       char* const     toFill
00340         , const unsigned int    maxChars
00341         , const unsigned int    radix
00342     );
00343 
00354     static void binToText
00355     (
00356         const   int             toFormat
00357         ,       XMLCh* const    toFill
00358         , const unsigned int    maxChars
00359         , const unsigned int    radix
00360     );
00361 
00372     static bool textToBin
00373     (
00374         const   XMLCh* const    toConvert
00375         ,       unsigned int&   toFill
00376     );
00377 
00390     static int parseInt
00391     (
00392         const   XMLCh* const    toConvert
00393     );
00394 
00396 
00413     static void catString
00414     (
00415                 char* const     target
00416         , const char* const     src
00417     );
00418 
00431     static void catString
00432     (
00433                 XMLCh* const    target
00434         , const XMLCh* const    src
00435     );
00437 
00451     static int compareIString
00452     (
00453         const   char* const     str1
00454         , const char* const     str2
00455     );
00456 
00467     static int compareIString
00468     (
00469         const   XMLCh* const    str1
00470         , const XMLCh* const    str2
00471     );
00472 
00473 
00487     static int compareNString
00488     (
00489         const   char* const     str1
00490         , const char* const     str2
00491         , const unsigned int    count
00492     );
00493 
00507     static int compareNString
00508     (
00509         const   XMLCh* const    str1
00510         , const XMLCh* const    str2
00511         , const unsigned int    count
00512     );
00513 
00514 
00528     static int compareNIString
00529     (
00530         const   char* const     str1
00531         , const char* const     str2
00532         , const unsigned int    count
00533     );
00534 
00549     static int compareNIString
00550     (
00551         const   XMLCh* const    str1
00552         , const XMLCh* const    str2
00553         , const unsigned int    count
00554     );
00555 
00568     static int compareString
00569     (
00570         const   char* const     str1
00571         , const char* const     str2
00572     );
00573 
00585     static int compareString
00586     (
00587         const   XMLCh* const    str1
00588         , const XMLCh* const    str2
00589     );
00590 
00617     static bool regionMatches
00618     (
00619         const   XMLCh* const    str1
00620                 , const int                             offset1
00621         , const XMLCh* const    str2
00622                 , const int                             offset2
00623                 , const unsigned int    charCount
00624     );
00625 
00653     static bool regionIMatches
00654     (
00655         const   XMLCh* const    str1
00656                 , const int                             offset1
00657         , const XMLCh* const    str2
00658                 , const int                             offset2
00659                 , const unsigned int    charCount
00660     );
00662 
00675     static void copyString
00676     (
00677                 char* const     target
00678         , const char* const     src
00679     );
00680 
00691     static void copyString
00692     (
00693                 XMLCh* const    target
00694         , const XMLCh* const    src
00695     );
00696 
00709     static bool copyNString
00710     (
00711                 XMLCh* const    target
00712         , const XMLCh* const    src
00713         , const unsigned int    maxChars
00714     );
00716 
00725     static unsigned int hash
00726     (
00727         const   char* const     toHash
00728         , const unsigned int    hashModulus
00729     );
00730 
00737     static unsigned int hash
00738     (
00739         const   XMLCh* const    toHash
00740         , const unsigned int    hashModulus
00741     );
00742 
00752     static unsigned int hashN
00753     (
00754         const   XMLCh* const    toHash
00755         , const unsigned int    numChars
00756         , const unsigned int    hashModulus
00757     );
00758 
00760 
00771     static int indexOf(const char* const toSearch, const char ch);
00772 
00781     static int indexOf(const XMLCh* const toSearch, const XMLCh ch);
00782 
00793     static int indexOf
00794     (
00795         const   char* const     toSearch
00796         , const char            chToFind
00797         , const unsigned int    fromIndex
00798     );
00799 
00810     static int indexOf
00811     (
00812         const   XMLCh* const    toSearch
00813         , const XMLCh           chToFind
00814         , const unsigned int    fromIndex
00815     );
00816 
00825     static int lastIndexOf(const char* const toSearch, const char ch);
00826 
00835     static int lastIndexOf(const XMLCh* const toSearch, const XMLCh ch);
00836 
00847     static int lastIndexOf
00848     (
00849         const   char* const     toSearch
00850         , const char            chToFind
00851         , const unsigned int    fromIndex
00852     );
00853 
00864     static int lastIndexOf
00865     (
00866         const   XMLCh* const    toSearch
00867         , const XMLCh           ch
00868         , const unsigned int    fromIndex
00869     );
00871 
00879     static void moveChars
00880     (
00881                 XMLCh* const    targetStr
00882         , const XMLCh* const    srcStr
00883         , const unsigned int    count
00884     );
00885 
00887 
00898     static void subString
00899     (
00900                 char* const    targetStr
00901         , const char* const    srcStr
00902         , const int            startIndex
00903         , const int            endIndex
00904     );
00905 
00914     static void subString
00915     (
00916                 XMLCh* const    targetStr
00917         , const XMLCh* const    srcStr
00918         , const int             startIndex
00919         , const int             endIndex
00920     );
00921 
00923 
00930     static char* replicate(const char* const toRep);
00931 
00936     static XMLCh* replicate(const XMLCh* const toRep);
00937 
00939 
00948     static bool startsWith
00949     (
00950         const   char* const     toTest
00951         , const char* const     prefix
00952     );
00953 
00960     static bool startsWith
00961     (
00962         const   XMLCh* const    toTest
00963         , const XMLCh* const    prefix
00964     );
00965 
00974     static bool startsWithI
00975     (
00976         const   char* const     toTest
00977         , const char* const     prefix
00978     );
00979 
00989     static bool startsWithI
00990     (
00991         const   XMLCh* const    toTest
00992         , const XMLCh* const    prefix
00993     );
00994 
01001     static bool endsWith
01002     (
01003         const   XMLCh* const    toTest
01004         , const XMLCh* const    suffix
01005     );
01006 
01007 
01014     static const XMLCh* findAny
01015     (
01016         const   XMLCh* const    toSearch
01017         , const XMLCh* const    searchList
01018     );
01019 
01026     static XMLCh* findAny
01027     (
01028                 XMLCh* const    toSearch
01029         , const XMLCh* const    searchList
01030     );
01031 
01038     static int patternMatch
01039     (
01040                 XMLCh* const    toSearch
01041         , const XMLCh* const    pattern
01042     );
01043 
01048     static unsigned int stringLen(const char* const src);
01049 
01054     static unsigned int stringLen(const XMLCh* const src);
01055 
01061     static bool isValidNCName(const XMLCh* const name);
01062 
01068     static bool isValidName(const XMLCh* const name);
01069 
01075     static bool isValidEncName(const XMLCh* const name);
01076 
01082     static bool isValidQName(const XMLCh* const name);
01083 
01090     static bool isAlpha(XMLCh const theChar);
01091 
01097     static bool isDigit(XMLCh const theChar);
01098 
01104     static bool isAlphaNum(XMLCh const theChar);
01105 
01111     static bool isHex(XMLCh const theChar);
01112 
01118     static bool isAllWhiteSpace(const XMLCh* const toCheck);
01119 
01121 
01124 
01130     static void cut
01131     (
01132                 XMLCh* const    toCutFrom
01133         , const unsigned int    count
01134     );
01135 
01144     static char* transcode
01145     (
01146         const   XMLCh* const    toTranscode
01147     );
01148 
01165     static bool transcode
01166     (
01167         const   XMLCh* const    toTranscode
01168         ,       char* const     toFill
01169         , const unsigned int    maxChars
01170     );
01171 
01180     static XMLCh* transcode
01181     (
01182         const   char* const     toTranscode
01183     );
01184 
01196     static bool transcode
01197     (
01198         const   char* const     toTranscode
01199         ,       XMLCh* const    toFill
01200         , const unsigned int    maxChars
01201     );
01202 
01208     static void trim(char* const toTrim);
01209 
01215     static void trim(XMLCh* const toTrim);
01216 
01223     static RefVectorOf<XMLCh>* tokenizeString(const XMLCh* const tokenizeSrc);
01224 
01230     static bool isInList(const XMLCh* const toFind, const XMLCh* const enumList);
01231 
01233 
01244     static XMLCh* makeUName
01245     (
01246         const   XMLCh* const    pszURI
01247         , const XMLCh* const    pszName
01248     );
01249 
01265     static unsigned int replaceTokens
01266     (
01267                 XMLCh* const    errText
01268         , const unsigned int    maxChars
01269         , const XMLCh* const    text1
01270         , const XMLCh* const    text2
01271         , const XMLCh* const    text3
01272         , const XMLCh* const    text4
01273     );
01274 
01279     static void upperCase(XMLCh* const toUpperCase);
01280 
01285     static void lowerCase(XMLCh* const toLowerCase);
01286 
01290     static bool isWSReplaced(const XMLCh* const toCheck);
01291 
01295     static bool isWSCollapsed(const XMLCh* const toCheck);
01296 
01301     static void replaceWS(XMLCh* const toConvert);
01302 
01307     static void collapseWS(XMLCh* const toConvert);
01309 
01310 
01311 private :
01312 
01316     XMLString();
01318     ~XMLString();
01320 
01321 
01325     static void initString(XMLLCPTranscoder* const defToUse);
01326     static void termString();
01328 
01333         static bool validateRegion(const XMLCh* const str1, const int offset1,
01334                                                 const XMLCh* const str2, const int offset2,
01335                                                 const unsigned int charCount);
01336 
01337     friend class XMLPlatformUtils;
01338 };
01339 
01340 
01341 // ---------------------------------------------------------------------------
01342 //  Inline some methods that are either just passthroughs to other string
01343 //  methods, or which are key for performance.
01344 // ---------------------------------------------------------------------------
01345 inline void XMLString::moveChars(       XMLCh* const    targetStr
01346                                 , const XMLCh* const    srcStr
01347                                 , const unsigned int    count)
01348 {
01349     XMLCh* outPtr = targetStr;
01350     const XMLCh* inPtr = srcStr;
01351     for (unsigned int index = 0; index < count; index++)
01352         *outPtr++ = *inPtr++;
01353 }
01354 
01355 inline unsigned int XMLString::stringLen(const XMLCh* const src)
01356 {
01357     if (src == 0 || *src == 0)
01358     {
01359         return 0;
01360    }
01361     else
01362    {
01363         const XMLCh* pszTmp = src + 1;
01364 
01365         while (*pszTmp)
01366             ++pszTmp;
01367 
01368         return (unsigned int)(pszTmp - src);
01369     }
01370 }
01371 
01372 inline bool XMLString::startsWith(  const   XMLCh* const    toTest
01373                                     , const XMLCh* const    prefix)
01374 {
01375     return (compareNString(toTest, prefix, stringLen(prefix)) == 0);
01376 }
01377 
01378 inline bool XMLString::startsWithI( const   XMLCh* const    toTest
01379                                     , const XMLCh* const    prefix)
01380 {
01381     return (compareNIString(toTest, prefix, stringLen(prefix)) == 0);
01382 }
01383 
01384 inline bool XMLString::endsWith(const XMLCh* const toTest,
01385                                 const XMLCh* const suffix)
01386 {
01387 
01388     unsigned int suffixLen = XMLString::stringLen(suffix);
01389 
01390     return regionMatches(toTest, XMLString::stringLen(toTest) - suffixLen,
01391                          suffix, 0, suffixLen);
01392 }
01393 
01394 inline XMLCh* XMLString::replicate(const XMLCh* const toRep)
01395 {
01396     // If a null string, return a null string!
01397     XMLCh* ret = 0;
01398     if (toRep)
01399     {
01400         const unsigned int len = stringLen(toRep);
01401         ret = new XMLCh[len + 1];
01402         XMLCh* outPtr = ret;
01403         const XMLCh* inPtr = toRep;
01404         for (unsigned int index = 0; index <= len; index++)
01405             *outPtr++ = *inPtr++;
01406     }
01407     return ret;
01408 }
01409 
01410 inline bool XMLString::validateRegion(const XMLCh* const str1,
01411                                                                           const int offset1,
01412                                                                           const XMLCh* const str2,
01413                                                                           const int offset2,
01414                                                                           const unsigned int charCount)
01415 {
01416 
01417         if (offset1 < 0 || offset2 < 0 ||
01418                 (offset1 + charCount) > XMLString::stringLen(str1) ||
01419                 (offset2 + charCount) > XMLString::stringLen(str2) )
01420                 return false;
01421 
01422         return true;
01423 }
01424 
01425 #endif

Generated on Tue Nov 19 09:36:36 2002 by doxygen1.3-rc1