http://xml.apache.org/http://www.apache.org/http://www.w3.org/

Home

Readme
Release Info

Installation
Download
Build

FAQs
Samples
API Docs

DOM C++ Binding
Programming
Migration Guide

Feedback
Bug-Reporting
PDF Document

CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

XMLChar.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 2002 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: XMLChar.hpp,v $
00059  * Revision 1.3  2004/01/29 11:48:47  cargilld
00060  * Code cleanup changes to get rid of various compiler diagnostic messages.
00061  *
00062  * Revision 1.2  2003/08/14 02:57:27  knoaman
00063  * Code refactoring to improve performance of validation.
00064  *
00065  * Revision 1.1  2002/12/20 22:10:21  tng
00066  * XML 1.1
00067  *
00068  */
00069 
00070 #if !defined(XMLCHAR_HPP)
00071 #define XMLCHAR_HPP
00072 
00073 #include <xercesc/util/XMLUniDefs.hpp>
00074 
00075 XERCES_CPP_NAMESPACE_BEGIN
00076 
00077 // ---------------------------------------------------------------------------
00078 //  This file defines character-classification utilities that conform to XML 1.0 and XML 1.1
00079 // ---------------------------------------------------------------------------
00080 // Masks for the fgCharCharsTable1_0 array
00081 const XMLByte   gLetterCharMask             = 0x1;
00082 const XMLByte   gFirstNameCharMask          = 0x2;
00083 const XMLByte   gNameCharMask               = 0x4;
00084 const XMLByte   gPlainContentCharMask       = 0x8;
00085 const XMLByte   gSpecialStartTagCharMask    = 0x10;
00086 const XMLByte   gControlCharMask            = 0x20;
00087 const XMLByte   gXMLCharMask                = 0x40;
00088 const XMLByte   gWhitespaceCharMask         = 0x80;
00089 
00090 // ---------------------------------------------------------------------------
00091 //  This class is for XML 1.0
00092 // ---------------------------------------------------------------------------
00093 class  XMLChar1_0
00094 {
00095 public:
00096     // -----------------------------------------------------------------------
00097     //  Public, static methods, check the string
00098     // -----------------------------------------------------------------------
00099     static bool isAllSpaces
00100     (
00101         const   XMLCh* const    toCheck
00102         , const unsigned int    count
00103     );
00104 
00105     static bool containsWhiteSpace
00106     (
00107         const   XMLCh* const    toCheck
00108         , const unsigned int    count
00109     );
00110 
00111     static bool isValidName
00112     (
00113         const   XMLCh* const    toCheck
00114         , const unsigned int    count
00115     );
00116 
00117     static bool isValidNCName
00118     (
00119         const   XMLCh* const    toCheck
00120         , const unsigned int    count
00121     );
00122 
00123     static bool isValidQName
00124     (
00125         const   XMLCh* const    toCheck
00126         , const unsigned int    count
00127     );
00128 
00129     // -----------------------------------------------------------------------
00130     //  Public, static methods, check the XMLCh
00131     //  surrogate pair is assumed if second parameter is not null
00132     // -----------------------------------------------------------------------
00133     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00134     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00135     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00136     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00137     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00138     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00139     static bool isWhitespace(const XMLCh toCheck);
00140     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
00141     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00142 
00143     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00144 
00145     // -----------------------------------------------------------------------
00146     //  Special Non-conformant Public, static methods
00147     // -----------------------------------------------------------------------
00151     static bool isNELRecognized();
00152 
00156     static void enableNELWS();
00157 
00158 private:
00159     // -----------------------------------------------------------------------
00160     //  Unimplemented constructors and operators
00161     // -----------------------------------------------------------------------
00162     XMLChar1_0();
00163 
00164     // -----------------------------------------------------------------------
00165     //  Static data members
00166     //
00167     //  fgCharCharsTable1_0
00168     //      The character characteristics table. Bits in each byte, represent
00169     //      the characteristics of each character. It is generated via some
00170     //      code and then hard coded into the cpp file for speed.
00171     //
00172     //  fNEL
00173     //      Flag to respresents whether NEL and LSEP newline recognition is enabled
00174     //      or disabled
00175     // -----------------------------------------------------------------------
00176     static XMLByte  fgCharCharsTable1_0[0x10000];
00177     static bool     enableNEL;
00178 
00179     friend class XMLReader;
00180 };
00181 
00182 
00183 // ---------------------------------------------------------------------------
00184 //  XMLChar1_0: Public, static inline methods
00185 // ---------------------------------------------------------------------------
00186 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00187 {
00188     if (!toCheck2)
00189         return ((fgCharCharsTable1_0[toCheck] & gLetterCharMask) != 0);
00190     return false;
00191 }
00192 
00193 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00194 {
00195     if (!toCheck2)
00196         return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
00197     return false;
00198 }
00199 
00200 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00201 {
00202     if (!toCheck2)
00203         return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
00204     return false;
00205 }
00206 
00207 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00208 {
00209     if (!toCheck2)
00210         return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
00211     else {
00212         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00213            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00214                return true;
00215     }
00216     return false;
00217 }
00218 
00219 
00220 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00221 {
00222     if (!toCheck2)
00223         return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
00224     return false;
00225 }
00226 
00227 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00228 {
00229     if (!toCheck2)
00230         return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
00231     else {
00232         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00233            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00234                return true;
00235     }
00236     return false;
00237 }
00238 
00239 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
00240 {
00241     return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00242 }
00243 
00244 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00245 {
00246     if (!toCheck2)
00247         return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00248     return false;
00249 }
00250 
00251 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00252 {
00253     if (!toCheck2)
00254         return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
00255     return false;
00256 }
00257 
00258 inline bool XMLChar1_0::isNELRecognized() {
00259 
00260     return enableNEL;
00261 }
00262 
00263 
00264 // ---------------------------------------------------------------------------
00265 //  This class is for XML 1.1
00266 // ---------------------------------------------------------------------------
00267 class  XMLChar1_1
00268 {
00269 public:
00270     // -----------------------------------------------------------------------
00271     //  Public, static methods, check the string
00272     // -----------------------------------------------------------------------
00273     static bool isAllSpaces
00274     (
00275         const   XMLCh* const    toCheck
00276         , const unsigned int    count
00277     );
00278 
00279     static bool containsWhiteSpace
00280     (
00281         const   XMLCh* const    toCheck
00282         , const unsigned int    count
00283     );
00284 
00285     static bool isValidName
00286     (
00287         const   XMLCh* const    toCheck
00288         , const unsigned int    count
00289     );
00290 
00291     static bool isValidNCName
00292     (
00293         const   XMLCh* const    toCheck
00294         , const unsigned int    count
00295     );
00296 
00297     static bool isValidQName
00298     (
00299         const   XMLCh* const    toCheck
00300         , const unsigned int    count
00301     );
00302 
00303     // -----------------------------------------------------------------------
00304     //  Public, static methods, check the XMLCh
00305     // -----------------------------------------------------------------------
00306     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00307     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00308     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00309     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00310     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00311     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00312     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00313     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00314 
00315     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00316 
00317 private:
00318     // -----------------------------------------------------------------------
00319     //  Unimplemented constructors and operators
00320     // -----------------------------------------------------------------------
00321     XMLChar1_1();
00322 
00323     // -----------------------------------------------------------------------
00324     //  Static data members
00325     //
00326     //  fgCharCharsTable1_1
00327     //      The character characteristics table. Bits in each byte, represent
00328     //      the characteristics of each character. It is generated via some
00329     //      code and then hard coded into the cpp file for speed.
00330     //
00331     // -----------------------------------------------------------------------
00332     static XMLByte  fgCharCharsTable1_1[0x10000];
00333 
00334     friend class XMLReader;
00335 };
00336 
00337 
00338 // ---------------------------------------------------------------------------
00339 //  XMLChar1_1: Public, static inline methods
00340 // ---------------------------------------------------------------------------
00341 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00342 {
00343     if (!toCheck2)
00344         return ((fgCharCharsTable1_1[toCheck] & gLetterCharMask) != 0);
00345     return false;
00346 }
00347 
00348 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00349 {
00350     if (!toCheck2)
00351         return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
00352     else {
00353         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00354            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00355                return true;
00356     }
00357     return false;
00358 }
00359 
00360 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00361 {
00362     if (!toCheck2)
00363         return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
00364     else {
00365         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00366            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00367                return true;
00368     }
00369     return false;
00370 }
00371 
00372 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00373 {
00374     if (!toCheck2)
00375         return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
00376     else {
00377         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00378            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00379                return true;
00380     }
00381     return false;
00382 }
00383 
00384 
00385 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00386 {
00387     if (!toCheck2)
00388         return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
00389     return false;
00390 }
00391 
00392 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00393 {
00394     if (!toCheck2)
00395         return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
00396     else {
00397         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00398            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00399                return true;
00400     }
00401     return false;
00402 }
00403 
00404 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00405 {
00406     if (!toCheck2)
00407         return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
00408     return false;
00409 }
00410 
00411 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00412 {
00413     if (!toCheck2)
00414         return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
00415     return false;
00416 }
00417 
00418 
00419 XERCES_CPP_NAMESPACE_END
00420 
00421 #endif


Copyright © 2004 The Apache Software Foundation. All Rights Reserved.