Main Page   Class Hierarchy   Compound List   File List   Compound Members  

XMLRecognizer.hpp

00001 /*
00002  * The Apache Software License, Version 1.1
00003  * 
00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
00005  * reserved.
00006  * 
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  * 
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer. 
00013  * 
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  * 
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:  
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  * 
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written 
00029  *    permission, please contact apache\@apache.org.
00030  * 
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  * 
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  * 
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  *  $Id: XMLRecognizer.hpp,v 1.1 2002/05/11 20:04:08 bhavani Exp $
00059  */
00060 
00061 #if !defined(XMLRECOGNIZER_HPP)
00062 #define XMLRECOGNIZER_HPP
00063 
00072 class XMLPARSER_EXPORT XMLRecognizer    // static-only utility class; it is never meant to be instantiated
00073 {
00074 public :
00075     // -----------------------------------------------------------------------
00076     //  Class types
00077     //
00078     //  This enum represents the various encoding families that we have to
00079     //  deal with individually at the scanner level. This does not indicate
00080     //  the exact encoding, just the rough family that would let us scan
00081     //  the XML/TextDecl to find the encoding string.
00082     //
00083     //  The 'L's and 'B's stand for little or big endian. We conditionally
00084     //  create versions that will automatically map to the local UTF-16 and
00085     //  UCS-4 endian modes.
00086     //
00087     //  OtherEncoding means that it's some transcoder based encoding, i.e. not
00088     //  one of the ones that we do internally. It's a special case and should
00089     //  never be used directly outside of the reader.
00090     //
00091     //  NOTE: Keep this in sync with the name map array in the Cpp file!!
00092     // -----------------------------------------------------------------------
00093     enum Encodings
00094     {
00095         EBCDIC          = 0
00096         , UCS_4B        = 1
00097         , UCS_4L        = 2
00098         , US_ASCII      = 3
00099         , UTF_8         = 4
00100         , UTF_16B       = 5
00101         , UTF_16L       = 6
00102         // Bookkeeping values derived from the entries above.
00103         , Encodings_Count               // no initializer, so it is UTF_16L + 1 == 7
00104         , Encodings_Min = EBCDIC        // alias of the first entry (0)
00105         , Encodings_Max = UTF_16L       // alias of the last entry (6)
00106         // Deliberately placed outside the Encodings_Min..Encodings_Max range.
00107         , OtherEncoding = 999
00108         // Native-endian aliases, selected at compile time by ENDIANMODE_BIG.
00109         #if defined(ENDIANMODE_BIG)
00110         , Def_UTF16     = UTF_16B
00111         , Def_UCS4      = UCS_4B
00112         #else
00113         , Def_UTF16     = UTF_16L
00114         , Def_UCS4      = UCS_4L
00115         #endif
00116     };
00117 
00118 
00119     // -----------------------------------------------------------------------
00120     //  Public, const static data
00121     //
00122     //  These are the byte sequences for each of the encodings that we can
00123     //  auto sense, and their lengths.
00124     // -----------------------------------------------------------------------
00125     static const char           fgASCIIPre[];
00126     static const unsigned int   fgASCIIPreLen;
00127     static const XMLByte        fgEBCDICPre[];
00128     static const unsigned int   fgEBCDICPreLen;
00129     static const XMLByte        fgUTF16BPre[];
00130     static const XMLByte        fgUTF16LPre[];
00131     static const unsigned int   fgUTF16PreLen;      // single length declared for both UTF-16 arrays
00132     static const XMLByte        fgUCS4BPre[];
00133     static const XMLByte        fgUCS4LPre[];
00134     static const unsigned int   fgUCS4PreLen;       // single length declared for both UCS-4 arrays
00135     static const char           fgUTF8BOM[];
00136     static const unsigned int   fgUTF8BOMLen;
00137 
00138 
00139     // -----------------------------------------------------------------------
00140     //  Encoding recognition methods
00141     // -----------------------------------------------------------------------
00142     static Encodings basicEncodingProbe     // NOTE(review): presumably matches the buffer start against the sequences above -- confirm in the Cpp file
00143     (
00144         const   XMLByte* const      rawBuffer       // raw bytes to examine
00145         , const unsigned int        rawByteCount    // presumably the number of valid bytes in rawBuffer -- confirm in the Cpp file
00146     );
00147     // Takes an encoding name and yields an Encodings value; the result for unrecognized names is not visible here (TODO confirm in the Cpp file).
00148     static Encodings encodingForName
00149     (
00150         const   XMLCh* const    theEncName
00151     );
00152     // Takes an Encodings value and yields a name string; presumably backed by the name map array mentioned above (TODO confirm).
00153     static const XMLCh* nameForEncoding(const Encodings theEncoding);
00154 
00155 
00156 protected :
00157     // -----------------------------------------------------------------------
00158     //  Unimplemented constructors, operators, and destructor
00159     //
00160     //  This class is effectively being used as a namespace for some static
00161     //  methods.
00162     //
00163     //   (these functions are protected rather than private only to get rid of
00164     //    some annoying compiler warnings.)
00165     //   (note that no copy constructor is declared alongside operator=.)
00166     // -----------------------------------------------------------------------
00167     XMLRecognizer();
00168     ~XMLRecognizer();
00169     void operator=(const XMLRecognizer&);
00170 };
00171 
00172 #endif

Generated on Tue Nov 19 09:36:35 2002 by doxygen1.3-rc1