Oracle Coherence for C++ API
Release 3.6.0.0

E15728-01

coherence/lang/String.hpp

00001 /*
00002 * String.hpp
00003 *
00004 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
00005 *
00006 * Oracle is a registered trademarks of Oracle Corporation and/or its
00007 * affiliates.
00008 *
00009 * This software is the confidential and proprietary information of Oracle
00010 * Corporation. You shall not disclose such confidential and proprietary
00011 * information and shall use it only in accordance with the terms of the
00012 * license agreement you entered into with Oracle.
00013 *
00014 * This notice may not be removed or altered.
00015 */
00016 #ifndef COH_STRING_HPP
00017 #define COH_STRING_HPP
00018 
00019 #include "coherence/lang/compatibility.hpp"
00020 
00021 #include "coherence/lang/Array.hpp"
00022 #include "coherence/lang/Comparable.hpp"
00023 #include "coherence/lang/Object.hpp"
00024 
00025 #include <memory>
00026 #include <ostream>
00027 #include <sstream>
00028 #include <string>
00029 
00030 COH_OPEN_NAMESPACE2(coherence,lang)
00031 
00032 /**
00033 * @internal
00034 *
00035 * Intermediate base through which String derives from Array<octet_t> with
00036 * protected access; it is needed because spec based class definitions
00037 * don't have a notion of protected inheritance.
00038 */
00039 class COH_EXPORT_SPEC ProtectedOctetArray
00040     : protected Array<octet_t>
00041     {
00042     public: // re-declared so these names stay public despite the base access
00043         typedef Array<octet_t>::super super;
00044         typedef Array<octet_t>::alias alias;
00045 
00046     protected: // construction and destruction are not publicly accessible
00047         ProtectedOctetArray(size32_t cb, octet_t* ab)
00048             : Array<octet_t>(cb, ab) // arguments are forwarded unchanged
00049             {}
00050 
00051         ProtectedOctetArray(ProtectedOctetArray::View vThat,
00052             size32_t iFrom, size32_t iTo)
00053             : Array<octet_t>(vThat, iFrom, iTo) // forwarded unchanged
00054             {}
00055 
00056         virtual ~ProtectedOctetArray()
00057             {}
00058     };
00059 
00060 
00061 /**
00062 * A managed C-style (NUL terminated) string.
00063 *
00064 * In addition to exposing the underlying char array, the String class
00065 * supports transformations to and from Unicode code points within the Basic
00066 * Multilingual Plane (BMP):
00067 *
00068 * <ul>
00069 * <li>UTF-8  BMP char array</li>
00070 * <li>UTF-16 BMP wchar_t array (on platforms where wchar_t is >= 16 bits)</li>
00071 * <li>UTF-8  BMP octet_t array</li>
00072 * <li>UTF-16 BMP char16_t array</li>
00073 * </ul>
00074 *
00075 * Note: the ASCII character set is a subset of UTF-8 BMP.
00076 *
00077 * Unlike most managed types in the Coherence class hierarchy, Strings are
00078 * auto-boxable by default. That is, a String::Handle or String::View can be
00079 * directly assigned from or to common string representations.  For example
00080 * the following code is legal:
00081 * @code
00082 * String::Handle hs = "hello world";
00083 * @endcode
00084 * as is
00085 * @code
00086 * void someFunction(String::View vs);
00087 *
00088 * someFunction("some value");
00089 * @endcode
00090 *
00091 * @see StringHandle for details
00092 *
00093 * @author mf/jh/djl  2007.07.05
00094 */
00095 class COH_EXPORT String
00096     : public cloneable_spec<String,
00097         extends<ProtectedOctetArray>,
00098         implements<Comparable> >
00099     {
00100     friend class factory<String>;
00101 
00102     // ----- constants ------------------------------------------------------
00103 
00104     public:
00105         /**
00106         * The largest possible value of type size32_t.
00107         */
00108         static const size32_t npos = size32_t(-1);
00109 
00110 
00111     // ----- typedefs -------------------------------------------------------
00112 
00113     public:
00114         /**
00115         * While StringHandle boxes a number of common string types, String is
00116         * still compatible with BoxHandle, and when used with it can box to
00117         * only one type. By default Strings are boxable from a number of
00118         * types, see StringHandle for details.
00119         */
00120         typedef std::string BoxedType;
00121 
00122 
00123     // ----- nested class: StringHandle -------------------------------------
00124 
00125     public:
00126         /**
00127         * StringHandle provides standard TypedHandle features as well as
00128         * auto-boxing support for standard string types including:
00129         *
00130         * <ul>
00131         * <li>char[]       C-style NUL terminated char array</li>
00132         * <li>std::string  STL string</li>
00133         * <li>std::wstring STL wide string</li>
00134         * </ul>
00135         *
00136         * Boxing from wchar_t[] is supported, but requires an explicit
00137         * constructor call in order to avoid ambiguity when assigning a
00138         * String handle/view to NULL.
00139         *
00140         * Unboxing to char[] and wchar_t[] is not supported as it is unsafe to
00141         * maintain a reference to the underlying character array without
00142         * holding a reference to the String. Unboxing to std::string, and
00143         * std::wstring is both supported and safe.
00144         */
00145         template<class T> class StringHandle
00146             : public TypedHandle<T>
00147             {
00148             // ----- constructors ---------------------------------------
00149 
00150             public:
00151                 /**
00152                 * Create an empty StringHandle.
00153                 */
00154                 StringHandle()
00155                     : TypedHandle<T>()
00156                     {
00157                     }
00158 
00159                 /**
00160                 * Box a NUL terminated char array; NULL leaves the handle empty.
00161                 */
00162                 StringHandle(const char* ach)
00163                     : TypedHandle<T>()
00164                     {
00165                     if (NULL != ach)
00166                         {
00167                         TypedHandle<T>::operator=(T::create(ach));
00168                         }
00169                     }
00170 
00171                 /**
00172                 * Box a NUL terminated wchar_t array; NULL leaves it empty.
00173                 */
00174                 explicit StringHandle(const wchar_t* ach)
00175                     : TypedHandle<T>()
00176                     {
00177                     if (NULL != ach)
00178                         {
00179                         TypedHandle<T>::operator=(T::create(ach));
00180                         }
00181                     }
00182 
00183                 /**
00184                 * Box an STL string by passing it to T::create().
00185                 */
00186                 template<class C, class R, class A>
00187                 StringHandle(const std::basic_string<C, R, A>& s)
00188                     : TypedHandle<T>(T::create(s))
00189                     {
00190                     }
00191 
00192                 /**
00193                 * Converting constructor from a TypedHandle of another type.
00194                 * Named without <T>, as C++20 rejects a template-id here.
00195                 */
00196                 template<class O> StringHandle(const TypedHandle<O>& that)
00197                     : TypedHandle<T>(that)
00198                     {
00199                     }
00200 
00201                 /**
00202                 * The copy constructor.
00203                 */
00204                 StringHandle(const StringHandle& that)
00205                     : TypedHandle<T>(that)
00206                     {
00207                     }
00208 
00209                 /**
00210                 * Create a new StringHandle from the raw pointer.
00211                 */
00212                 explicit StringHandle(T* o)
00213                     : TypedHandle<T>(o)
00214                     {
00215                     }
00216 
00217             // ----- operators ------------------------------------------
00218 
00219             public:
00220                 /**
00221                 * Assign from a TypedHandle of another (convertible) type.
00222                 */
00223                 template<class O>
00224                 StringHandle& operator=(const TypedHandle<O>& that)
00225                     {
00226                     TypedHandle<T>::operator=(that);
00227                     return *this;
00228                     }
00229 
00230                 /**
00231                 * The "boxing" operator; assigning NULL assigns a NULL handle.
00232                 */
00233                 StringHandle& operator=(const char* ach)
00234                     {
00235                     if (NULL == ach)
00236                         {
00237                         TypedHandle<T>::operator=(NULL);
00238                         }
00239                     else
00240                         {
00241                         TypedHandle<T>::operator=(T::create(ach));
00242                         }
00243                     return *this;
00244                     }
00245 
00246                 /**
00247                 * The "boxing" operator for STL strings, using T::create().
00248                 */
00249                 template<class C, class R, class A>
00250                 StringHandle& operator=(const std::basic_string<C, R, A>& s)
00251                     {
00252                     TypedHandle<T>::operator=(T::create(s));
00253                     return *this;
00254                     }
00255 
00256                 /**
00257                 * The "unboxing" operator; a NULL handle ends in coh_throw_npe.
00258                 *
00259                 * @return the referenced String converted to an STL string
00260                 */
00261                 template<class C, class R, class A>
00262                 operator std::basic_string<C, R, A>() const
00263                     {
00264                     const T* pT = TypedHandle<T>::get();
00265                     if (NULL == pT)
00266                         {
00267                         coh_throw_npe(typeid(T));
00268                         }
00269                     return (std::basic_string<C, R, A>) *pT;
00270                     }
00271             };
00272 
00273     // ----- handle definitions ---------------------------------------------
00274 
00275     public:
00276         /**
00277         * Handle definition.
00278         */
00279         typedef StringHandle<String> Handle;
00280 
00281         /**
00282         * View definition.
00283         */
00284         typedef StringHandle<const String> View;
00285 
00286 
00287     // ----- factory methods ------------------------------------------------
00288 
00289     public:
00290         /**
00291         * Create a String from a C-style NUL terminated char array.
00292         *
00293         * @param achSrc  the NUL terminated string of chars to copy
00294         * @param cch     the number of chars to copy; if npos, copy until NUL
00295         *
00296         * @throws IllegalArgumentException if any of the elements in the
00297         *         array are not UTF-8 BMP
00298         */
00299         static String::Handle create(const char* achSrc = "", size32_t cch = npos);
00300 
00301         /**
00302         * Create a String from a C-style NUL terminated wide char array.
00303         *
00304         * @param achSrc  the NUL terminated string of wide chars to copy
00305         * @param cch     the number of chars to copy; if npos, copy until NUL
00306         *
00307         * @throws IllegalArgumentException if any of the elements in the
00308         *         array are not UTF-16 BMP
00309         */
00310         static String::Handle create(const wchar_t* achSrc, size32_t cch = npos);
00311 
00312         /**
00313         * Box the contents of an STL string or wstring into a new String.
00314         *
00315         * @param s  the STL string to copy
00316         *
00317         * @throws IllegalArgumentException if s.size() >= npos, or if any of
00318         *         the elements are not UTF-8 BMP (UTF-16 BMP for wide strings)
00319         */
00320         template<class C, class R, class A> static COH_INLINE
00321             String::Handle create(const std::basic_string<C, R, A>& s)
00322             {
00323             const size_t cElements = s.size();
00324             if (npos <= cElements) // size_t can be wider than size32_t
00325                 {
00326                 coh_throw_illegal_argument("maximum String length exceeded");
00327                 }
00328             return String::create(s.data(), size32_t(cElements));
00329             }
00330 
00331         /**
00332         * Create a String from a char array.
00333         *
00334         * @param vachSrc  the array of chars to copy
00335         * @param of       the offset at which to start copying
00336         * @param cch      the number of chars to copy; if npos, copy all
00337         *                 subsequent chars in the array
00338         *
00339         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00340         *         cch < npos and of + cch > vachSrc->length
00341         * @throws IllegalArgumentException if any of the elements in the
00342         *         array are not UTF-8 BMP
00343         */
00344         static String::Handle create(Array<char>::View vachSrc,
00345                 size32_t of = 0, size32_t cch = npos);
00346 
00347         /**
00348         * Create a String from a wide char array.
00349         *
00350         * @param vachSrc  the array of wide chars to copy
00351         * @param of       the offset at which to start copying
00352         * @param cch      the number of chars to copy; if npos, copy all
00353         *                 subsequent chars in the array
00354         *
00355         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00356         *         cch < npos and of + cch > vachSrc->length
00357         * @throws IllegalArgumentException if any of the elements in the
00358         *         array are not UTF-16 BMP
00359         * @throws UnsupportedOperationException if sizeof(wchar_t) <
00360         *         sizeof(char16_t)
00361         */
00362         static String::Handle create(Array<wchar_t>::View vachSrc,
00363                 size32_t of = 0, size32_t cch = npos);
00364 
00365         /**
00366         * Create a String from an octet array.
00367         *
00368         * @param vabSrc  the array of octets to copy
00369         * @param of      the offset at which to start copying
00370         * @param cb      the number of octets to copy; if npos, copy all
00371         *                subsequent octets in the array
00372         *
00373         * @throws IndexOutOfBoundsException if of > vabSrc->length or if
00374         *         cb < npos and of + cb > vabSrc->length
00375         * @throws IllegalArgumentException if any of the elements in the
00376         *         array are not UTF-8 BMP
00377         */
00378         static String::Handle create(Array<octet_t>::View vabSrc,
00379                 size32_t of = 0, size32_t cb = npos);
00380 
00381         /**
00382         * Create a String from a 16-bit char array.
00383         *
00384         * @param vachSrc  the array of 16-bit chars to copy
00385         * @param of       the offset at which to start copying
00386         * @param cch      the number of chars to copy; if npos, copy all
00387         *                 subsequent chars in the array
00388         *
00389         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00390         *         cch < npos and of + cch > vachSrc->length
00391         * @throws IllegalArgumentException if any of the elements in the
00392         *         array are not UTF-16 BMP
00393         */
00394         static String::Handle create(Array<char16_t>::View vachSrc,
00395                 size32_t of = 0, size32_t cch = npos);
00396 
00397 
00398     // ----- constructors ---------------------------------------------------
00399 
00400     private:
00401         /**
00402         * Constructor.
00403         *
00404         * @param ccp  the number of code points in the string
00405         * @param cb   the number of octets in the string
00406         * @param ab   the String's octets
00407         */
00408         String(size32_t ccp, size32_t cb, octet_t* ab);
00409 
00410         /**
00411         * Copy constructor.
00412         */
00413         String(const String& that);
00414 
00415 
00416     // ----- String interface -----------------------------------------------
00417 
00418     public:
00419         /**
00420         * Return true iff the String contains only ASCII (7-bit)
00421         * characters. In this case each character is represented by a single
00422         * char, otherwise a character can take between one and three chars.
00423         *
00424         * @return true iff the String contains only ASCII characters
00425         */
00426         virtual bool isASCII() const;
00427 
00428         /**
00429         * Return the number of unicode code points (characters) in this String.
00430         *
00431         * @return the number of characters in this String
00432         */
00433         virtual size32_t length() const;
00434 
00435         /**
00436         * Return the String as a C-style NUL terminated char array.
00437         *
00438         * If the String is non-ASCII then the String::next() method may be
00439         * used to expand the char array into a sequence of char16_t unicode
00440         * characters.
00441         *
00442         * The returned array's lifetime is bound to the lifetime of the
00443         * String which it was returned from. Specifically it is unsafe to use
00444         * the returned char* while not holding a handle to the String.
00445         *
00446         * @return the char array representing the String.
00447         */
00448         virtual const char* getCString() const;
00449 
00450         /**
00451         * Compare this String against the supplied C-style string.
00452         *
00453         * @param ach  the NUL terminated C-style string to compare to this
00454         *             String
00455         * @param cch  the length of the supplied string, or npos to rely on
00456         *             NUL terminator
00457         *
00458         * @return true iff the two strings are identical
00459         */
00460         virtual bool equals(const char* ach, size32_t cch = npos) const;
00461 
00462         /**
00463         * Compare this String against the supplied C-style wide char string.
00464         *
00465         * @param ach  the NUL terminated C-style string to compare to this
00466         *             String
00467         * @param cch  the length of the supplied string, or npos to rely on
00468         *             NUL terminator
00469         *
00470         * @return true iff the two strings are identical
00471         *
00472         * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t)
00473         */
00474         virtual bool equals(const wchar_t* ach, size32_t cch = npos) const;
00475 
00476         /**
00477         * Test this String for equality with an STL string or wstring.
00478         *
00479         * @param s  the STL string to compare to this String
00480         *
00481         * @return true iff s has fewer than npos elements and equals() accepts it
00482         */
00483         template<class C, class R, class A> COH_INLINE
00484             bool equalsStd(const std::basic_string<C, R, A>& s) const
00485             {
00486             const size_t cElements = s.size();
00487             return npos > cElements && equals(s.data(), size32_t(cElements));
00488             }
00489 
00490         /**
00491         * Convert the String to an STL string (a copy of its UTF-8 octets) or
00492         * to a wstring (one element per code point, decoded using next()).
00493         *
00494         * @return the std::string/wstring representation
00495         */
00496         template<class C, class R, class A> COH_INLINE
00497             operator std::basic_string<C, R, A>() const
00498             {
00499             if (sizeof(C) == sizeof(octet_t)) // length() counts code points,
00500                 { // so size the copy by the octet array, less its NUL
00501                 return std::basic_string<C, R, A>((const C*) getCString(),
00502                         getOctets()->length - 1);
00503                 }
00504 
00505             if (sizeof(C) < sizeof(char16_t))
00506                 {
00507                 coh_throw_unsupported_operation("unsupported string type");
00508                 }
00509 
00510             typename std::basic_string<C, R, A>::size_type cch =
00511                 typename std::basic_string<C, R, A>::size_type(length());
00512             const char* iter = getCString();
00513             std::basic_string<C, R, A> ws;
00514             ws.reserve(cch);
00515             for (typename std::basic_string<C, R, A>::size_type
00516                     i = 0; i < cch; ++i)
00517                 {
00518                 ws.push_back((C) String::next(iter)); // next() advances iter
00519                 }
00520             return ws;
00521             }
00522 
00523         /**
00524         * Return the index of a substring within this String.
00525         *
00526         * @param vsSearch  the substring to search for in this String
00527         * @param iBegin    the location in the string to start searching
00528         *
00529         * @return the index of the substring found within this String or npos
00530         */
00531         virtual size32_t indexOf(String::View vsSearch,
00532                 size32_t iBegin = 0) const;
00533 
00534         /**
00535         * Return the index of a character within this String.
00536         *
00537         * @param chSearch  the character to search for in this String
00538         * @param iBegin    the location in this String to start searching
00539         *
00540         * @return the index of the character found within this String or npos
00541         */
00542         virtual size32_t indexOf(char16_t chSearch,
00543                 size32_t iBegin = 0) const;
00544 
00545         /**
00546         * Return the index of a substring within this String by searching
00547         * backward from the given beginning index.
00548         *
00549         * @param vsSearch  the substring to search for within this String
00550         * @param iBegin    the location in this String to start searching
00551         *
00552         * @return the index of the substring found within this String or npos
00553         */
00554         virtual size32_t lastIndexOf(String::View vsSearch,
00555                 size32_t iBegin = npos) const;
00556 
00557         /**
00558         * Return the index of a character within this String by searching
00559         * backward from the given beginning index.
00560         *
00561         * @param chSearch  the character to search for in this String
00562         * @param iBegin    the location in this String to start searching
00563         *
00564         * @return the index of the character found within this String or npos
00565         */
00566         virtual size32_t lastIndexOf(char16_t chSearch,
00567                 size32_t iBegin = npos) const;
00568 
00569         /**
00570         * Return a new String comprised of the substring of this string
00571         * from iBegin (inclusive) to iEnd (exclusive).
00572         *
00573         * @param iBegin    the starting index from which to create the string
00574         * @param iEnd      the index of where the substring should stop
00575         *                  in this String or npos for end of string
00576         *
00577         * @return the new substring created from this String
00578         */
00579         virtual String::View substring(size32_t iBegin,
00580                 size32_t iEnd = npos) const;
00581 
00582         /**
00583         * Return true if this String starts with the supplied String.
00584         *
00585         * @param vsSearch  the string to search for
00586         *
00587         * @return true if this String starts with vsSearch
00588         */
00589         virtual bool startsWith(String::View vsSearch) const;
00590 
00591         /**
00592         * Return true if this String ends with the supplied String.
00593         *
00594         * @param vsSearch  the string to search for
00595         *
00596         * @return true if this String ends with vsSearch
00597         */
00598         virtual bool endsWith(String::View vsSearch) const;
00599 
00600         /**
00601         * A substring of this String is compared to a substring of a supplied
00602         * String.
00603         *
00604         * @param ofSource  the offset in this String where comparison begins
00605         * @param vsOther   the String whose substring is compared against
00606         *                  this String
00607         * @param ofOther   the offset in vsOther where comparison begins
00608         * @param cch       the count of characters to compare
00609         *
00610         * @return true iff the two substrings match
00611         */
00612         virtual bool regionMatches(size32_t ofSource,
00613                 String::View vsOther, size32_t ofOther = 0,
00614                 size32_t cch = npos) const;
00615 
00616         /**
00617         * Return a String that is the result of removing all leading and
00618         * trailing white space.
00619         *
00620         * @return a trimmed copy of this String
00621         */
00622         String::View trim() const;
00623 
00624         /**
00625         * Return the underlying UTF-8 BMP NUL terminated Array<octet_t>.
00626         *
00627         * For performance reasons the returned Array may not support cloning.
00628         * If clone() is called the result will be a String, which depending on
00629         * the compiler's handling of dynamic_cast to a private super class may
00630         * fail to be castable to an Array<octet_t>.
00631         *
00632         * @return the Array<octet_t>
00633         */
00634         virtual Array<octet_t>::View getOctets() const;
00635 
00636 
00637     // ----- Comparable interface -------------------------------------------
00638 
00639     public:
00640         /**
00641         * {@inheritDoc}
00642         */
00643         virtual int32_t compareTo(Object::View v) const;
00644 
00645 
00646     // ----- Object interface -----------------------------------------------
00647 
00648     public:
00649         /**
00650         * {@inheritDoc}
00651         */
00652         virtual size32_t hashCode() const;
00653 
00654         /**
00655         * {@inheritDoc}
00656         */
00657         virtual void toStream(std::ostream& out) const;
00658 
00659         /**
00660         * {@inheritDoc}
00661         */
00662         virtual bool isImmutable() const;
00663 
00664         /**
00665         * {@inheritDoc}
00666         */
00667         virtual bool equals(Object::View v) const;
00668 
00669         /**
00670         * {@inheritDoc}
00671         */
00672         virtual size32_t sizeOf() const;
00673 
00674 
00675     // ----- static helpers -------------------------------------------------
00676 
00677     public:
00678         /**
00679         * Return the Unicode character as UTF-16 from the char array, and
00680         * increment the pointer such that it references the start of the
00681         * next Unicode character.
00682         *
00683         * @param ach  pointer to the start of the next UTF-8 code point.
00684         *
00685         * @return the next Unicode character
00686         *
00687         * @throws IllegalArgumentException  if a non UTF-8 BMP sequence is
00688         *                                   encountered
00689         */
00690         static char16_t next(const char*& ach);
00691 
00692 
00693     // ----- data members ---------------------------------------------------
00694 
00695     protected:
00696         /**
00697         * The number of unicode code points (characters) in the String.
00698         */
00699         size32_t m_ccp;
00700 
00701 
00702     // ----- constants ------------------------------------------------------
00703 
00704     public:
00705         /**
00706         * String referencing NULL.
00707         *
00708         * This constant is generally only needed for defining a default
00709         * value for a function parameter:
00710         *
00711         * @code
00712         * void function(String::View vs = String::NULL_STRING)
00713         * @endcode
00714         *
00715         * Simply passing NULL as a default is not allowable for Strings as due
00716         * to auto-boxing the compiler is unable to determine if NULL indicates
00717         * a String* or a char*. For all other uses of NULL with String the
00718         * literal NULL is preferred.
00719         */
00720         static const char* const NULL_STRING;
00721     };
00722 
00723 
00724 // ----- helper macros ------------------------------------------------------
00725 
00726 /**
00727 * This macro will take any set of streamable contents and turn them into a
00728 * coherence#lang#String instance. The no-op flush() merely yields an lvalue
00729 * reference to the temporary stream, which the streaming operators need.
00730 * @param CONTENTS  the contents to use in constructing the String.
00731 *
00732 * Usage example:
00733 * @code
00734 * String::Handle hsFoo = COH_TO_STRING("This value: " << 5 << " is my value");
00735 * @endcode
00736 */
00737 #define COH_TO_STRING(CONTENTS) \
00738     coherence::lang::String::create(static_cast<std::stringstream&>( \
00739             std::stringstream().flush() \
00740                 << CONTENTS).str())
00741 
00742 COH_CLOSE_NAMESPACE2
00743 
00744 #endif // COH_STRING_HPP
Copyright © 2000, 2010, Oracle and/or its affiliates. All rights reserved.