coherence/lang/String.hpp

00001 /*
00002 * String.hpp
00003 *
00004 * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
00005 *
00006 * Oracle is a registered trademarks of Oracle Corporation and/or its
00007 * affiliates.
00008 *
00009 * This software is the confidential and proprietary information of Oracle
00010 * Corporation. You shall not disclose such confidential and proprietary
00011 * information and shall use it only in accordance with the terms of the
00012 * license agreement you entered into with Oracle.
00013 *
00014 * This notice may not be removed or altered.
00015 */
00016 /*
00017 * String.hpp
00018 *
00019 * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
00020 *
00021 * Oracle is a registered trademarks of Oracle Corporation and/or its
00022 * affiliates.
00023 *
00024 * This software is the confidential and proprietary information of Oracle
00025 * Corporation. You shall not disclose such confidential and proprietary
00026 * information and shall use it only in accordance with the terms of the
00027 * license agreement you entered into with Oracle.
00028 *
00029 * This notice may not be removed or altered.
00030 */
00031 #ifndef COH_STRING_HPP
00032 #define COH_STRING_HPP
00033 
00034 #include "coherence/lang/compatibility.hpp"
00035 
00036 #include "coherence/lang/Array.hpp"
00037 #include "coherence/lang/Comparable.hpp"
00038 #include "coherence/lang/Object.hpp"
00039 
00040 #include <memory>
00041 #include <ostream>
00042 #include <sstream>
00043 #include <string>
00044 
00045 COH_OPEN_NAMESPACE2(coherence,lang)
00046 
00047 /**
00048 * @internal
00049 *
00050 * Intermediate base which inherits Array<octet_t> protectedly on behalf of
00051 * String, as spec based class definitions don't have a notion of protected
00052 * inheritance. String names this class in its extends<> clause.
00053 */
00054 class COH_EXPORT_SPEC ProtectedOctetArray
00055     : protected Array<octet_t>
00056     {
00057     public:
00058         typedef Array<octet_t>::super super; // re-declared here so that both
00059         typedef Array<octet_t>::alias alias; // typedefs are public in this class
00060 
00061     protected:
00062         ProtectedOctetArray(size32_t cb, octet_t* ab)
00063             : Array<octet_t>(cb, ab) // forwarded as is to the Array constructor
00064             {}
00065 
00066         ProtectedOctetArray(ProtectedOctetArray::View vThat,
00067             size32_t iFrom, size32_t iTo)
00068             : Array<octet_t>(vThat, iFrom, iTo) // forwarded as is as well
00069             {}
00070 
00071         virtual ~ProtectedOctetArray() // protected, like the constructors
00072             {}
00073     };
00074 
00075 
00076 /**
00077 * A managed C-style (NUL terminated) string.
00078 *
00079 * In addition to exposing the underlying char array, the String class
00080 * supports transformations to and from Unicode code points within the Basic
00081 * Multilingual Plane (BMP):
00082 *
00083 * <ul>
00084 * <li>UTF-8  BMP char array</li>
00085 * <li>UTF-16 BMP wchar_t array (on platforms where wchar_t is >= 16 bits)</li>
00086 * <li>UTF-8  BMP octet_t array</li>
00087 * <li>UTF-16 BMP char16_t array</li>
00088 * </ul>
00089 *
00090 * Note: the ASCII character set is a subset of UTF-8 BMP.
00091 *
00092 * Unlike most managed types in the Coherence class hierarchy, Strings are
00093 * auto-boxable by default. That is a String::Handle or String::View can be
00094 * directly assigned from or to common string representations.  For example
00095 * the following code is legal:
00096 * @code
00097 * String::Handle hs = "hello world";
00098 * @endcode
00099 * as is
00100 * @code
00101 * void someFunction(String::View vs);
00102 *
00103 * someFunction("some value");
00104 * @endcode
00105 *
00106 * @see StringHandle for details
00107 *
00108 * @author mf/jh/djl  2007.07.05
00109 */
00110 class COH_EXPORT String
00111     : public cloneable_spec<String,
00112         extends<ProtectedOctetArray>,
00113         implements<Comparable> >
00114     {
00115     friend class factory<String>;
00116 
00117     // ----- constants ------------------------------------------------------
00118 
00119     public:
00120         /**
00121         * The largest possible value of type size32_t.
00122         */
00123         static const size32_t npos = size32_t(-1);
00124 
00125 
00126     // ----- typedefs -------------------------------------------------------
00127 
00128     public:
00129         /**
00130         * While StringHandle boxes a number of common string types, String is
00131         * still compatible with BoxHandle, and when used with it can box to
00132         * only one type. By default Strings are boxable from a number of
00133         * types, see StringHandle for details.
00134         */
00135         typedef std::string BoxedType;
00136 
00137 
00138     // ----- nested class: StringHandle -------------------------------------
00139 
00140     public:
00141         /**
00142         * StringHandle provides standard TypedHandle features as well as
00143         * auto-boxing support for standard string types including:
00144         *
00145         * <ul>
00146         * <li>char[]       C-style NUL terminated char array</li>
00147         * <li>std::string  STL string</li>
00148         * <li>std::wstring STL wide string</li>
00149         * </ul>
00150         *
00151         * Boxing from wchar_t[] is supported, but requires an explicit
00152         * constructor call in order to avoid ambiguity when assigning a
00153         * String handle/view to NULL.
00154         *
00155         * Unboxing to char[] and wchar_t[] is not supported as it is unsafe to
00156         * maintain a reference to the underlying character array without
00157         * holding a reference to the String. Unboxing to std::string, and
00158         * std::wstring is both supported and safe.
00159         */
00160         template<class T> class StringHandle
00161             : public TypedHandle<T>
00162             {
00163             // ----- constructors ---------------------------------------
00164 
00165             public:
00166                 /**
00167                 * Create an empty (default constructed) StringHandle.
00168                 */
00169                 StringHandle()
00170                     : TypedHandle<T>()
00171                     {
00172                     }
00173 
00174                 /**
00175                 * Box a C-style NUL terminated char array; NULL leaves the handle empty.
00176                 */
00177                 StringHandle(const char* ach)
00178                     : TypedHandle<T>()
00179                     {
00180                     if (NULL != ach)
00181                         {
00182                         TypedHandle<T>::operator=(T::create(ach));
00183                         }
00184                     }
00185 
00186                 /**
00187                 * Box a C-style wide char array; explicit, so NULL stays unambiguous.
00188                 */
00189                 explicit StringHandle(const wchar_t* ach)
00190                     : TypedHandle<T>()
00191                     {
00192                     if (NULL != ach)
00193                         {
00194                         TypedHandle<T>::operator=(T::create(ach));
00195                         }
00196                     }
00197 
00198                 /**
00199                 * Box an STL string or wstring by handing it to T::create().
00200                 */
00201                 template<class C, class R, class A>
00202                 StringHandle(const std::basic_string<C, R, A>& s)
00203                     : TypedHandle<T>(T::create(s))
00204                     {
00205                     }
00206 
00207                 /**
00208                 * Create a new StringHandle from the TypedHandle with a type
00209                 * conversion.
00210                 */
00211                 template<class O> StringHandle(const TypedHandle<O>& that)
00212                     : TypedHandle<T>(that)
00213                     {
00214                     }
00215 
00216                 /**
00217                 * The copy constructor.
00218                 */
00219                 StringHandle(const StringHandle& that)
00220                     : TypedHandle<T>(that)
00221                     {
00222                     }
00223 
00224                 /**
00225                 * Create a new StringHandle from the raw pointer.
00226                 */
00227                 explicit StringHandle(T* o)
00228                     : TypedHandle<T>(o)
00229                     {
00230                     }
00231 
00232             // ----- operators ------------------------------------------
00233 
00234             public:
00235                 /**
00236                 * Assign from a TypedHandle, delegating to TypedHandle<T>::operator=.
00237                 */
00238                 template<class O>
00239                 StringHandle& operator=(const TypedHandle<O>& that)
00240                     {
00241                     TypedHandle<T>::operator=(that);
00242                     return *this;
00243                     }
00244 
00245                 /**
00246                 * The "boxing" operator for char arrays; NULL is assigned as NULL.
00247                 */
00248                 StringHandle& operator=(const char* ach)
00249                     {
00250                     if (NULL == ach)
00251                         {
00252                         TypedHandle<T>::operator=(NULL);
00253                         }
00254                     else
00255                         {
00256                         TypedHandle<T>::operator=(T::create(ach));
00257                         }
00258                     return *this;
00259                     }
00260 
00261                 /**
00262                 * The "boxing" operator for STL strings and wstrings.
00263                 */
00264                 template<class C, class R, class A>
00265                 StringHandle& operator=(const std::basic_string<C, R, A>& s)
00266                     {
00267                     TypedHandle<T>::operator=(T::create(s));
00268                     return *this;
00269                     }
00270 
00271                 /**
00272                 * The "unboxing" operator; a NULL handle goes to coh_throw_npe().
00273                 *
00274                 * @return a copy of the referenced Object
00275                 */
00276                 template<class C, class R, class A>
00277                 operator std::basic_string<C, R, A>() const
00278                     {
00279                     const T* pT = TypedHandle<T>::get();
00280                     if (NULL == pT)
00281                         {
00282                         coh_throw_npe(typeid(T));
00283                         }
00284                     return (std::basic_string<C, R, A>) *pT;
00285                     }
00286             };
00287 
00288     // ----- handle definitions ---------------------------------------------
00289 
00290     public:
00291         /**
00292         * Handle definition.
00293         */
00294         typedef StringHandle<String> Handle;
00295 
00296         /**
00297         * View definition.
00298         */
00299         typedef StringHandle<const String> View;
00300 
00301 
00302     // ----- factory methods ------------------------------------------------
00303 
00304     public:
00305         /**
00306         * Create a String from a C-style NUL terminated char array.
00307         *
00308         * @param achSrc  the NUL terminated string of chars to copy
00309         * @param cch     the number of chars to copy; if npos, copy until NUL
00310         *
00311         * @throws IllegalArgumentException if any of the elements in the
00312         *         array are not UTF-8 BMP
00313         */
00314         static String::Handle create(const char* achSrc = "", size32_t cch = npos);
00315 
00316         /**
00317         * Create a String from a C-style NUL terminated wide char array.
00318         *
00319         * @param achSrc  the NUL terminated string of wide chars to copy
00320         * @param cch     the number of chars to copy; if npos, copy until NUL
00321         *
00322         * @throws IllegalArgumentException if any of the elements in the
00323         *         array are not UTF-16 BMP
00324         */
00325         static String::Handle create(const wchar_t* achSrc, size32_t cch = npos);
00326 
00327         /**
00328         * Create a String by copying the contents of an STL string or wstring.
00329         *
00330         * @param s  the STL string to copy
00331         *
00332         * @throws IllegalArgumentException if s.size() is npos or greater, or
00333         *         if the character array factory rejects the contents
00334         */
00335         template<class C, class R, class A> static COH_INLINE
00336             String::Handle create(const std::basic_string<C, R, A>& s)
00337             {
00338             const size_t cElements = s.length();
00339             if (npos <= cElements) // only possible where size_t is 64 bits
00340                 {
00341                 coh_throw_illegal_argument("maximum String length exceeded");
00342                 }
00343             return String::create(s.data(), static_cast<size32_t>(cElements));
00344             }
00345 
00346         /**
00347         * Create a String from a char array.
00348         *
00349         * @param vachSrc  the array of chars to copy
00350         * @param of       the offset at which to start copying
00351         * @param cch      the number of chars to copy; if npos, copy all
00352         *                 subsequent chars in the array
00353         *
00354         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00355         *         cch < npos and of + cch > vachSrc->length
00356         * @throws IllegalArgumentException if any of the elements in the
00357         *         array are not UTF-8 BMP
00358         */
00359         static String::Handle create(Array<char>::View vachSrc,
00360                 size32_t of = 0, size32_t cch = npos);
00361 
00362         /**
00363         * Create a String from a wide char array.
00364         *
00365         * @param vachSrc  the array of chars to copy
00366         * @param of       the offset at which to start copying
00367         * @param cch      the number of chars to copy; if npos, copy all
00368         *                 subsequent chars in the array
00369         *
00370         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00371         *         cch < npos and of + cch > vachSrc->length
00372         * @throws IllegalArgumentException if any of the elements in the
00373         *         array are not UTF-16 BMP
00374         * @throws UnsupportedOperationException if sizeof(wchar_t) <
00375         *         sizeof(char16_t)
00376         */
00377         static String::Handle create(Array<wchar_t>::View vachSrc,
00378                 size32_t of = 0, size32_t cch = npos);
00379 
00380         /**
00381         * Create a String from an octet array.
00382         *
00383         * @param vabSrc  the array of octets to copy
00384         * @param of      the offset at which to start copying
00385         * @param cb      the number of octets to copy; if npos, copy all
00386         *                subsequent octets in the array
00387         *
00388         * @throws IndexOutOfBoundsException if of > vabSrc->length or if
00389         *         cb < npos and of + cb > vabSrc->length
00390         * @throws IllegalArgumentException if any of the elements in the
00391         *         array are not UTF-8 BMP
00392         */
00393         static String::Handle create(Array<octet_t>::View vabSrc,
00394                 size32_t of = 0, size32_t cb = npos);
00395 
00396         /**
00397         * Create a String from a 16-bit char array.
00398         *
00399         * @param vachSrc  the array of chars to copy
00400         * @param of       the offset at which to start copying
00401         * @param cch      the number of chars to copy; if npos, copy all
00402         *                 subsequent chars in the array
00403         *
00404         * @throws IndexOutOfBoundsException if of > vachSrc->length or if
00405         *         cch < npos and of + cch > vachSrc->length
00406         * @throws IllegalArgumentException if any of the elements in the
00407         *         array are not UTF-16 BMP
00408         */
00409         static String::Handle create(Array<char16_t>::View vachSrc,
00410                 size32_t of = 0, size32_t cch = npos);
00411 
00412 
00413     // ----- constructors ---------------------------------------------------
00414 
00415     private:
00416         /**
00417         * Constructor.
00418         *
00419         * @param ccp  the number of code points in the string
00420         * @param cb   the number of octets in the string
00421         * @param ab   the String's octets
00422         */
00423         String(size32_t ccp, size32_t cb, octet_t* ab);
00424 
00425         /**
00426         * Copy constructor.
00427         */
00428         String(const String& that);
00429 
00430 
00431     // ----- String interface -----------------------------------------------
00432 
00433     public:
00434         /**
00435         * Return true iff the String contains only ASCII (ISO-8859-1)
00436         * characters. In this case each character is represented by a single
00437         * char, otherwise a character can take between one and three chars.
00438         *
00439         * @return true iff the String contains only ASCII characters
00440         */
00441         virtual bool isASCII() const;
00442 
00443         /**
00444         * Return the number of unicode code points (characters) in this String.
00445         *
00446         * @return the number of characters in this String
00447         */
00448         virtual size32_t length() const;
00449 
00450         /**
00451         * Return the String as a C-style NUL terminated char array.
00452         *
00453         * If the String is non-ASCII then the String::next() method may be
00454         * used to expand the char array into a sequence of char16_t unicode
00455         * characters.
00456         *
00457         * The returned array's lifetime is bound to the lifetime of the
00458         * String which it was returned from. Specifically it is unsafe to use
00459         * the returned char* while not holding a handle to the String.
00460         *
00461         * @return the char array representing the String.
00462         */
00463         virtual const char* getCString() const;
00464 
00465         /**
00466         * Compare this String against the supplied C-style string.
00467         *
00468         * @param ach  the NUL terminated C-style string to compare to this
00469         *             String
00470         * @param cch  the length of the supplied string, or npos to rely on
00471         *             NUL terminator
00472         *
00473         * @return true iff the two strings are identical
00474         */
00475         virtual bool equals(const char* ach, size32_t cch = npos) const;
00476 
00477         /**
00478         * Compare this String against the supplied C-style wide char string.
00479         *
00480         * @param ach  the NUL terminated C-style string to compare to this
00481         *             String
00482         * @param cch  the length of the supplied string, or npos to rely on
00483         *             NUL terminator
00484         *
00485         * @return true iff the two strings are identical
00486         *
00487         * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t)
00488         */
00489         virtual bool equals(const wchar_t* ach, size32_t cch = npos) const;
00490 
00491         /**
00492         * Test this String for equality with an STL string or wstring.
00493         *
00494         * @param s  the STL string to compare to this String
00495         *
00496         * @return true iff identical; false whenever s.size() is npos or more
00497         */
00498         template<class C, class R, class A> COH_INLINE
00499             bool equalsStd(const std::basic_string<C, R, A>& s) const
00500             {
00501             const size_t cSrc = s.length();
00502             return npos > cSrc && equals(s.data(), static_cast<size32_t>(cSrc));
00503             }
00504 
00505         /**
00506         * Convert the String to any of the types supported by StringHandle,
00507         * namely an STL string or wstring. Single-octet character types receive
00508         * the UTF-8 octets unchanged; wider types one element per character.
00509         *
00510         * @return the std::string/wstring representation
00511         */
00512         template<class C, class R, class A> COH_INLINE
00513             operator std::basic_string<C, R, A>() const
00514             {
00515             if (sizeof(C) == sizeof(octet_t))
00516                 {
00517                 // length() counts characters, which undercounts non-ASCII octets
00518                 return std::basic_string<C, R, A>((const C*) getCString(),
00519                         getOctets()->length - 1); // octet count, less the NUL
00520                 }
00521             if (sizeof(C) < sizeof(char16_t))
00522                 {
00523                 coh_throw_unsupported_operation("unsupported string type");
00524                 }
00525             typename std::basic_string<C, R, A>::size_type cch =
00526                 typename std::basic_string<C, R, A>::size_type(length());
00527             const char* iter = getCString();
00528             std::basic_string<C, R, A> ws;
00529             ws.reserve(cch);
00530             for (typename std::basic_string<C, R, A>::size_type
00531                     i = 0; i < cch; ++i)
00532                 {
00533                 ws.push_back((C) String::next(iter));
00534                 }
00535             return ws;
00536             }
00537 
00538         /**
00539         * Return the index of a substring within this String.
00540         *
00541         * @param vsSearch  the substring to search for in this String
00542         * @param iBegin    the location in the string to start searching
00543         *
00544         * @return the index of the substring found within this String or npos
00545         */
00546         virtual size32_t indexOf(String::View vsSearch,
00547                 size32_t iBegin = 0) const;
00548 
00549         /**
00550         * Return the index of a character within this String.
00551         *
00552         * @param chSearch  the character to search for in this String
00553         * @param iBegin    the location in this String to start searching
00554         *
00555         * @return the index of the character found within this String or npos
00556         */
00557         virtual size32_t indexOf(char16_t chSearch,
00558                 size32_t iBegin = 0) const;
00559 
00560         /**
00561         * Return the index of a substring within this String by searching
00562         * backward from the given beginning index.
00563         *
00564         * @param vsSearch  the substring to search for within this String
00565         * @param iBegin    the location in this String to start searching
00566         *
00567         * @return the index of the substring found within this String or npos
00568         */
00569         virtual size32_t lastIndexOf(String::View vsSearch,
00570                 size32_t iBegin = npos) const;
00571 
00572         /**
00573         * Return the index of a character within this String by searching
00574         * backward from the given beginning index.
00575         *
00576         * @param chSearch  the character to search for in this String
00577         * @param iBegin    the location in this String to start searching
00578         *
00579         * @return the index of the character found within this String or npos
00580         */
00581         virtual size32_t lastIndexOf(char16_t chSearch,
00582                 size32_t iBegin = npos) const;
00583 
00584         /**
00585         * Return a new String comprised of the substring of this string
00586         * from iBegin (inclusive) to iEnd (exclusive).
00587         *
00588         * @param iBegin    the starting index from which to create the string
00589         * @param iEnd      the index of where the substring should stop
00590         *                  in this String or npos for end of string
00591         *
00592         * @return the new substring created from this String
00593         */
00594         virtual String::View substring(size32_t iBegin,
00595                 size32_t iEnd = npos) const;
00596 
00597         /**
00598         * Return true if this String starts with the supplied String.
00599         *
00600         * @param vsSearch  the string to search for
00601         *
00602         * @return true if this String starts with vsSearch
00603         */
00604         virtual bool startsWith(String::View vsSearch) const;
00605 
00606         /**
00607         * Return true if this String ends with the supplied String.
00608         *
00609         * @param vsSearch  the string to search for
00610         *
00611         * @return true if this String ends with vsSearch
00612         */
00613         virtual bool endsWith(String::View vsSearch) const;
00614 
00615         /**
00616         * A substring of this String is compared to a substring of a supplied
00617         * String.
00618         *
00619         * @param ofSource  the offset in this String where comparison begins
00620         * @param vsOther   the String whose substring is compared against
00621         *                  this String
00622         * @param ofOther   the offset in vsOther where comparison begins
00623         * @param cch       the count of characters to compare
00624         *
00625         * @return true iff the two substrings match
00626         */
00627         virtual bool regionMatches(size32_t ofSource,
00628                 String::View vsOther, size32_t ofOther = 0,
00629                 size32_t cch = npos) const;
00630 
00631         /**
00632         * Return a String that is the result of removing all leading and
00633         * trailing white space.
00634         *
00635         * @return a trimmed copy of this String
00636         */
00637         String::View trim() const;
00638 
00639         /**
00640         * Return the underlying UTF-8 BMP NUL terminated Array<octet_t>.
00641         *
00642         * For performance reasons the returned Array may not support cloning.
00643         * If clone() is called the result will be a String, which depending on
00644         * the compiler's handling of dynamic_cast to a private super class may
00645         * fail to be castable to an Array<octet_t>.
00646         *
00647         * @return the Array<octet_t>
00648         */
00649         virtual Array<octet_t>::View getOctets() const;
00650 
00651 
00652     // ----- Comparable interface -------------------------------------------
00653 
00654     public:
00655         /**
00656         * {@inheritDoc}
00657         */
00658         virtual int32_t compareTo(Object::View v) const;
00659 
00660 
00661     // ----- Object interface -----------------------------------------------
00662 
00663     public:
00664         /**
00665         * {@inheritDoc}
00666         */
00667         virtual size32_t hashCode() const;
00668 
00669         /**
00670         * {@inheritDoc}
00671         */
00672         virtual void toStream(std::ostream& out) const;
00673 
00674         /**
00675         * {@inheritDoc}
00676         */
00677         virtual bool isImmutable() const;
00678 
00679         /**
00680         * {@inheritDoc}
00681         */
00682         virtual bool equals(Object::View v) const;
00683 
00684         /**
00685         * {@inheritDoc}
00686         */
00687         virtual size32_t sizeOf() const;
00688 
00689 
00690     // ----- static helpers -------------------------------------------------
00691 
00692     public:
00693         /**
00694         * Return the Unicode character as UTF-16 from the char array, and
00695         * increment the pointer such that it references the start of the
00696         * next Unicode character.
00697         *
00698         * @param ach  pointer to the start of the next UTF-8 code point.
00699         *
00700         * @return the next Unicode character
00701         *
00702         * @throws IllegalArgumentException  if a non UTF-8 BMP sequence is
00703         *                                   encountered
00704         */
00705         static char16_t next(const char*& ach);
00706 
00707 
00708     // ----- data members ---------------------------------------------------
00709 
00710     protected:
00711         /**
00712         * The number of unicode code points (characters) in the String.
00713         */
00714         size32_t m_ccp;
00715 
00716 
00717     // ----- constants ------------------------------------------------------
00718 
00719     public:
00720         /**
00721         * String referencing NULL.
00722         *
00723         * This constant is generally only needed for defining a default
00724         * value for a function parameter:
00725         *
00726         * @code
00727         * void function(String::View vs = String::NULL_STRING)
00728         * @endcode
00729         *
00730         * Simply passing NULL as a default is not allowable for Strings as due
00731         * to auto-boxing the compiler is unable to determine if NULL indicates
00732         * a String* or a char*. For all other uses of NULL with String the
00733         * literal NULL is preferred.
00734         */
00735         static const char* const NULL_STRING;
00736     };
00737 
00738 
00739 // ----- helper macros ------------------------------------------------------
00740 
00741 /**
00742 * This macro will take any set of streamable contents and turn them into a
00743 * coherence#lang#String instance; flush() merely yields an lvalue stream.
00744 *
00745 * @param CONTENTS  the contents to use in constructing the String.
00746 *
00747 * Usage example:
00748 * @code
00749 * String::Handle hsFoo = COH_TO_STRING("This value: " << 5 << " is my value");
00750 * @endcode
00751 */
00752 #define COH_TO_STRING(CONTENTS) \
00753     coherence::lang::String::create(((std::stringstream&) \
00754             (std::stringstream().flush() /* temporary -> lvalue ostream& */ \
00755                 << CONTENTS)).str())
00756 
00757 COH_CLOSE_NAMESPACE2
00758 
00759 #endif // COH_STRING_HPP
Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.