00001 /* 00002 * String.hpp 00003 * 00004 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 00005 * 00006 * Oracle is a registered trademarks of Oracle Corporation and/or its 00007 * affiliates. 00008 * 00009 * This software is the confidential and proprietary information of Oracle 00010 * Corporation. You shall not disclose such confidential and proprietary 00011 * information and shall use it only in accordance with the terms of the 00012 * license agreement you entered into with Oracle. 00013 * 00014 * This notice may not be removed or altered. 00015 */ 00016 #ifndef COH_STRING_HPP 00017 #define COH_STRING_HPP 00018 00019 #include "coherence/lang/compatibility.hpp" 00020 00021 #include "coherence/lang/Array.hpp" 00022 #include "coherence/lang/Comparable.hpp" 00023 #include "coherence/lang/Object.hpp" 00024 00025 #include <memory> 00026 #include <ostream> 00027 #include <sstream> 00028 #include <string> 00029 00030 COH_OPEN_NAMESPACE2(coherence,lang) 00031 00032 /** 00033 * @internal 00034 * 00035 * Used to protect protected inheritance of Array<octet_t> by String, as 00036 * spec based class definitions don't have a notion of protected 00037 * inheritance. 00038 */ 00039 class COH_EXPORT_SPEC ProtectedOctetArray 00040 : protected Array<octet_t> 00041 { 00042 public: 00043 typedef Array<octet_t>::super super; 00044 typedef Array<octet_t>::alias alias; 00045 00046 protected: 00047 ProtectedOctetArray(size32_t cb, octet_t* ab) 00048 : Array<octet_t>(cb, ab) 00049 {} 00050 00051 ProtectedOctetArray(ProtectedOctetArray::View vThat, 00052 size32_t iFrom, size32_t iTo) 00053 : Array<octet_t>(vThat, iFrom, iTo) 00054 {} 00055 00056 virtual ~ProtectedOctetArray() 00057 {} 00058 }; 00059 00060 00061 /** 00062 * A managed C-style (NUL terminated) string. 00063 * 00064 * In addition to exposing the underlying char array, the String class 00065 * supports transformations to and from Unicode code points within the Basic 00066 * Multilingual Plane (BMP): 00067 * 00068 * <ul> 00069 * <li>UTF-8 BMP char array</li> 00070 * <li>UTF-16 BMP wchar_t array (on platforms where wchar_t is >= 16 bits)</li> 00071 * <li>UTF-8 BMP octet_t array</li> 00072 * <li>UTF-16 BMP char16_t array</li> 00073 * </ul> 00074 * 00075 * Note: the ASCII character set is a subset of UTF-8 BMP. 00076 * 00077 * Unlike most managed types in the Coherence class hierarchy, Strings are 00078 * auto-boxable by default. That is a String::Handle or String::View can be 00079 * directly assigned from or to common string representations. For example 00080 * the following code is legal: 00081 * @code 00082 * String::Handle hs = "hello world"; 00083 * @endcode 00084 * as is 00085 * @code 00086 * void someFunction(String::View vs); 00087 * 00088 * someFunction("some value"); 00089 * @endcode 00090 * 00091 * @see StringHandle for details 00092 * 00093 * @author mf/jh/djl 2007.07.05 00094 */ 00095 class COH_EXPORT String 00096 : public cloneable_spec<String, 00097 extends<ProtectedOctetArray>, 00098 implements<Comparable> > 00099 { 00100 friend class factory<String>; 00101 00102 // ----- constants ------------------------------------------------------ 00103 00104 public: 00105 /** 00106 * The largest possible value of type size32_t. 00107 */ 00108 static const size32_t npos = size32_t(-1); 00109 00110 00111 // ----- typedefs ------------------------------------------------------- 00112 00113 public: 00114 /** 00115 * While StringHandle boxes a number of common string types, String is 00116 * still compatible with BoxHandle, and when used with it can box to 00117 * only one type. By default Strings are boxable from a number of 00118 * types, see StringHandle for details. 00119 */ 00120 typedef std::string BoxedType; 00121 00122 00123 // ----- nested class: StringHandle ------------------------------------- 00124 00125 public: 00126 /** 00127 * StringHandle provides standard TypedHandle features as well as 00128 * auto-boxing support for standard string types including: 00129 * 00130 * <ul> 00131 * <li>char[] C-style NUL terminated char array</li> 00132 * <li>std::string STL string</li> 00133 * <li>std::wstring STL wide string</li> 00134 * </ul> 00135 * 00136 * Boxing from wchar_t[] is supported, but requires an explicit 00137 * constructor call in order to avoid ambiguity when assigning a 00138 * String handle/view to NULL. 00139 * 00140 * Unboxing to char[] and wchar[] is not supported as it is unsafe to 00141 * maintain a reference to the underlying character array without 00142 * holding a reference to the String. Unboxing to std::string, and 00143 * std::wstring is both supported and safe. 00144 */ 00145 template<class T> class StringHandle 00146 : public TypedHandle<T> 00147 { 00148 // ----- constructors --------------------------------------- 00149 00150 public: 00151 /** 00152 * Create an empty StringHandle. 00153 */ 00154 StringHandle() 00155 : TypedHandle<T>() 00156 { 00157 } 00158 00159 /** 00160 * Create a new StringHandle from a boxable type. 00161 */ 00162 StringHandle(const char* ach) 00163 : TypedHandle<T>() 00164 { 00165 if (NULL != ach) 00166 { 00167 TypedHandle<T>::operator=(T::create(ach)); 00168 } 00169 } 00170 00171 /** 00172 * Create a new StringHandle from a boxable type. 00173 */ 00174 explicit StringHandle(const wchar_t* ach) 00175 : TypedHandle<T>() 00176 { 00177 if (NULL != ach) 00178 { 00179 TypedHandle<T>::operator=(T::create(ach)); 00180 } 00181 } 00182 00183 /** 00184 * Create a new StringHandle from a boxable type. 00185 */ 00186 template<class C, class R, class A> 00187 StringHandle(const std::basic_string<C, R, A>& s) 00188 : TypedHandle<T>(T::create(s)) 00189 { 00190 } 00191 00192 /** 00193 * Create a new StringHandle from the TypedHandle with a type 00194 * conversion. 00195 */ 00196 template<class O> StringHandle<T>(const TypedHandle<O>& that) 00197 : TypedHandle<T>(that) 00198 { 00199 } 00200 00201 /** 00202 * The copy constructor. 00203 */ 00204 StringHandle(const StringHandle& that) 00205 : TypedHandle<T>(that) 00206 { 00207 } 00208 00209 /** 00210 * Create a new StringHandle from the raw pointer. 00211 */ 00212 explicit StringHandle(T* o) 00213 : TypedHandle<T>(o) 00214 { 00215 } 00216 00217 // ----- operators ------------------------------------------ 00218 00219 public: 00220 /** 00221 * The assignment operator. 00222 */ 00223 template<class O> 00224 StringHandle& operator=(const TypedHandle<O>& that) 00225 { 00226 TypedHandle<T>::operator=(that); 00227 return *this; 00228 } 00229 00230 /** 00231 * The "boxing" operator. 00232 */ 00233 StringHandle& operator=(const char* ach) 00234 { 00235 if (NULL == ach) 00236 { 00237 TypedHandle<T>::operator=(NULL); 00238 } 00239 else 00240 { 00241 TypedHandle<T>::operator=(T::create(ach)); 00242 } 00243 return *this; 00244 } 00245 00246 /** 00247 * The "boxing" operator. 00248 */ 00249 template<class C, class R, class A> 00250 StringHandle& operator=(const std::basic_string<C, R, A>& s) 00251 { 00252 TypedHandle<T>::operator=(T::create(s)); 00253 return *this; 00254 } 00255 00256 /** 00257 * The "unboxing" operator. 00258 * 00259 * @return a copy of the referenced Object 00260 */ 00261 template<class C, class R, class A> 00262 operator std::basic_string<C, R, A>() const 00263 { 00264 const T* pT = TypedHandle<T>::get(); 00265 if (NULL == pT) 00266 { 00267 coh_throw_npe(typeid(T)); 00268 } 00269 return (std::basic_string<C, R, A>) *pT; 00270 } 00271 }; 00272 00273 // ----- handle definitions --------------------------------------------- 00274 00275 public: 00276 /** 00277 * Handle definition. 00278 */ 00279 typedef StringHandle<String> Handle; 00280 00281 /** 00282 * View definition. 00283 */ 00284 typedef StringHandle<const String> View; 00285 00286 00287 // ----- factory methods ------------------------------------------------ 00288 00289 public: 00290 /** 00291 * Create a String from a C-style NUL terminated char array. 00292 * 00293 * @param ach the NUL terminated string of chars to copy 00294 * @param cch the number of chars to copy; if npos, until NUL 00295 * 00296 * @throws IllegalArgumentException if any of the elements in the 00297 * array are not UTF-8 BMP 00298 */ 00299 static String::Handle create(const char* achSrc = "", size32_t cch = npos); 00300 00301 /** 00302 * Create a String from a C-style NUL terminated wide char array. 00303 * 00304 * @param ach the NUL terminated string of wide chars to copy 00305 * @param cch the number of chars to copy; if npos, copy until NUL 00306 * 00307 * @throws IllegalArgumentException if any of the elements in the 00308 * array are not UTF-16 BMP 00309 */ 00310 static String::Handle create(const wchar_t* achSrc, size32_t cch = npos); 00311 00312 /** 00313 * Create a String from an STL string. 00314 * 00315 * @param s the STL string to copy 00316 * 00317 * @throws IllegalArgumentException if any of the elements in the 00318 * array are not UTF-8 BMP 00319 */ 00320 template<class C, class R, class A> static COH_INLINE 00321 String::Handle create(const std::basic_string<C, R, A>& s) 00322 { 00323 size_t cch = s.size(); 00324 if (cch >= npos) // for 64b 00325 { 00326 coh_throw_illegal_argument("maximum String length exceeded"); 00327 } 00328 return String::create(s.data(), size32_t(cch)); 00329 } 00330 00331 /** 00332 * Create a String from a char array. 00333 * 00334 * @param vach the array of chars to copy 00335 * @param of the offset at which to start copying 00336 * @param cch the number of chars to copy; if npos, copy all 00337 * subsequent chars in the array 00338 * 00339 * @throws IndexOutOfBoundsException if of > vach->length or if 00340 * cch < npos and of + cch > vach->length 00341 * @throws IllegalArgumentException if any of the elements in the 00342 * array are not UTF-8 BMP 00343 */ 00344 static String::Handle create(Array<char>::View vachSrc, 00345 size32_t of = 0, size32_t cch = npos); 00346 00347 /** 00348 * Create a String from a wide char array. 00349 * 00350 * @param vach the array of chars to copy 00351 * @param of the offset at which to start copying 00352 * @param cch the number of chars to copy; if npos, copy all 00353 * subsequent chars in the array 00354 * 00355 * @throws IndexOutOfBoundsException if of > vach->length or if 00356 * cch < npos and of + cch > vach->length 00357 * @throws IllegalArgumentException if any of the elements in the 00358 * array are not UTF-16 BMP 00359 * @throws UnsupportedOperationException if sizeof(wchar_t) < 00360 * sizeof(char16_t) 00361 */ 00362 static String::Handle create(Array<wchar_t>::View vachSrc, 00363 size32_t of = 0, size32_t cch = npos); 00364 00365 /** 00366 * Create a String from an octet array. 00367 * 00368 * @param vab the array of octets to copy 00369 * @param of the offset at which to start copying 00370 * @param cb the number of octets to copy; if npos, copy all 00371 * subsequent octets in the array 00372 * 00373 * @throws IndexOutOfBoundsException if of > vab->length or if 00374 * cb < npos and of + cb > vab->length 00375 * @throws IllegalArgumentException if any of the elements in the 00376 * array are not UTF-8 BMP 00377 */ 00378 static String::Handle create(Array<octet_t>::View vabSrc, 00379 size32_t of = 0, size32_t cb = npos); 00380 00381 /** 00382 * Create a String from a 16-bit char array. 00383 * 00384 * @param vach the array of chars to copy 00385 * @param of the offset at which to start copying 00386 * @param cch the number of chars to copy; if npos, copy all 00387 * subsequent chars in the array 00388 * 00389 * @throws IndexOutOfBoundsException if of > vach->length or if 00390 * cch < npos and of + cch > vach->length 00391 * @throws IllegalArgumentException if any of the elements in the 00392 * array are not UTF-16 BMP 00393 */ 00394 static String::Handle create(Array<char16_t>::View vachSrc, 00395 size32_t of = 0, size32_t cch = npos); 00396 00397 /** 00398 * Create a String from another String. 00399 * 00400 * Needed for clone(). 00401 * 00402 * @param that the String to copy 00403 * 00404 * @since Coherence 3.7.1.8 00405 */ 00406 static String::Handle create(const String& that); 00407 00408 00409 // ----- constructors --------------------------------------------------- 00410 00411 private: 00412 /** 00413 * Constructor. 00414 * 00415 * @param ccp the number of code points in the string 00416 * @param cb the number of octets in the string 00417 * @param ab the String's octets 00418 */ 00419 String(size32_t ccp, size32_t cb, octet_t* ab); 00420 00421 /** 00422 * Copy constructor. 00423 */ 00424 String(const String& that); 00425 00426 00427 // ----- String interface ----------------------------------------------- 00428 00429 public: 00430 /** 00431 * Return true iff the String contains only ASCII (ISO-8859-1) 00432 * characters. In this case each character is represented by a single 00433 * char, otherwise a character can take between one and three chars. 00434 * 00435 * @return true iff the String contains only ASCII characters 00436 */ 00437 virtual bool isASCII() const; 00438 00439 /** 00440 * Return the number of unicode code points (characters) in this String. 00441 * 00442 * @return the number of characters in this String 00443 */ 00444 virtual size32_t length() const; 00445 00446 /** 00447 * Return the String as a C-style NUL terminated char array. 00448 * 00449 * If the String is non-ASCII then the String::next() method may be 00450 * used to expand the char array into a sequence of char16_t unicode 00451 * characters. 00452 * 00453 * The returned array's lifetime is bound to the lifetime of the 00454 * String which it was returned from. Specifically it is unsafe to use 00455 * the returned char* while not holding a handle to the String. 00456 * 00457 * @return the char array representing the String. 00458 */ 00459 virtual const char* getCString() const; 00460 00461 /** 00462 * Compare this String against the supplied C-style string. 00463 * 00464 * @param ach the NUL terminated C-style string to compare to this 00465 * String 00466 * @param cch the length of the supplied string, or npos to rely on 00467 * NUL terminator 00468 * 00469 * @return true iff the two strings are identical 00470 */ 00471 virtual bool equals(const char* ach, size32_t cch = npos) const; 00472 00473 /** 00474 * Compare this String against the supplied C-style wide char string. 00475 * 00476 * @param ach the NUL terminated C-style string to compare to this 00477 * String 00478 * @param cch the length of the supplied string, or npos to rely on 00479 * NUL terminator 00480 * 00481 * @return true iff the two strings are identical 00482 * 00483 * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t) 00484 */ 00485 virtual bool equals(const wchar_t* ach, size32_t cch = npos) const; 00486 00487 /** 00488 * Compare this String against the supplied STL string or wstring. 00489 * 00490 * @param s the STL string to compare to this String 00491 * 00492 * @return true iff the two strings are identical 00493 */ 00494 template<class C, class R, class A> COH_INLINE 00495 bool equalsStd(const std::basic_string<C, R, A>& s) const 00496 { 00497 size_t cch = s.size(); 00498 return cch < npos && equals(s.data(), size32_t(cch)); 00499 } 00500 00501 /** 00502 * Convert the String to any of the types supported by StringHandle, 00503 * namely an STL string or wstring. 00504 * 00505 * @return the std::string/wstring representation 00506 */ 00507 template<class C, class R, class A> COH_INLINE 00508 operator std::basic_string<C, R, A>() const 00509 { 00510 if (sizeof(C) == sizeof(octet_t)) 00511 { 00512 return std::basic_string<C, R, A>((const C*) getCString(), 00513 super::length - 1); 00514 } 00515 00516 if (sizeof(C) < sizeof(char16_t)) 00517 { 00518 coh_throw_unsupported_operation("unsupported string type"); 00519 } 00520 00521 typename std::basic_string<C, R, A>::size_type cch = 00522 typename std::basic_string<C, R, A>::size_type(length()); 00523 const char* iter = getCString(); 00524 std::basic_string<C, R, A> ws; 00525 ws.reserve(cch); 00526 for (typename std::basic_string<C, R, A>::size_type 00527 i = 0; i < cch; ++i) 00528 { 00529 ws.push_back((C) String::next(iter)); 00530 } 00531 return ws; 00532 } 00533 00534 /** 00535 * Return the index of a substring within this String. 00536 * 00537 * @param vsSearch the substring to search for in vsSource 00538 * @param iBegin the location in the string to start searching 00539 * 00540 * @return the index of the substring found within this String or npos 00541 */ 00542 virtual size32_t indexOf(String::View vsSearch, 00543 size32_t iBegin = 0) const; 00544 00545 /** 00546 * Return the index of a character within this String. 00547 * 00548 * @param chSearch the character to search for in this String 00549 * @param iBegin the location in this String to start searching 00550 * 00551 * @return the index of the character found within this String or npos 00552 */ 00553 virtual size32_t indexOf(char16_t chSearch, 00554 size32_t iBegin = 0) const; 00555 00556 /** 00557 * Return the index of a substring within this String by searching 00558 * backward from the given beginning index. 00559 * 00560 * @param vsSearh the substring to search for within this String 00561 * @param iBegin the location in this String to start searching 00562 * 00563 * @return the index of the substring found within this String or npos 00564 */ 00565 virtual size32_t lastIndexOf(String::View vsSearch, 00566 size32_t iBegin = npos) const; 00567 00568 /** 00569 * Return the index of a substring within this String by searching 00570 * backward from the given beginning index. 00571 * 00572 * @param chSearch the character to search for in this String 00573 * @param iBegin the location in this String to start searching 00574 * 00575 * @return the index of the character found within this String or npos 00576 */ 00577 virtual size32_t lastIndexOf(char16_t chSearch, 00578 size32_t iBegin = npos) const; 00579 00580 /** 00581 * Return a new String comprised of the substring of this string 00582 * from iBegin (inclusive) to iEnd (exclusive). 00583 * 00584 * @param iBegin the starting index from which to create the string 00585 * @param iEnd the index of where the substring should stop 00586 * in this String or npos for end of string 00587 * 00588 * @return the new substring created from this String 00589 */ 00590 virtual String::View substring(size32_t iBegin, 00591 size32_t iEnd = npos) const; 00592 00593 /** 00594 * Return true if this String starts with the supplied String. 00595 * 00596 * @param vsSearch the string to search for 00597 * 00598 * @return true if this String starts with vsSearch 00599 */ 00600 virtual bool startsWith(String::View vsSearch) const; 00601 00602 /** 00603 * Return true if this String ends with the supplied String. 00604 * 00605 * @param vsSearch the string to search for 00606 * 00607 * @return true if this String ends with vsSearch 00608 */ 00609 virtual bool endsWith(String::View vsSearch) const; 00610 00611 /** 00612 * A substring of this String is compared to a substring of a supplied 00613 * String. 00614 * 00615 * @param ofSource the offset in this String where comparison begins 00616 * @param vsOther the String whose substring is compared against 00617 * this String 00618 * @param ofOther the offset in vsOther where comparison begins 00619 * @param cch the count of characters to compare, or npos for 00620 * (vsOther->length - ofOther) 00621 * 00622 * @return the result of the two substrings 00623 */ 00624 virtual bool regionMatches(size32_t ofSourse, 00625 String::View vsOther, size32_t ofOther = 0, 00626 size32_t cch = npos) const; 00627 00628 /** 00629 * Return a String that is the result of removing all leading and 00630 * trailing white space. 00631 * 00632 * @return a trimmed copy of this String 00633 */ 00634 String::View trim() const; 00635 00636 /** 00637 * Return the underlying UTF-8 BMP NUL terminated Array<octet_t>. 00638 * 00639 * For performance reasons the returned Array may not support cloning. 00640 * If clone() is called the result will a String, which depending on 00641 * the compiler's handling of dynamic_cast to a private super class may 00642 * fail to be castable to an Array<octet_t>. 00643 * 00644 * @return the Array<octet_t> 00645 */ 00646 virtual Array<octet_t>::View getOctets() const; 00647 00648 00649 // ----- Array interface ------------------------------------------------ 00650 00651 protected: 00652 using Array<octet_t>::regionMatches; 00653 00654 00655 // ----- Comparable interface ------------------------------------------- 00656 00657 public: 00658 /** 00659 * {@inheritDoc} 00660 */ 00661 virtual int32_t compareTo(Object::View v) const; 00662 00663 00664 // ----- Object interface ----------------------------------------------- 00665 00666 public: 00667 /** 00668 * {@inheritDoc} 00669 */ 00670 virtual size32_t hashCode() const; 00671 00672 /** 00673 * {@inheritDoc} 00674 */ 00675 virtual void toStream(std::ostream& out) const; 00676 00677 /** 00678 * {@inheritDoc} 00679 */ 00680 virtual bool isImmutable() const; 00681 00682 /** 00683 * {@inheritDoc} 00684 */ 00685 virtual bool equals(Object::View v) const; 00686 00687 /** 00688 * {@inheritDoc} 00689 */ 00690 virtual size64_t sizeOf(bool fDeep = false) const; 00691 00692 // ----- static helpers ------------------------------------------------- 00693 00694 public: 00695 /** 00696 * Return the Unicode character as UTF-16 from the char array, and 00697 * increment the pointer such that it references the start of the 00698 * next Unicode character. 00699 * 00700 * @param ach pointer to the start of the next UTF-8 code point. 00701 * 00702 * @return the next Unicode character 00703 * 00704 * @throws IllegalArgumentException if a non UTF-8 BMP sequence is 00705 * encountered 00706 */ 00707 static char16_t next(const char*& ach); 00708 00709 00710 // ----- data members --------------------------------------------------- 00711 00712 protected: 00713 /** 00714 * The number of unicode code points (characters) in the String. 00715 */ 00716 size32_t m_ccp; 00717 00718 00719 // ----- constants ------------------------------------------------------ 00720 00721 public: 00722 /** 00723 * String referencing NULL. 00724 * 00725 * This constant is generally only needed for defining a default 00726 * value for a function parameter: 00727 * 00728 * @code 00729 * void function(String::View vs = String::null_string) 00730 * @endcode 00731 * 00732 * Simply passing NULL as a default is not allowable for Strings as due 00733 * to auto-boxing the compiler is unable to determine if NULL indicates 00734 * a String* or a char*. For all other uses of NULL with String the 00735 * literal NULL is preferred. 00736 */ 00737 static const char* const null_string; 00738 }; 00739 00740 00741 // ----- helper macros ------------------------------------------------------ 00742 00743 /** 00744 * This macro will take any set of streamable contents and turn them into a 00745 * coherence#lang#String instance. 00746 * 00747 * @param CONTENTS the contents to use in constructing the String. 00748 * 00749 * Usage example: 00750 * @code 00751 * String::Handle hsFoo = COH_TO_STRING("This value: " << 5 << " is my value"); 00752 * @endcode 00753 */ 00754 #define COH_TO_STRING(CONTENTS) \ 00755 coherence::lang::String::create(((std::stringstream&) \ 00756 (*(std::auto_ptr<std::stringstream>(new std::stringstream())) \ 00757 << CONTENTS)).str()) 00758 00759 COH_CLOSE_NAMESPACE2 00760 00761 #endif // COH_STRING_HPP