00001 /* 00002 * String.hpp 00003 * 00004 * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. 00005 * 00006 * Oracle is a registered trademarks of Oracle Corporation and/or its 00007 * affiliates. 00008 * 00009 * This software is the confidential and proprietary information of Oracle 00010 * Corporation. You shall not disclose such confidential and proprietary 00011 * information and shall use it only in accordance with the terms of the 00012 * license agreement you entered into with Oracle. 00013 * 00014 * This notice may not be removed or altered. 00015 */ 00016 /* 00017 * String.hpp 00018 * 00019 * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. 00020 * 00021 * Oracle is a registered trademarks of Oracle Corporation and/or its 00022 * affiliates. 00023 * 00024 * This software is the confidential and proprietary information of Oracle 00025 * Corporation. You shall not disclose such confidential and proprietary 00026 * information and shall use it only in accordance with the terms of the 00027 * license agreement you entered into with Oracle. 00028 * 00029 * This notice may not be removed or altered. 00030 */ 00031 #ifndef COH_STRING_HPP 00032 #define COH_STRING_HPP 00033 00034 #include "coherence/lang/compatibility.hpp" 00035 00036 #include "coherence/lang/Array.hpp" 00037 #include "coherence/lang/Comparable.hpp" 00038 #include "coherence/lang/Object.hpp" 00039 00040 #include <memory> 00041 #include <ostream> 00042 #include <sstream> 00043 #include <string> 00044 00045 COH_OPEN_NAMESPACE2(coherence,lang) 00046 00047 /** 00048 * @internal 00049 * 00050 * Used to protect protected inheritance of Array<octet_t> by String, as 00051 * spec based class definitions don't have a notion of protected 00052 * inheritance. 00053 */ 00054 class COH_EXPORT_SPEC ProtectedOctetArray 00055 : protected Array<octet_t> 00056 { 00057 public: 00058 typedef Array<octet_t>::super super; 00059 typedef Array<octet_t>::alias alias; 00060 00061 protected: 00062 ProtectedOctetArray(size32_t cb, octet_t* ab) 00063 : Array<octet_t>(cb, ab) 00064 {} 00065 00066 ProtectedOctetArray(ProtectedOctetArray::View vThat, 00067 size32_t iFrom, size32_t iTo) 00068 : Array<octet_t>(vThat, iFrom, iTo) 00069 {} 00070 00071 virtual ~ProtectedOctetArray() 00072 {} 00073 }; 00074 00075 00076 /** 00077 * A managed C-style (NUL terminated) string. 00078 * 00079 * In addition to exposing the underlying char array, the String class 00080 * supports transformations to and from Unicode code points within the Basic 00081 * Multilingual Plane (BMP): 00082 * 00083 * <ul> 00084 * <li>UTF-8 BMP char array</li> 00085 * <li>UTF-16 BMP wchar_t array (on platforms where wchar_t is >= 16 bits)</li> 00086 * <li>UTF-8 BMP octet_t array</li> 00087 * <li>UTF-16 BMP char16_t array</li> 00088 * </ul> 00089 * 00090 * Note: the ASCII character set is a subset of UTF-8 BMP. 00091 * 00092 * Unlike most managed types in the Coherence class hierarchy, Strings are 00093 * auto-boxable by default. That is a String::Handle or String::View can be 00094 * directly assigned from or to common string representations. For example 00095 * the following code is legal: 00096 * @code 00097 * String::Handle hs = "hello world"; 00098 * @endcode 00099 * as is 00100 * @code 00101 * void someFunction(String::View vs); 00102 * 00103 * someFunction("some value"); 00104 * @endcode 00105 * 00106 * @see StringHandle for details 00107 * 00108 * @author mf/jh/djl 2007.07.05 00109 */ 00110 class COH_EXPORT String 00111 : public cloneable_spec<String, 00112 extends<ProtectedOctetArray>, 00113 implements<Comparable> > 00114 { 00115 friend class factory<String>; 00116 00117 // ----- constants ------------------------------------------------------ 00118 00119 public: 00120 /** 00121 * The largest possible value of type size32_t. 00122 */ 00123 static const size32_t npos = size32_t(-1); 00124 00125 00126 // ----- typedefs ------------------------------------------------------- 00127 00128 public: 00129 /** 00130 * While StringHandle boxes a number of common string types, String is 00131 * still compatible with BoxHandle, and when used with it can box to 00132 * only one type. By default Strings are boxable from a number of 00133 * types, see StringHandle for details. 00134 */ 00135 typedef std::string BoxedType; 00136 00137 00138 // ----- nested class: StringHandle ------------------------------------- 00139 00140 public: 00141 /** 00142 * StringHandle provides standard TypedHandle features as well as 00143 * auto-boxing support for standard string types including: 00144 * 00145 * <ul> 00146 * <li>char[] C-style NUL terminated char array</li> 00147 * <li>std::string STL string</li> 00148 * <li>std::wstring STL wide string</li> 00149 * </ul> 00150 * 00151 * Boxing from wchar_t[] is supported, but requires an explicit 00152 * constructor call in order to avoid ambiguity when assigning a 00153 * String handle/view to NULL. 00154 * 00155 * Unboxing to char[] and wchar[] is not supported as it is unsafe to 00156 * maintain a reference to the underlying character array without 00157 * holding a reference to the String. Unboxing to std::string, and 00158 * std::wstring is both supported and safe. 00159 */ 00160 template<class T> class StringHandle 00161 : public TypedHandle<T> 00162 { 00163 // ----- constructors --------------------------------------- 00164 00165 public: 00166 /** 00167 * Create an empty StringHandle. 00168 */ 00169 StringHandle() 00170 : TypedHandle<T>() 00171 { 00172 } 00173 00174 /** 00175 * Create a new StringHandle from a boxable type. 00176 */ 00177 StringHandle(const char* ach) 00178 : TypedHandle<T>() 00179 { 00180 if (NULL != ach) 00181 { 00182 TypedHandle<T>::operator=(T::create(ach)); 00183 } 00184 } 00185 00186 /** 00187 * Create a new StringHandle from a boxable type. 00188 */ 00189 explicit StringHandle(const wchar_t* ach) 00190 : TypedHandle<T>() 00191 { 00192 if (NULL != ach) 00193 { 00194 TypedHandle<T>::operator=(T::create(ach)); 00195 } 00196 } 00197 00198 /** 00199 * Create a new StringHandle from a boxable type. 00200 */ 00201 template<class C, class R, class A> 00202 StringHandle(const std::basic_string<C, R, A>& s) 00203 : TypedHandle<T>(T::create(s)) 00204 { 00205 } 00206 00207 /** 00208 * Create a new StringHandle from the TypedHandle with a type 00209 * conversion. 00210 */ 00211 template<class O> StringHandle<T>(const TypedHandle<O>& that) 00212 : TypedHandle<T>(that) 00213 { 00214 } 00215 00216 /** 00217 * The copy constructor. 00218 */ 00219 StringHandle(const StringHandle& that) 00220 : TypedHandle<T>(that) 00221 { 00222 } 00223 00224 /** 00225 * Create a new StringHandle from the raw pointer. 00226 */ 00227 explicit StringHandle(T* o) 00228 : TypedHandle<T>(o) 00229 { 00230 } 00231 00232 // ----- operators ------------------------------------------ 00233 00234 public: 00235 /** 00236 * The assignment operator. 00237 */ 00238 template<class O> 00239 StringHandle& operator=(const TypedHandle<O>& that) 00240 { 00241 TypedHandle<T>::operator=(that); 00242 return *this; 00243 } 00244 00245 /** 00246 * The "boxing" operator. 00247 */ 00248 StringHandle& operator=(const char* ach) 00249 { 00250 if (NULL == ach) 00251 { 00252 TypedHandle<T>::operator=(NULL); 00253 } 00254 else 00255 { 00256 TypedHandle<T>::operator=(T::create(ach)); 00257 } 00258 return *this; 00259 } 00260 00261 /** 00262 * The "boxing" operator. 00263 */ 00264 template<class C, class R, class A> 00265 StringHandle& operator=(const std::basic_string<C, R, A>& s) 00266 { 00267 TypedHandle<T>::operator=(T::create(s)); 00268 return *this; 00269 } 00270 00271 /** 00272 * The "unboxing" operator. 00273 * 00274 * @return a copy of the referenced Object 00275 */ 00276 template<class C, class R, class A> 00277 operator std::basic_string<C, R, A>() const 00278 { 00279 const T* pT = TypedHandle<T>::get(); 00280 if (NULL == pT) 00281 { 00282 coh_throw_npe(typeid(T)); 00283 } 00284 return (std::basic_string<C, R, A>) *pT; 00285 } 00286 }; 00287 00288 // ----- handle definitions --------------------------------------------- 00289 00290 public: 00291 /** 00292 * Handle definition. 00293 */ 00294 typedef StringHandle<String> Handle; 00295 00296 /** 00297 * View definition. 00298 */ 00299 typedef StringHandle<const String> View; 00300 00301 00302 // ----- factory methods ------------------------------------------------ 00303 00304 public: 00305 /** 00306 * Create a String from a C-style NUL terminated char array. 00307 * 00308 * @param ach the NUL terminated string of chars to copy 00309 * @param cch the number of chars to copy; if npos, until NUL 00310 * 00311 * @throws IllegalArgumentException if any of the elements in the 00312 * array are not UTF-8 BMP 00313 */ 00314 static String::Handle create(const char* achSrc = "", size32_t cch = npos); 00315 00316 /** 00317 * Create a String from a C-style NUL terminated wide char array. 00318 * 00319 * @param ach the NUL terminated string of wide chars to copy 00320 * @param cch the number of chars to copy; if npos, copy until NUL 00321 * 00322 * @throws IllegalArgumentException if any of the elements in the 00323 * array are not UTF-16 BMP 00324 */ 00325 static String::Handle create(const wchar_t* achSrc, size32_t cch = npos); 00326 00327 /** 00328 * Create a String from an STL string. 00329 * 00330 * @param s the STL string to copy 00331 * 00332 * @throws IllegalArgumentException if any of the elements in the 00333 * array are not UTF-8 BMP 00334 */ 00335 template<class C, class R, class A> static COH_INLINE 00336 String::Handle create(const std::basic_string<C, R, A>& s) 00337 { 00338 size_t cch = s.size(); 00339 if (cch >= npos) // for 64b 00340 { 00341 coh_throw_illegal_argument("maximum String length exceeded"); 00342 } 00343 return String::create(s.data(), size32_t(cch)); 00344 } 00345 00346 /** 00347 * Create a String from a char array. 00348 * 00349 * @param vach the array of chars to copy 00350 * @param of the offset at which to start copying 00351 * @param cch the number of chars to copy; if npos, copy all 00352 * subsequent chars in the array 00353 * 00354 * @throws IndexOutOfBoundsException if of > vach->length or if 00355 * cch < npos and of + cch > vach->length 00356 * @throws IllegalArgumentException if any of the elements in the 00357 * array are not UTF-8 BMP 00358 */ 00359 static String::Handle create(Array<char>::View vachSrc, 00360 size32_t of = 0, size32_t cch = npos); 00361 00362 /** 00363 * Create a String from a wide char array. 00364 * 00365 * @param vach the array of chars to copy 00366 * @param of the offset at which to start copying 00367 * @param cch the number of chars to copy; if npos, copy all 00368 * subsequent chars in the array 00369 * 00370 * @throws IndexOutOfBoundsException if of > vach->length or if 00371 * cch < npos and of + cch > vach->length 00372 * @throws IllegalArgumentException if any of the elements in the 00373 * array are not UTF-16 BMP 00374 * @throws UnsupportedOperationException if sizeof(wchar_t) < 00375 * sizeof(char16_t) 00376 */ 00377 static String::Handle create(Array<wchar_t>::View vachSrc, 00378 size32_t of = 0, size32_t cch = npos); 00379 00380 /** 00381 * Create a String from an octet array. 00382 * 00383 * @param vab the array of octets to copy 00384 * @param of the offset at which to start copying 00385 * @param cb the number of octets to copy; if npos, copy all 00386 * subsequent octets in the array 00387 * 00388 * @throws IndexOutOfBoundsException if of > vab->length or if 00389 * cb < npos and of + cb > vab->length 00390 * @throws IllegalArgumentException if any of the elements in the 00391 * array are not UTF-8 BMP 00392 */ 00393 static String::Handle create(Array<octet_t>::View vabSrc, 00394 size32_t of = 0, size32_t cb = npos); 00395 00396 /** 00397 * Create a String from a 16-bit char array. 00398 * 00399 * @param vach the array of chars to copy 00400 * @param of the offset at which to start copying 00401 * @param cch the number of chars to copy; if npos, copy all 00402 * subsequent chars in the array 00403 * 00404 * @throws IndexOutOfBoundsException if of > vach->length or if 00405 * cch < npos and of + cch > vach->length 00406 * @throws IllegalArgumentException if any of the elements in the 00407 * array are not UTF-16 BMP 00408 */ 00409 static String::Handle create(Array<char16_t>::View vachSrc, 00410 size32_t of = 0, size32_t cch = npos); 00411 00412 00413 // ----- constructors --------------------------------------------------- 00414 00415 private: 00416 /** 00417 * Constructor. 00418 * 00419 * @param ccp the number of code points in the string 00420 * @param cb the number of octets in the string 00421 * @param ab the String's octets 00422 */ 00423 String(size32_t ccp, size32_t cb, octet_t* ab); 00424 00425 /** 00426 * Copy constructor. 00427 */ 00428 String(const String& that); 00429 00430 00431 // ----- String interface ----------------------------------------------- 00432 00433 public: 00434 /** 00435 * Return true iff the String contains only ASCII (ISO-8859-1) 00436 * characters. In this case each character is represented by a single 00437 * char, otherwise a character can take between one and three chars. 00438 * 00439 * @return true iff the String contains only ASCII characters 00440 */ 00441 virtual bool isASCII() const; 00442 00443 /** 00444 * Return the number of unicode code points (characters) in this String. 00445 * 00446 * @return the number of characters in this String 00447 */ 00448 virtual size32_t length() const; 00449 00450 /** 00451 * Return the String as a C-style NUL terminated char array. 00452 * 00453 * If the String is non-ASCII then the String::next() method may be 00454 * used to expand the char array into a sequence of char16_t unicode 00455 * characters. 00456 * 00457 * The returned array's lifetime is bound to the lifetime of the 00458 * String which it was returned from. Specifically it is unsafe to use 00459 * the returned char* while not holding a handle to the String. 00460 * 00461 * @return the char array representing the String. 00462 */ 00463 virtual const char* getCString() const; 00464 00465 /** 00466 * Compare this String against the supplied C-style string. 00467 * 00468 * @param ach the NUL terminated C-style string to compare to this 00469 * String 00470 * @param cch the length of the supplied string, or npos to rely on 00471 * NUL terminator 00472 * 00473 * @return true iff the two strings are identical 00474 */ 00475 virtual bool equals(const char* ach, size32_t cch = npos) const; 00476 00477 /** 00478 * Compare this String against the supplied C-style wide char string. 00479 * 00480 * @param ach the NUL terminated C-style string to compare to this 00481 * String 00482 * @param cch the length of the supplied string, or npos to rely on 00483 * NUL terminator 00484 * 00485 * @return true iff the two strings are identical 00486 * 00487 * @throws UnsupportedOperationException if sizeof(wchar_t) < sizeof(char16_t) 00488 */ 00489 virtual bool equals(const wchar_t* ach, size32_t cch = npos) const; 00490 00491 /** 00492 * Compare this String against the supplied STL string or wstring. 00493 * 00494 * @param s the STL string to compare to this String 00495 * 00496 * @return true iff the two strings are identical 00497 */ 00498 template<class C, class R, class A> COH_INLINE 00499 bool equalsStd(const std::basic_string<C, R, A>& s) const 00500 { 00501 size_t cch = s.size(); 00502 return cch < npos && equals(s.data(), size32_t(cch)); 00503 } 00504 00505 /** 00506 * Convert the String to any of the types supported by StringHandle, 00507 * namely an STL string or wstring. 00508 * 00509 * @return the std::string/wstring representation 00510 */ 00511 template<class C, class R, class A> COH_INLINE 00512 operator std::basic_string<C, R, A>() const 00513 { 00514 if (sizeof(C) == sizeof(octet_t)) 00515 { 00516 return std::basic_string<C, R, A>((const C*) getCString(), 00517 length()); 00518 } 00519 00520 if (sizeof(C) < sizeof(char16_t)) 00521 { 00522 coh_throw_unsupported_operation("unsupported string type"); 00523 } 00524 00525 typename std::basic_string<C, R, A>::size_type cch = 00526 typename std::basic_string<C, R, A>::size_type(length()); 00527 const char* iter = getCString(); 00528 std::basic_string<C, R, A> ws; 00529 ws.reserve(cch); 00530 for (typename std::basic_string<C, R, A>::size_type 00531 i = 0; i < cch; ++i) 00532 { 00533 ws.push_back((C) String::next(iter)); 00534 } 00535 return ws; 00536 } 00537 00538 /** 00539 * Return the index of a substring within this String. 00540 * 00541 * @param vsSearch the substring to search for in vsSource 00542 * @param iBegin the location in the string to start searching 00543 * 00544 * @return the index of the substring found within this String or npos 00545 */ 00546 virtual size32_t indexOf(String::View vsSearch, 00547 size32_t iBegin = 0) const; 00548 00549 /** 00550 * Return the index of a character within this String. 00551 * 00552 * @param chSearch the character to search for in this String 00553 * @param iBegin the location in this String to start searching 00554 * 00555 * @return the index of the character found within this String or npos 00556 */ 00557 virtual size32_t indexOf(char16_t chSearch, 00558 size32_t iBegin = 0) const; 00559 00560 /** 00561 * Return the index of a substring within this String by searching 00562 * backward from the given beginning index. 00563 * 00564 * @param vsSearh the substring to search for within this String 00565 * @param iBegin the location in this String to start searching 00566 * 00567 * @return the index of the substring found within this String or npos 00568 */ 00569 virtual size32_t lastIndexOf(String::View vsSearch, 00570 size32_t iBegin = npos) const; 00571 00572 /** 00573 * Return the index of a substring within this String by searching 00574 * backward from the given beginning index. 00575 * 00576 * @param chSearch the character to search for in this String 00577 * @param iBegin the location in this String to start searching 00578 * 00579 * @return the index of the character found within this String or npos 00580 */ 00581 virtual size32_t lastIndexOf(char16_t chSearch, 00582 size32_t iBegin = npos) const; 00583 00584 /** 00585 * Return a new String comprised of the substring of this string 00586 * from iBegin (inclusive) to iEnd (exclusive). 00587 * 00588 * @param iBegin the starting index from which to create the string 00589 * @param iEnd the index of where the substring should stop 00590 * in this String or npos for end of string 00591 * 00592 * @return the new substring created from this String 00593 */ 00594 virtual String::View substring(size32_t iBegin, 00595 size32_t iEnd = npos) const; 00596 00597 /** 00598 * Return true if this String starts with the supplied String. 00599 * 00600 * @param vsSearch the string to search for 00601 * 00602 * @return true if this String starts with vsSearch 00603 */ 00604 virtual bool startsWith(String::View vsSearch) const; 00605 00606 /** 00607 * Return true if this String ends with the supplied String. 00608 * 00609 * @param vsSearch the string to search for 00610 * 00611 * @return true if this String ends with vsSearch 00612 */ 00613 virtual bool endsWith(String::View vsSearch) const; 00614 00615 /** 00616 * A substring of this String is compared to a substring of a supplied 00617 * String. 00618 * 00619 * @param ofSource the offset in this String where comparison begins 00620 * @param vsOther the String whose substring is compared against 00621 * this String 00622 * @param ofOther the offset in vsOther where comparison begins 00623 * @param cch the count of characters to compare 00624 * 00625 * @return the result of the two substrings 00626 */ 00627 virtual bool regionMatches(size32_t ofSourse, 00628 String::View vsOther, size32_t ofOther = 0, 00629 size32_t cch = npos) const; 00630 00631 /** 00632 * Return a String that is the result of removing all leading and 00633 * trailing white space. 00634 * 00635 * @return a trimmed copy of this String 00636 */ 00637 String::View trim() const; 00638 00639 /** 00640 * Return the underlying UTF-8 BMP NUL terminated Array<octet_t>. 00641 * 00642 * For performance reasons the returned Array may not support cloning. 00643 * If clone() is called the result will a String, which depending on 00644 * the compiler's handling of dynamic_cast to a private super class may 00645 * fail to be castable to an Array<octet_t>. 00646 * 00647 * @return the Array<octet_t> 00648 */ 00649 virtual Array<octet_t>::View getOctets() const; 00650 00651 00652 // ----- Comparable interface ------------------------------------------- 00653 00654 public: 00655 /** 00656 * {@inheritDoc} 00657 */ 00658 virtual int32_t compareTo(Object::View v) const; 00659 00660 00661 // ----- Object interface ----------------------------------------------- 00662 00663 public: 00664 /** 00665 * {@inheritDoc} 00666 */ 00667 virtual size32_t hashCode() const; 00668 00669 /** 00670 * {@inheritDoc} 00671 */ 00672 virtual void toStream(std::ostream& out) const; 00673 00674 /** 00675 * {@inheritDoc} 00676 */ 00677 virtual bool isImmutable() const; 00678 00679 /** 00680 * {@inheritDoc} 00681 */ 00682 virtual bool equals(Object::View v) const; 00683 00684 /** 00685 * {@inheritDoc} 00686 */ 00687 virtual size32_t sizeOf() const; 00688 00689 00690 // ----- static helpers ------------------------------------------------- 00691 00692 public: 00693 /** 00694 * Return the Unicode character as UTF-16 from the char array, and 00695 * increment the pointer such that it references the start of the 00696 * next Unicode character. 00697 * 00698 * @param ach pointer to the start of the next UTF-8 code point. 00699 * 00700 * @return the next Unicode character 00701 * 00702 * @throws IllegalArgumentException if a non UTF-8 BMP sequence is 00703 * encountered 00704 */ 00705 static char16_t next(const char*& ach); 00706 00707 00708 // ----- data members --------------------------------------------------- 00709 00710 protected: 00711 /** 00712 * The number of unicode code points (characters) in the String. 00713 */ 00714 size32_t m_ccp; 00715 00716 00717 // ----- constants ------------------------------------------------------ 00718 00719 public: 00720 /** 00721 * String referencing NULL. 00722 * 00723 * This constant is generally only needed for defining a default 00724 * value for a function parameter: 00725 * 00726 * @code 00727 * void function(String::View vs = String::NULL_STRING) 00728 * @endcode 00729 * 00730 * Simply passing NULL as a default is not allowable for Strings as due 00731 * to auto-boxing the compiler is unable to determine if NULL indicates 00732 * a String* or a char*. For all other uses of NULL with String the 00733 * literal NULL is preferred. 00734 */ 00735 static const char* const NULL_STRING; 00736 }; 00737 00738 00739 // ----- helper macros ------------------------------------------------------ 00740 00741 /** 00742 * This macro will take any set of streamable contents and turn them into a 00743 * coherence#lang#String instance. 00744 * 00745 * @param CONTENTS the contents to use in constructing the String. 00746 * 00747 * Usage example: 00748 * @code 00749 * String::Handle hsFoo = COH_TO_STRING("This value: " << 5 << " is my value"); 00750 * @endcode 00751 */ 00752 #define COH_TO_STRING(CONTENTS) \ 00753 coherence::lang::String::create(((std::stringstream&) \ 00754 (*(std::auto_ptr<std::stringstream>(new std::stringstream())) \ 00755 << CONTENTS)).str()) 00756 00757 COH_CLOSE_NAMESPACE2 00758 00759 #endif // COH_STRING_HPP