unistr.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1998-2012, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File unistr.h
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   09/25/98    stephen     Creation.
00013 *   11/11/98    stephen     Changed per 11/9 code review.
00014 *   04/20/99    stephen     Overhauled per 4/16 code review.
00015 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
00016 *                           handleReplaceBetween(); other methods unchanged.
00017 *   06/25/01    grhoten     Remove dependency on iostream.
00018 ******************************************************************************
00019 */
00020 
00021 #ifndef UNISTR_H
00022 #define UNISTR_H
00023 
00029 #include "unicode/utypes.h"
00030 #include "unicode/rep.h"
00031 #include "unicode/std_string.h"
00032 #include "unicode/stringpiece.h"
00033 #include "unicode/bytestream.h"
00034 #include "unicode/ucasemap.h"
00035 
00036 struct UConverter;          // unicode/ucnv.h
00037 class  StringThreadTest;
00038 
/*
 * Defines U_COMPARE_CODE_POINT_ORDER as 0x8000 unless an earlier header has
 * already defined it; the #ifndef keeps the macro from being redefined.
 * NOTE(review): the headers named below presumably carry the same guarded
 * definition, and the name suggests an option flag for code point order
 * comparison -- confirm both there.
 */
00039 #ifndef U_COMPARE_CODE_POINT_ORDER
00040 /* see also ustring.h and unorm.h */
00046 #define U_COMPARE_CODE_POINT_ORDER  0x8000
00047 #endif
00048 
/*
 * Prototype for u_strlen(), emitted only while USTRING_H is not defined.
 * Takes a pointer to const UChar and returns an int32_t.
 * NOTE(review): USTRING_H is presumably the include guard of ustring.h, so
 * this repeats that header's prototype without including it -- confirm.
 */
00049 #ifndef USTRING_H
00050 
00053 U_STABLE int32_t U_EXPORT2
00054 u_strlen(const UChar *s);
00055 #endif
00056 
/*
 * UStringCaseMapper is a function type (not a pointer type); this header uses
 * it as "UStringCaseMapper *" in the private caseMap() declaration.
 * The U_STRING_CASE_MAPPER_DEFINED guard makes sure the typedef is emitted
 * only once, whichever header reaches it first.
 *
 * Parameters, as declared:
 *   csm                 - const UCaseMap pointer
 *   dest, destCapacity  - UChar buffer and its int32_t capacity
 *   src, srcLength      - const UChar text and its int32_t length
 *   pErrorCode          - pointer to a UErrorCode
 * Returns an int32_t.
 * NOTE(review): roles are inferred from the parameter names (output buffer,
 * input text, in/out error code; the result is presumably the output
 * length) -- confirm against the implementing functions.
 */
00062 #ifndef U_STRING_CASE_MAPPER_DEFINED
00063 #define U_STRING_CASE_MAPPER_DEFINED
00064 
00069 typedef int32_t U_CALLCONV
00070 UStringCaseMapper(const UCaseMap *csm,
00071                   UChar *dest, int32_t destCapacity,
00072                   const UChar *src, int32_t srcLength,
00073                   UErrorCode *pErrorCode);
00074 
00075 #endif
00076 
00077 U_NAMESPACE_BEGIN
00078 
00079 class BreakIterator;        // unicode/brkiter.h
00080 class Locale;               // unicode/locid.h
00081 class StringCharacterIterator;
00082 class UnicodeStringAppendable;  // unicode/appendable.h
00083 
00084 /* The <iostream> include has been moved to unicode/ustream.h */
00085 
/*
 * Short alias for the enumerator icu::UnicodeString::kInvariant. It is the
 * value passed for the EInvariant parameter in the fallback branch of the
 * UNICODE_STRING macro in this header.
 */
00096 #define US_INV icu::UnicodeString::kInvariant
00097 
/*
 * UNICODE_STRING(cs, _length): builds an icu::UnicodeString from the string
 * literal cs. The construction path is selected at preprocessing time:
 *   1. U_DECLARE_UTF16 is defined: cs is passed through U_DECLARE_UTF16 and
 *      the result is cast to const UChar *.
 *   2. wchar_t has the same size as UChar, and either the charset family is
 *      ASCII or (UChar is 2 bytes and U_WCHAR_IS_UTF16 is defined): an L
 *      prefix is pasted onto cs and the wide literal is cast to const UChar *.
 *   3. UChar is 1 byte and the charset family is ASCII: the narrow literal is
 *      cast to const UChar * directly.
 *   4. Otherwise: the (const char *, int32_t, EInvariant) constructor is
 *      called with US_INV.
 * Paths 1-3 call the UnicodeString(UBool isTerminated, const UChar *text,
 * int32_t textLength) constructor with TRUE as the first argument.
 * Because path 2 uses token pasting (L ## cs), cs must be a literal and not
 * a variable name.
 */
00115 #if defined(U_DECLARE_UTF16)
00116 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
00117 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
00118 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
00119 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
00120 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
00121 #else
00122 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
00123 #endif
00124 
/*
 * Shorthand for UNICODE_STRING(cs, -1), so the caller does not have to count
 * the characters of the literal.
 * NOTE(review): -1 presumably tells the constructor to determine the length
 * of the NUL-terminated text itself -- confirm against the constructor docs.
 */
00138 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
00139 
/*
 * UNISTR_FROM_CHAR_EXPLICIT prefixes the UnicodeString(UChar) and
 * UnicodeString(UChar32) constructors declared in this header.
 * A definition supplied before this header is included wins (#ifndef).
 * Otherwise it expands to "explicit" when any of U_COMBINED_IMPLEMENTATION,
 * U_COMMON_IMPLEMENTATION, U_I18N_IMPLEMENTATION or U_IO_IMPLEMENTATION is
 * defined, and to nothing in all other builds.
 */
00147 #ifndef UNISTR_FROM_CHAR_EXPLICIT
00148 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
00149     // Auto-"explicit" in ICU library code.
00150 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
00151 # else
00152     // Empty by default for source code compatibility.
00153 #   define UNISTR_FROM_CHAR_EXPLICIT
00154 # endif
00155 #endif
00156 
/*
 * UNISTR_FROM_STRING_EXPLICIT prefixes the UnicodeString(const UChar *text)
 * and UnicodeString(const char *codepageData) constructors declared in this
 * header.
 * A definition supplied before this header is included wins (#ifndef).
 * Otherwise it expands to "explicit" when any of U_COMBINED_IMPLEMENTATION,
 * U_COMMON_IMPLEMENTATION, U_I18N_IMPLEMENTATION or U_IO_IMPLEMENTATION is
 * defined, and to nothing in all other builds.
 */
00167 #ifndef UNISTR_FROM_STRING_EXPLICIT
00168 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
00169     // Auto-"explicit" in ICU library code.
00170 #   define UNISTR_FROM_STRING_EXPLICIT explicit
00171 # else
00172     // Empty by default for source code compatibility.
00173 #   define UNISTR_FROM_STRING_EXPLICIT
00174 # endif
00175 #endif
00176 
00246 class U_COMMON_API UnicodeString : public Replaceable
00247 {
00248 public:
00249 
/*
 * Tag enum with the single enumerator kInvariant. In this header it is the
 * trailing parameter of extract(start, startLength, target, targetCapacity,
 * inv) and of UnicodeString(src, length, inv), which distinguishes those
 * overloads from their siblings; the US_INV macro expands to kInvariant.
 * NOTE(review): presumably selects conversion of "invariant characters"
 * (the doCodepageCreate comment in this file points to utypes.h) -- confirm.
 */
00258   enum EInvariant {
00263     kInvariant
00264   };
00265 
00266   //========================================
00267   // Read-only operations
00268   //========================================
00269 
00270   /* Comparison - bitwise only - for international comparison use collation */
00271 
00279   inline UBool operator== (const UnicodeString& text) const;
00280 
00288   inline UBool operator!= (const UnicodeString& text) const;
00289 
00297   inline UBool operator> (const UnicodeString& text) const;
00298 
00306   inline UBool operator< (const UnicodeString& text) const;
00307 
00315   inline UBool operator>= (const UnicodeString& text) const;
00316 
00324   inline UBool operator<= (const UnicodeString& text) const;
00325 
00337   inline int8_t compare(const UnicodeString& text) const;
00338 
00353   inline int8_t compare(int32_t start,
00354          int32_t length,
00355          const UnicodeString& text) const;
00356 
00374    inline int8_t compare(int32_t start,
00375          int32_t length,
00376          const UnicodeString& srcText,
00377          int32_t srcStart,
00378          int32_t srcLength) const;
00379 
00392   inline int8_t compare(const UChar *srcChars,
00393          int32_t srcLength) const;
00394 
00409   inline int8_t compare(int32_t start,
00410          int32_t length,
00411          const UChar *srcChars) const;
00412 
00430   inline int8_t compare(int32_t start,
00431          int32_t length,
00432          const UChar *srcChars,
00433          int32_t srcStart,
00434          int32_t srcLength) const;
00435 
00453   inline int8_t compareBetween(int32_t start,
00454             int32_t limit,
00455             const UnicodeString& srcText,
00456             int32_t srcStart,
00457             int32_t srcLimit) const;
00458 
00476   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
00477 
00497   inline int8_t compareCodePointOrder(int32_t start,
00498                                       int32_t length,
00499                                       const UnicodeString& srcText) const;
00500 
00522    inline int8_t compareCodePointOrder(int32_t start,
00523                                        int32_t length,
00524                                        const UnicodeString& srcText,
00525                                        int32_t srcStart,
00526                                        int32_t srcLength) const;
00527 
00546   inline int8_t compareCodePointOrder(const UChar *srcChars,
00547                                       int32_t srcLength) const;
00548 
00568   inline int8_t compareCodePointOrder(int32_t start,
00569                                       int32_t length,
00570                                       const UChar *srcChars) const;
00571 
00593   inline int8_t compareCodePointOrder(int32_t start,
00594                                       int32_t length,
00595                                       const UChar *srcChars,
00596                                       int32_t srcStart,
00597                                       int32_t srcLength) const;
00598 
00620   inline int8_t compareCodePointOrderBetween(int32_t start,
00621                                              int32_t limit,
00622                                              const UnicodeString& srcText,
00623                                              int32_t srcStart,
00624                                              int32_t srcLimit) const;
00625 
00644   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
00645 
00666   inline int8_t caseCompare(int32_t start,
00667          int32_t length,
00668          const UnicodeString& srcText,
00669          uint32_t options) const;
00670 
00693   inline int8_t caseCompare(int32_t start,
00694          int32_t length,
00695          const UnicodeString& srcText,
00696          int32_t srcStart,
00697          int32_t srcLength,
00698          uint32_t options) const;
00699 
00719   inline int8_t caseCompare(const UChar *srcChars,
00720          int32_t srcLength,
00721          uint32_t options) const;
00722 
00743   inline int8_t caseCompare(int32_t start,
00744          int32_t length,
00745          const UChar *srcChars,
00746          uint32_t options) const;
00747 
00770   inline int8_t caseCompare(int32_t start,
00771          int32_t length,
00772          const UChar *srcChars,
00773          int32_t srcStart,
00774          int32_t srcLength,
00775          uint32_t options) const;
00776 
00799   inline int8_t caseCompareBetween(int32_t start,
00800             int32_t limit,
00801             const UnicodeString& srcText,
00802             int32_t srcStart,
00803             int32_t srcLimit,
00804             uint32_t options) const;
00805 
00813   inline UBool startsWith(const UnicodeString& text) const;
00814 
00825   inline UBool startsWith(const UnicodeString& srcText,
00826             int32_t srcStart,
00827             int32_t srcLength) const;
00828 
00837   inline UBool startsWith(const UChar *srcChars,
00838             int32_t srcLength) const;
00839 
00849   inline UBool startsWith(const UChar *srcChars,
00850             int32_t srcStart,
00851             int32_t srcLength) const;
00852 
00860   inline UBool endsWith(const UnicodeString& text) const;
00861 
00872   inline UBool endsWith(const UnicodeString& srcText,
00873           int32_t srcStart,
00874           int32_t srcLength) const;
00875 
00884   inline UBool endsWith(const UChar *srcChars,
00885           int32_t srcLength) const;
00886 
00897   inline UBool endsWith(const UChar *srcChars,
00898           int32_t srcStart,
00899           int32_t srcLength) const;
00900 
00901 
00902   /* Searching - bitwise only */
00903 
00912   inline int32_t indexOf(const UnicodeString& text) const;
00913 
00923   inline int32_t indexOf(const UnicodeString& text,
00924               int32_t start) const;
00925 
00937   inline int32_t indexOf(const UnicodeString& text,
00938               int32_t start,
00939               int32_t length) const;
00940 
00957   inline int32_t indexOf(const UnicodeString& srcText,
00958               int32_t srcStart,
00959               int32_t srcLength,
00960               int32_t start,
00961               int32_t length) const;
00962 
00974   inline int32_t indexOf(const UChar *srcChars,
00975               int32_t srcLength,
00976               int32_t start) const;
00977 
00990   inline int32_t indexOf(const UChar *srcChars,
00991               int32_t srcLength,
00992               int32_t start,
00993               int32_t length) const;
00994 
01011   int32_t indexOf(const UChar *srcChars,
01012               int32_t srcStart,
01013               int32_t srcLength,
01014               int32_t start,
01015               int32_t length) const;
01016 
01024   inline int32_t indexOf(UChar c) const;
01025 
01034   inline int32_t indexOf(UChar32 c) const;
01035 
01044   inline int32_t indexOf(UChar c,
01045               int32_t start) const;
01046 
01056   inline int32_t indexOf(UChar32 c,
01057               int32_t start) const;
01058 
01069   inline int32_t indexOf(UChar c,
01070               int32_t start,
01071               int32_t length) const;
01072 
01084   inline int32_t indexOf(UChar32 c,
01085               int32_t start,
01086               int32_t length) const;
01087 
01096   inline int32_t lastIndexOf(const UnicodeString& text) const;
01097 
01107   inline int32_t lastIndexOf(const UnicodeString& text,
01108               int32_t start) const;
01109 
01121   inline int32_t lastIndexOf(const UnicodeString& text,
01122               int32_t start,
01123               int32_t length) const;
01124 
01141   inline int32_t lastIndexOf(const UnicodeString& srcText,
01142               int32_t srcStart,
01143               int32_t srcLength,
01144               int32_t start,
01145               int32_t length) const;
01146 
01157   inline int32_t lastIndexOf(const UChar *srcChars,
01158               int32_t srcLength,
01159               int32_t start) const;
01160 
01173   inline int32_t lastIndexOf(const UChar *srcChars,
01174               int32_t srcLength,
01175               int32_t start,
01176               int32_t length) const;
01177 
01194   int32_t lastIndexOf(const UChar *srcChars,
01195               int32_t srcStart,
01196               int32_t srcLength,
01197               int32_t start,
01198               int32_t length) const;
01199 
01207   inline int32_t lastIndexOf(UChar c) const;
01208 
01217   inline int32_t lastIndexOf(UChar32 c) const;
01218 
01227   inline int32_t lastIndexOf(UChar c,
01228               int32_t start) const;
01229 
01239   inline int32_t lastIndexOf(UChar32 c,
01240               int32_t start) const;
01241 
01252   inline int32_t lastIndexOf(UChar c,
01253               int32_t start,
01254               int32_t length) const;
01255 
01267   inline int32_t lastIndexOf(UChar32 c,
01268               int32_t start,
01269               int32_t length) const;
01270 
01271 
01272   /* Character access */
01273 
01282   inline UChar charAt(int32_t offset) const;
01283 
01291   inline UChar operator[] (int32_t offset) const;
01292 
01304   UChar32 char32At(int32_t offset) const;
01305 
01321   int32_t getChar32Start(int32_t offset) const;
01322 
01339   int32_t getChar32Limit(int32_t offset) const;
01340 
01391   int32_t moveIndex32(int32_t index, int32_t delta) const;
01392 
01393   /* Substring extraction */
01394 
01410   inline void extract(int32_t start,
01411            int32_t length,
01412            UChar *dst,
01413            int32_t dstStart = 0) const;
01414 
01436   int32_t
01437   extract(UChar *dest, int32_t destCapacity,
01438           UErrorCode &errorCode) const;
01439 
01450   inline void extract(int32_t start,
01451            int32_t length,
01452            UnicodeString& target) const;
01453 
01465   inline void extractBetween(int32_t start,
01466               int32_t limit,
01467               UChar *dst,
01468               int32_t dstStart = 0) const;
01469 
01479   virtual void extractBetween(int32_t start,
01480               int32_t limit,
01481               UnicodeString& target) const;
01482 
01504   int32_t extract(int32_t start,
01505            int32_t startLength,
01506            char *target,
01507            int32_t targetCapacity,
01508            enum EInvariant inv) const;
01509 
01510 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
01511 
01531   int32_t extract(int32_t start,
01532            int32_t startLength,
01533            char *target,
01534            uint32_t targetLength) const;
01535 
01536 #endif
01537 
01538 #if !UCONFIG_NO_CONVERSION
01539 
01565   inline int32_t extract(int32_t start,
01566                  int32_t startLength,
01567                  char *target,
01568                  const char *codepage = 0) const;
01569 
01599   int32_t extract(int32_t start,
01600            int32_t startLength,
01601            char *target,
01602            uint32_t targetLength,
01603            const char *codepage) const;
01604 
01622   int32_t extract(char *dest, int32_t destCapacity,
01623                   UConverter *cnv,
01624                   UErrorCode &errorCode) const;
01625 
01626 #endif
01627 
01641   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
01642 
01653   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
01654 
01666   void toUTF8(ByteSink &sink) const;
01667 
01668 #if U_HAVE_STD_STRING
01669 
/*
 * Feeds this string through toUTF8(ByteSink &) with a StringByteSink wrapped
 * around result, then returns result so that calls can be chained.
 *
 * StringClass - any type StringByteSink<StringClass> can be instantiated with
 *               (NOTE(review): presumably std::string or a look-alike,
 *               given the surrounding U_HAVE_STD_STRING guard -- confirm).
 * result      - string object handed to the sink; receives the UTF-8 output.
 * Returns a reference to result.
 */
01682   template<typename StringClass>
01683   StringClass &toUTF8String(StringClass &result) const {
          // The sink is constructed from the address of the caller's string.
01684     StringByteSink<StringClass> utf8Sink(&result);
01685     this->toUTF8(utf8Sink);
01686     return result;
01687   }
01688 
01689 #endif
01690 
01706   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
01707 
01708   /* Length operations */
01709 
01718   inline int32_t length(void) const;
01719 
01733   int32_t
01734   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
01735 
01759   UBool
01760   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
01761 
01767   inline UBool isEmpty(void) const;
01768 
01778   inline int32_t getCapacity(void) const;
01779 
01780   /* Other operations */
01781 
01787   inline int32_t hashCode(void) const;
01788 
01801   inline UBool isBogus(void) const;
01802 
01803 
01804   //========================================
01805   // Write operations
01806   //========================================
01807 
01808   /* Assignment operations */
01809 
01817   UnicodeString &operator=(const UnicodeString &srcText);
01818 
01841   UnicodeString &fastCopyFrom(const UnicodeString &src);
01842 
01850   inline UnicodeString& operator= (UChar ch);
01851 
01859   inline UnicodeString& operator= (UChar32 ch);
01860 
01872   inline UnicodeString& setTo(const UnicodeString& srcText,
01873                int32_t srcStart);
01874 
01888   inline UnicodeString& setTo(const UnicodeString& srcText,
01889                int32_t srcStart,
01890                int32_t srcLength);
01891 
01900   inline UnicodeString& setTo(const UnicodeString& srcText);
01901 
01910   inline UnicodeString& setTo(const UChar *srcChars,
01911                int32_t srcLength);
01912 
01921   UnicodeString& setTo(UChar srcChar);
01922 
01931   UnicodeString& setTo(UChar32 srcChar);
01932 
01956   UnicodeString &setTo(UBool isTerminated,
01957                        const UChar *text,
01958                        int32_t textLength);
01959 
01979   UnicodeString &setTo(UChar *buffer,
01980                        int32_t buffLength,
01981                        int32_t buffCapacity);
01982 
02023   void setToBogus();
02024 
02032   UnicodeString& setCharAt(int32_t offset,
02033                UChar ch);
02034 
02035 
02036   /* Append operations */
02037 
02045  inline  UnicodeString& operator+= (UChar ch);
02046 
02054  inline  UnicodeString& operator+= (UChar32 ch);
02055 
02063   inline UnicodeString& operator+= (const UnicodeString& srcText);
02064 
02079   inline UnicodeString& append(const UnicodeString& srcText,
02080             int32_t srcStart,
02081             int32_t srcLength);
02082 
02090   inline UnicodeString& append(const UnicodeString& srcText);
02091 
02105   inline UnicodeString& append(const UChar *srcChars,
02106             int32_t srcStart,
02107             int32_t srcLength);
02108 
02118   inline UnicodeString& append(const UChar *srcChars,
02119             int32_t srcLength);
02120 
02127   inline UnicodeString& append(UChar srcChar);
02128 
02135   UnicodeString& append(UChar32 srcChar);
02136 
02137 
02138   /* Insert operations */
02139 
02153   inline UnicodeString& insert(int32_t start,
02154             const UnicodeString& srcText,
02155             int32_t srcStart,
02156             int32_t srcLength);
02157 
02166   inline UnicodeString& insert(int32_t start,
02167             const UnicodeString& srcText);
02168 
02182   inline UnicodeString& insert(int32_t start,
02183             const UChar *srcChars,
02184             int32_t srcStart,
02185             int32_t srcLength);
02186 
02196   inline UnicodeString& insert(int32_t start,
02197             const UChar *srcChars,
02198             int32_t srcLength);
02199 
02208   inline UnicodeString& insert(int32_t start,
02209             UChar srcChar);
02210 
02219   inline UnicodeString& insert(int32_t start,
02220             UChar32 srcChar);
02221 
02222 
02223   /* Replace operations */
02224 
02242   UnicodeString& replace(int32_t start,
02243              int32_t length,
02244              const UnicodeString& srcText,
02245              int32_t srcStart,
02246              int32_t srcLength);
02247 
02260   UnicodeString& replace(int32_t start,
02261              int32_t length,
02262              const UnicodeString& srcText);
02263 
02281   UnicodeString& replace(int32_t start,
02282              int32_t length,
02283              const UChar *srcChars,
02284              int32_t srcStart,
02285              int32_t srcLength);
02286 
02299   inline UnicodeString& replace(int32_t start,
02300              int32_t length,
02301              const UChar *srcChars,
02302              int32_t srcLength);
02303 
02315   inline UnicodeString& replace(int32_t start,
02316              int32_t length,
02317              UChar srcChar);
02318 
02330   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
02331 
02341   inline UnicodeString& replaceBetween(int32_t start,
02342                 int32_t limit,
02343                 const UnicodeString& srcText);
02344 
02359   inline UnicodeString& replaceBetween(int32_t start,
02360                 int32_t limit,
02361                 const UnicodeString& srcText,
02362                 int32_t srcStart,
02363                 int32_t srcLimit);
02364 
02375   virtual void handleReplaceBetween(int32_t start,
02376                                     int32_t limit,
02377                                     const UnicodeString& text);
02378 
02384   virtual UBool hasMetaData() const;
02385 
02401   virtual void copy(int32_t start, int32_t limit, int32_t dest);
02402 
02403   /* Search and replace operations */
02404 
02413   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
02414                 const UnicodeString& newText);
02415 
02427   inline UnicodeString& findAndReplace(int32_t start,
02428                 int32_t length,
02429                 const UnicodeString& oldText,
02430                 const UnicodeString& newText);
02431 
02449   UnicodeString& findAndReplace(int32_t start,
02450                 int32_t length,
02451                 const UnicodeString& oldText,
02452                 int32_t oldStart,
02453                 int32_t oldLength,
02454                 const UnicodeString& newText,
02455                 int32_t newStart,
02456                 int32_t newLength);
02457 
02458 
02459   /* Remove operations */
02460 
02466   inline UnicodeString& remove(void);
02467 
02476   inline UnicodeString& remove(int32_t start,
02477                                int32_t length = (int32_t)INT32_MAX);
02478 
02487   inline UnicodeString& removeBetween(int32_t start,
02488                                       int32_t limit = (int32_t)INT32_MAX);
02489 
02499   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
02500 
02501   /* Length operations */
02502 
02514   UBool padLeading(int32_t targetLength,
02515                     UChar padChar = 0x0020);
02516 
02528   UBool padTrailing(int32_t targetLength,
02529                      UChar padChar = 0x0020);
02530 
02537   inline UBool truncate(int32_t targetLength);
02538 
02544   UnicodeString& trim(void);
02545 
02546 
02547   /* Miscellaneous operations */
02548 
02554   inline UnicodeString& reverse(void);
02555 
02564   inline UnicodeString& reverse(int32_t start,
02565              int32_t length);
02566 
02573   UnicodeString& toUpper(void);
02574 
02582   UnicodeString& toUpper(const Locale& locale);
02583 
02590   UnicodeString& toLower(void);
02591 
02599   UnicodeString& toLower(const Locale& locale);
02600 
02601 #if !UCONFIG_NO_BREAK_ITERATION
02602 
02629   UnicodeString &toTitle(BreakIterator *titleIter);
02630 
02658   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
02659 
02691   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
02692 
02693 #endif
02694 
02708   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
02709 
02710   //========================================
02711   // Access to the internal buffer
02712   //========================================
02713 
02757   UChar *getBuffer(int32_t minCapacity);
02758 
02779   void releaseBuffer(int32_t newLength=-1);
02780 
02811   inline const UChar *getBuffer() const;
02812 
02846   inline const UChar *getTerminatedBuffer();
02847 
02848   //========================================
02849   // Constructors
02850   //========================================
02851 
02855   UnicodeString();
02856 
02868   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
02869 
02879   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
02880 
02890   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
02891 
02902   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
02903 
02911   UnicodeString(const UChar *text,
02912         int32_t textLength);
02913 
02936   UnicodeString(UBool isTerminated,
02937                 const UChar *text,
02938                 int32_t textLength);
02939 
02958   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
02959 
02960 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
02961 
02981   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
02982 
02991   UnicodeString(const char *codepageData, int32_t dataLength);
02992 
02993 #endif
02994 
02995 #if !UCONFIG_NO_CONVERSION
02996 
03014   UnicodeString(const char *codepageData, const char *codepage);
03015 
03033   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
03034 
03056   UnicodeString(
03057         const char *src, int32_t srcLength,
03058         UConverter *cnv,
03059         UErrorCode &errorCode);
03060 
03061 #endif
03062 
03087   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
03088 
03089 
03095   UnicodeString(const UnicodeString& that);
03096 
03103   UnicodeString(const UnicodeString& src, int32_t srcStart);
03104 
03112   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
03113 
03130   virtual Replaceable *clone() const;
03131 
03135   virtual ~UnicodeString();
03136 
03150   static UnicodeString fromUTF8(const StringPiece &utf8);
03151 
03163   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
03164 
03165   /* Miscellaneous operations */
03166 
03201   UnicodeString unescape() const;
03202 
03222   UChar32 unescapeAt(int32_t &offset) const;
03223 
03229   static UClassID U_EXPORT2 getStaticClassID();
03230 
03236   virtual UClassID getDynamicClassID() const;
03237 
03238   //========================================
03239   // Implementation methods
03240   //========================================
03241 
03242 protected:
03247   virtual int32_t getLength() const;
03248 
03254   virtual UChar getCharAt(int32_t offset) const;
03255 
03261   virtual UChar32 getChar32At(int32_t offset) const;
03262 
03263 private:
03264   // For char* constructors. Could be made public.
03265   UnicodeString &setToUTF8(const StringPiece &utf8);
03266   // For extract(char*).
03267   // We could make a toUTF8(target, capacity, errorCode) public but not
03268   // this version: New API will be cleaner if we make callers create substrings
03269   // rather than having start+length on every method,
03270   // and it should take a UErrorCode&.
03271   int32_t
03272   toUTF8(int32_t start, int32_t len,
03273          char *target, int32_t capacity) const;
03274 
03279   UBool doEquals(const UnicodeString &text, int32_t len) const;
03280 
03281   inline int8_t
03282   doCompare(int32_t start,
03283            int32_t length,
03284            const UnicodeString& srcText,
03285            int32_t srcStart,
03286            int32_t srcLength) const;
03287 
03288   int8_t doCompare(int32_t start,
03289            int32_t length,
03290            const UChar *srcChars,
03291            int32_t srcStart,
03292            int32_t srcLength) const;
03293 
03294   inline int8_t
03295   doCompareCodePointOrder(int32_t start,
03296                           int32_t length,
03297                           const UnicodeString& srcText,
03298                           int32_t srcStart,
03299                           int32_t srcLength) const;
03300 
03301   int8_t doCompareCodePointOrder(int32_t start,
03302                                  int32_t length,
03303                                  const UChar *srcChars,
03304                                  int32_t srcStart,
03305                                  int32_t srcLength) const;
03306 
03307   inline int8_t
03308   doCaseCompare(int32_t start,
03309                 int32_t length,
03310                 const UnicodeString &srcText,
03311                 int32_t srcStart,
03312                 int32_t srcLength,
03313                 uint32_t options) const;
03314 
03315   int8_t
03316   doCaseCompare(int32_t start,
03317                 int32_t length,
03318                 const UChar *srcChars,
03319                 int32_t srcStart,
03320                 int32_t srcLength,
03321                 uint32_t options) const;
03322 
03323   int32_t doIndexOf(UChar c,
03324             int32_t start,
03325             int32_t length) const;
03326 
03327   int32_t doIndexOf(UChar32 c,
03328                         int32_t start,
03329                         int32_t length) const;
03330 
03331   int32_t doLastIndexOf(UChar c,
03332                 int32_t start,
03333                 int32_t length) const;
03334 
03335   int32_t doLastIndexOf(UChar32 c,
03336                             int32_t start,
03337                             int32_t length) const;
03338 
03339   void doExtract(int32_t start,
03340          int32_t length,
03341          UChar *dst,
03342          int32_t dstStart) const;
03343 
03344   inline void doExtract(int32_t start,
03345          int32_t length,
03346          UnicodeString& target) const;
03347 
03348   inline UChar doCharAt(int32_t offset)  const;
03349 
03350   UnicodeString& doReplace(int32_t start,
03351                int32_t length,
03352                const UnicodeString& srcText,
03353                int32_t srcStart,
03354                int32_t srcLength);
03355 
03356   UnicodeString& doReplace(int32_t start,
03357                int32_t length,
03358                const UChar *srcChars,
03359                int32_t srcStart,
03360                int32_t srcLength);
03361 
03362   UnicodeString& doReverse(int32_t start,
03363                int32_t length);
03364 
03365   // calculate hash code
03366   int32_t doHashCode(void) const;
03367 
03368   // get pointer to start of array
03369   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
03370   inline UChar* getArrayStart(void);
03371   inline const UChar* getArrayStart(void) const;
03372 
03373   // A UnicodeString object (not necessarily its current buffer)
03374   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
03375   inline UBool isWritable() const;
03376 
03377   // Is the current buffer writable?
03378   inline UBool isBufferWritable() const;
03379 
03380   // None of the following does releaseArray().
03381   inline void setLength(int32_t len);        // sets only fShortLength and fLength
03382   inline void setToEmpty();                  // sets fFlags=kShortString
03383   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
03384 
03385   // allocate the array; result may be fStackBuffer
03386   // sets refCount to 1 if appropriate
03387   // sets fArray, fCapacity, and fFlags
03388   // returns boolean for success or failure
03389   UBool allocate(int32_t capacity);
03390 
03391   // release the array if owned
03392   void releaseArray(void);
03393 
03394   // turn a bogus string into an empty one
03395   void unBogus();
03396 
03397   // implements assignment operator, copy constructor, and fastCopyFrom()
03398   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
03399 
03400   // Pin start and length to acceptable values.
03401   inline void pinIndex(int32_t& start) const;
03402   inline void pinIndices(int32_t& start,
03403                          int32_t& length) const;
03404 
03405 #if !UCONFIG_NO_CONVERSION
03406 
03407   /* Internal extract() using UConverter. */
03408   int32_t doExtract(int32_t start, int32_t length,
03409                     char *dest, int32_t destCapacity,
03410                     UConverter *cnv,
03411                     UErrorCode &errorCode) const;
03412 
03413   /*
03414    * Real constructor for converting from codepage data.
03415    * It assumes that it is called with !fRefCounted.
03416    *
03417    * If <code>codepage==0</code>, then the default converter
03418    * is used for the platform encoding.
03419    * If <code>codepage</code> is an empty string (<code>""</code>),
03420    * then a simple conversion is performed on the codepage-invariant
03421    * subset ("invariant characters") of the platform encoding. See utypes.h.
03422    */
03423   void doCodepageCreate(const char *codepageData,
03424                         int32_t dataLength,
03425                         const char *codepage);
03426 
03427   /*
03428    * Worker function for creating a UnicodeString from
03429    * a codepage string using a UConverter.
03430    */
03431   void
03432   doCodepageCreate(const char *codepageData,
03433                    int32_t dataLength,
03434                    UConverter *converter,
03435                    UErrorCode &status);
03436 
03437 #endif
03438 
03439   /*
03440    * This function is called when write access to the array
03441    * is necessary.
03442    *
03443    * We need to make a copy of the array if
03444    * the buffer is read-only, or
03445    * the buffer is refCounted (shared), and refCount>1, or
03446    * the buffer is too small.
03447    *
03448    * Return FALSE if memory could not be allocated.
03449    */
03450   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
03451                             int32_t growCapacity = -1,
03452                             UBool doCopyArray = TRUE,
03453                             int32_t **pBufferToDelete = 0,
03454                             UBool forceClone = FALSE);
03455 
03461   UnicodeString &
03462   caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
03463 
03464   // ref counting
03465   void addRef(void);
03466   int32_t removeRef(void);
03467   int32_t refCount(void) const;
03468 
03469   // constants
03470   enum {
03471     // Set the stack buffer size so that sizeof(UnicodeString) is,
03472     // naturally (without padding), a multiple of sizeof(pointer).
03473     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
03474     kInvalidUChar=0xffff, // invalid UChar index
03475     kGrowSize=128, // grow size for this buffer
03476     kInvalidHashCode=0, // invalid hash code
03477     kEmptyHashCode=1, // hash code for empty string
03478 
03479     // bit flag values for fFlags
03480     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
03481     kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
03482     kRefCounted=4,      // there is a refCount field before the characters in fArray
03483     kBufferIsReadonly=8,// do not write to this buffer
03484     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
03485                         // and releaseBuffer(newLength) must be called
03486 
03487     // combined values for convenience
03488     kShortString=kUsingStackBuffer,
03489     kLongString=kRefCounted,
03490     kReadonlyAlias=kBufferIsReadonly,
03491     kWritableAlias=0
03492   };
03493 
03494   friend class StringThreadTest;
03495   friend class UnicodeStringAppendable;
03496 
03497   union StackBufferOrFields;        // forward declaration necessary before friend declaration
03498   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
03499 
03500   /*
03501    * The following are all the class fields that are stored
03502    * in each UnicodeString object.
03503    * Note that UnicodeString has virtual functions,
03504    * therefore there is an implicit vtable pointer
03505    * as the first real field.
03506    * The fields should be aligned such that no padding is necessary.
03507    * On 32-bit machines, the size should be 32 bytes,
03508    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
03509    *
03510    * We use a hack to achieve this.
03511    *
03512    * With at least some compilers, each of the following is forced to
03513    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
03514    * rounded up with additional padding if the fields do not already fit that requirement:
03515    * - sizeof(class UnicodeString)
03516    * - offsetof(UnicodeString, fUnion)
03517    * - sizeof(fUnion)
03518    * - sizeof(fFields)
03519    *
03520    * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
03521    * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
03522    * (Padding at the end of fFields is ok:
03523    * As long as there is no padding after fStackBuffer, it is not wasted space.)
03524    *
03525    * We further assume that the compiler does not reorder the fields,
03526    * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
03527    * with at most some padding (but no other field) in between.
03528    * (Padding there would be wasted space, but functionally harmless.)
03529    *
03530    * We use a few more sizeof(pointer)'s chunks of space with
03531    * fRestOfStackBuffer, fShortLength and fFlags,
03532    * to get up exactly to the intended sizeof(UnicodeString).
03533    */
03534   // (implicit) *vtable;
03535   union StackBufferOrFields {
03536     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
03537     // else fFields is used
03538     UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
03539     struct {
03540       UChar   *fArray;    // the Unicode data
03541       int32_t fCapacity;  // capacity of fArray (in UChars)
03542       int32_t fLength;    // number of characters in fArray if >127; else undefined
03543     } fFields;
03544   } fUnion;
03545   UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
03546   int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
03547   uint8_t fFlags;       // bit flags: see constants above
03548 };
03549 
03558 U_COMMON_API UnicodeString U_EXPORT2
03559 operator+ (const UnicodeString &s1, const UnicodeString &s2);
03560 
03561 //========================================
03562 // Inline members
03563 //========================================
03564 
03565 //========================================
03566 // Privates
03567 //========================================
03568 
03569 inline void
03570 UnicodeString::pinIndex(int32_t& start) const
03571 {
03572   // pin index
03573   if(start < 0) {
03574     start = 0;
03575   } else if(start > length()) {
03576     start = length();
03577   }
03578 }
03579 
03580 inline void
03581 UnicodeString::pinIndices(int32_t& start,
03582                           int32_t& _length) const
03583 {
03584   // pin indices
03585   int32_t len = length();
03586   if(start < 0) {
03587     start = 0;
03588   } else if(start > len) {
03589     start = len;
03590   }
03591   if(_length < 0) {
03592     _length = 0;
03593   } else if(_length > (len - start)) {
03594     _length = (len - start);
03595   }
03596 }
03597 
03598 inline UChar*
03599 UnicodeString::getArrayStart()
03600 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03601 
03602 inline const UChar*
03603 UnicodeString::getArrayStart() const
03604 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03605 
03606 //========================================
03607 // Read-only implementation methods
03608 //========================================
03609 inline int32_t
03610 UnicodeString::length() const
03611 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
03612 
03613 inline int32_t
03614 UnicodeString::getCapacity() const
03615 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
03616 
03617 inline int32_t
03618 UnicodeString::hashCode() const
03619 { return doHashCode(); }
03620 
03621 inline UBool
03622 UnicodeString::isBogus() const
03623 { return (UBool)(fFlags & kIsBogus); }
03624 
03625 inline UBool
03626 UnicodeString::isWritable() const
03627 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
03628 
03629 inline UBool
03630 UnicodeString::isBufferWritable() const
03631 {
03632   return (UBool)(
03633       !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
03634       (!(fFlags&kRefCounted) || refCount()==1));
03635 }
03636 
03637 inline const UChar *
03638 UnicodeString::getBuffer() const {
03639   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
03640     return 0;
03641   } else if(fFlags&kUsingStackBuffer) {
03642     return fUnion.fStackBuffer;
03643   } else {
03644     return fUnion.fFields.fArray;
03645   }
03646 }
03647 
03648 //========================================
03649 // Read-only alias methods
03650 //========================================
03651 inline int8_t
03652 UnicodeString::doCompare(int32_t start,
03653               int32_t thisLength,
03654               const UnicodeString& srcText,
03655               int32_t srcStart,
03656               int32_t srcLength) const
03657 {
03658   if(srcText.isBogus()) {
03659     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03660   } else {
03661     srcText.pinIndices(srcStart, srcLength);
03662     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03663   }
03664 }
03665 
03666 inline UBool
03667 UnicodeString::operator== (const UnicodeString& text) const
03668 {
03669   if(isBogus()) {
03670     return text.isBogus();
03671   } else {
03672     int32_t len = length(), textLength = text.length();
03673     return !text.isBogus() && len == textLength && doEquals(text, len);
03674   }
03675 }
03676 
03677 inline UBool
03678 UnicodeString::operator!= (const UnicodeString& text) const
03679 { return (! operator==(text)); }
03680 
03681 inline UBool
03682 UnicodeString::operator> (const UnicodeString& text) const
03683 { return doCompare(0, length(), text, 0, text.length()) == 1; }
03684 
03685 inline UBool
03686 UnicodeString::operator< (const UnicodeString& text) const
03687 { return doCompare(0, length(), text, 0, text.length()) == -1; }
03688 
03689 inline UBool
03690 UnicodeString::operator>= (const UnicodeString& text) const
03691 { return doCompare(0, length(), text, 0, text.length()) != -1; }
03692 
03693 inline UBool
03694 UnicodeString::operator<= (const UnicodeString& text) const
03695 { return doCompare(0, length(), text, 0, text.length()) != 1; }
03696 
03697 inline int8_t
03698 UnicodeString::compare(const UnicodeString& text) const
03699 { return doCompare(0, length(), text, 0, text.length()); }
03700 
03701 inline int8_t
03702 UnicodeString::compare(int32_t start,
03703                int32_t _length,
03704                const UnicodeString& srcText) const
03705 { return doCompare(start, _length, srcText, 0, srcText.length()); }
03706 
03707 inline int8_t
03708 UnicodeString::compare(const UChar *srcChars,
03709                int32_t srcLength) const
03710 { return doCompare(0, length(), srcChars, 0, srcLength); }
03711 
03712 inline int8_t
03713 UnicodeString::compare(int32_t start,
03714                int32_t _length,
03715                const UnicodeString& srcText,
03716                int32_t srcStart,
03717                int32_t srcLength) const
03718 { return doCompare(start, _length, srcText, srcStart, srcLength); }
03719 
03720 inline int8_t
03721 UnicodeString::compare(int32_t start,
03722                int32_t _length,
03723                const UChar *srcChars) const
03724 { return doCompare(start, _length, srcChars, 0, _length); }
03725 
03726 inline int8_t
03727 UnicodeString::compare(int32_t start,
03728                int32_t _length,
03729                const UChar *srcChars,
03730                int32_t srcStart,
03731                int32_t srcLength) const
03732 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
03733 
03734 inline int8_t
03735 UnicodeString::compareBetween(int32_t start,
03736                   int32_t limit,
03737                   const UnicodeString& srcText,
03738                   int32_t srcStart,
03739                   int32_t srcLimit) const
03740 { return doCompare(start, limit - start,
03741            srcText, srcStart, srcLimit - srcStart); }
03742 
03743 inline int8_t
03744 UnicodeString::doCompareCodePointOrder(int32_t start,
03745                                        int32_t thisLength,
03746                                        const UnicodeString& srcText,
03747                                        int32_t srcStart,
03748                                        int32_t srcLength) const
03749 {
03750   if(srcText.isBogus()) {
03751     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03752   } else {
03753     srcText.pinIndices(srcStart, srcLength);
03754     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03755   }
03756 }
03757 
03758 inline int8_t
03759 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
03760 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
03761 
03762 inline int8_t
03763 UnicodeString::compareCodePointOrder(int32_t start,
03764                                      int32_t _length,
03765                                      const UnicodeString& srcText) const
03766 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
03767 
03768 inline int8_t
03769 UnicodeString::compareCodePointOrder(const UChar *srcChars,
03770                                      int32_t srcLength) const
03771 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
03772 
03773 inline int8_t
03774 UnicodeString::compareCodePointOrder(int32_t start,
03775                                      int32_t _length,
03776                                      const UnicodeString& srcText,
03777                                      int32_t srcStart,
03778                                      int32_t srcLength) const
03779 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
03780 
03781 inline int8_t
03782 UnicodeString::compareCodePointOrder(int32_t start,
03783                                      int32_t _length,
03784                                      const UChar *srcChars) const
03785 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
03786 
03787 inline int8_t
03788 UnicodeString::compareCodePointOrder(int32_t start,
03789                                      int32_t _length,
03790                                      const UChar *srcChars,
03791                                      int32_t srcStart,
03792                                      int32_t srcLength) const
03793 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
03794 
03795 inline int8_t
03796 UnicodeString::compareCodePointOrderBetween(int32_t start,
03797                                             int32_t limit,
03798                                             const UnicodeString& srcText,
03799                                             int32_t srcStart,
03800                                             int32_t srcLimit) const
03801 { return doCompareCodePointOrder(start, limit - start,
03802            srcText, srcStart, srcLimit - srcStart); }
03803 
03804 inline int8_t
03805 UnicodeString::doCaseCompare(int32_t start,
03806                              int32_t thisLength,
03807                              const UnicodeString &srcText,
03808                              int32_t srcStart,
03809                              int32_t srcLength,
03810                              uint32_t options) const
03811 {
03812   if(srcText.isBogus()) {
03813     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03814   } else {
03815     srcText.pinIndices(srcStart, srcLength);
03816     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
03817   }
03818 }
03819 
03820 inline int8_t
03821 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
03822   return doCaseCompare(0, length(), text, 0, text.length(), options);
03823 }
03824 
03825 inline int8_t
03826 UnicodeString::caseCompare(int32_t start,
03827                            int32_t _length,
03828                            const UnicodeString &srcText,
03829                            uint32_t options) const {
03830   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
03831 }
03832 
03833 inline int8_t
03834 UnicodeString::caseCompare(const UChar *srcChars,
03835                            int32_t srcLength,
03836                            uint32_t options) const {
03837   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
03838 }
03839 
03840 inline int8_t
03841 UnicodeString::caseCompare(int32_t start,
03842                            int32_t _length,
03843                            const UnicodeString &srcText,
03844                            int32_t srcStart,
03845                            int32_t srcLength,
03846                            uint32_t options) const {
03847   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
03848 }
03849 
03850 inline int8_t
03851 UnicodeString::caseCompare(int32_t start,
03852                            int32_t _length,
03853                            const UChar *srcChars,
03854                            uint32_t options) const {
03855   return doCaseCompare(start, _length, srcChars, 0, _length, options);
03856 }
03857 
03858 inline int8_t
03859 UnicodeString::caseCompare(int32_t start,
03860                            int32_t _length,
03861                            const UChar *srcChars,
03862                            int32_t srcStart,
03863                            int32_t srcLength,
03864                            uint32_t options) const {
03865   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
03866 }
03867 
03868 inline int8_t
03869 UnicodeString::caseCompareBetween(int32_t start,
03870                                   int32_t limit,
03871                                   const UnicodeString &srcText,
03872                                   int32_t srcStart,
03873                                   int32_t srcLimit,
03874                                   uint32_t options) const {
03875   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
03876 }
03877 
03878 inline int32_t
03879 UnicodeString::indexOf(const UnicodeString& srcText,
03880                int32_t srcStart,
03881                int32_t srcLength,
03882                int32_t start,
03883                int32_t _length) const
03884 {
03885   if(!srcText.isBogus()) {
03886     srcText.pinIndices(srcStart, srcLength);
03887     if(srcLength > 0) {
03888       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03889     }
03890   }
03891   return -1;
03892 }
03893 
03894 inline int32_t
03895 UnicodeString::indexOf(const UnicodeString& text) const
03896 { return indexOf(text, 0, text.length(), 0, length()); }
03897 
03898 inline int32_t
03899 UnicodeString::indexOf(const UnicodeString& text,
03900                int32_t start) const {
03901   pinIndex(start);
03902   return indexOf(text, 0, text.length(), start, length() - start);
03903 }
03904 
03905 inline int32_t
03906 UnicodeString::indexOf(const UnicodeString& text,
03907                int32_t start,
03908                int32_t _length) const
03909 { return indexOf(text, 0, text.length(), start, _length); }
03910 
03911 inline int32_t
03912 UnicodeString::indexOf(const UChar *srcChars,
03913                int32_t srcLength,
03914                int32_t start) const {
03915   pinIndex(start);
03916   return indexOf(srcChars, 0, srcLength, start, length() - start);
03917 }
03918 
03919 inline int32_t
03920 UnicodeString::indexOf(const UChar *srcChars,
03921                int32_t srcLength,
03922                int32_t start,
03923                int32_t _length) const
03924 { return indexOf(srcChars, 0, srcLength, start, _length); }
03925 
03926 inline int32_t
03927 UnicodeString::indexOf(UChar c,
03928                int32_t start,
03929                int32_t _length) const
03930 { return doIndexOf(c, start, _length); }
03931 
03932 inline int32_t
03933 UnicodeString::indexOf(UChar32 c,
03934                int32_t start,
03935                int32_t _length) const
03936 { return doIndexOf(c, start, _length); }
03937 
03938 inline int32_t
03939 UnicodeString::indexOf(UChar c) const
03940 { return doIndexOf(c, 0, length()); }
03941 
03942 inline int32_t
03943 UnicodeString::indexOf(UChar32 c) const
03944 { return indexOf(c, 0, length()); }
03945 
03946 inline int32_t
03947 UnicodeString::indexOf(UChar c,
03948                int32_t start) const {
03949   pinIndex(start);
03950   return doIndexOf(c, start, length() - start);
03951 }
03952 
03953 inline int32_t
03954 UnicodeString::indexOf(UChar32 c,
03955                int32_t start) const {
03956   pinIndex(start);
03957   return indexOf(c, start, length() - start);
03958 }
03959 
03960 inline int32_t
03961 UnicodeString::lastIndexOf(const UChar *srcChars,
03962                int32_t srcLength,
03963                int32_t start,
03964                int32_t _length) const
03965 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
03966 
03967 inline int32_t
03968 UnicodeString::lastIndexOf(const UChar *srcChars,
03969                int32_t srcLength,
03970                int32_t start) const {
03971   pinIndex(start);
03972   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
03973 }
03974 
03975 inline int32_t
03976 UnicodeString::lastIndexOf(const UnicodeString& srcText,
03977                int32_t srcStart,
03978                int32_t srcLength,
03979                int32_t start,
03980                int32_t _length) const
03981 {
03982   if(!srcText.isBogus()) {
03983     srcText.pinIndices(srcStart, srcLength);
03984     if(srcLength > 0) {
03985       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03986     }
03987   }
03988   return -1;
03989 }
03990 
03991 inline int32_t
03992 UnicodeString::lastIndexOf(const UnicodeString& text,
03993                int32_t start,
03994                int32_t _length) const
03995 { return lastIndexOf(text, 0, text.length(), start, _length); }
03996 
03997 inline int32_t
03998 UnicodeString::lastIndexOf(const UnicodeString& text,
03999                int32_t start) const {
04000   pinIndex(start);
04001   return lastIndexOf(text, 0, text.length(), start, length() - start);
04002 }
04003 
04004 inline int32_t
04005 UnicodeString::lastIndexOf(const UnicodeString& text) const
04006 { return lastIndexOf(text, 0, text.length(), 0, length()); }
04007 
04008 inline int32_t
04009 UnicodeString::lastIndexOf(UChar c,
04010                int32_t start,
04011                int32_t _length) const
04012 { return doLastIndexOf(c, start, _length); }
04013 
04014 inline int32_t
04015 UnicodeString::lastIndexOf(UChar32 c,
04016                int32_t start,
04017                int32_t _length) const {
04018   return doLastIndexOf(c, start, _length);
04019 }
04020 
04021 inline int32_t
04022 UnicodeString::lastIndexOf(UChar c) const
04023 { return doLastIndexOf(c, 0, length()); }
04024 
04025 inline int32_t
04026 UnicodeString::lastIndexOf(UChar32 c) const {
04027   return lastIndexOf(c, 0, length());
04028 }
04029 
04030 inline int32_t
04031 UnicodeString::lastIndexOf(UChar c,
04032                int32_t start) const {
04033   pinIndex(start);
04034   return doLastIndexOf(c, start, length() - start);
04035 }
04036 
04037 inline int32_t
04038 UnicodeString::lastIndexOf(UChar32 c,
04039                int32_t start) const {
04040   pinIndex(start);
04041   return lastIndexOf(c, start, length() - start);
04042 }
04043 
04044 inline UBool
04045 UnicodeString::startsWith(const UnicodeString& text) const
04046 { return compare(0, text.length(), text, 0, text.length()) == 0; }
04047 
04048 inline UBool
04049 UnicodeString::startsWith(const UnicodeString& srcText,
04050               int32_t srcStart,
04051               int32_t srcLength) const
04052 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
04053 
04054 inline UBool
04055 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
04056   if(srcLength < 0) {
04057     srcLength = u_strlen(srcChars);
04058   }
04059   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
04060 }
04061 
04062 inline UBool
04063 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
04064   if(srcLength < 0) {
04065     srcLength = u_strlen(srcChars);
04066   }
04067   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
04068 }
04069 
04070 inline UBool
04071 UnicodeString::endsWith(const UnicodeString& text) const
04072 { return doCompare(length() - text.length(), text.length(),
04073            text, 0, text.length()) == 0; }
04074 
04075 inline UBool
04076 UnicodeString::endsWith(const UnicodeString& srcText,
04077             int32_t srcStart,
04078             int32_t srcLength) const {
04079   srcText.pinIndices(srcStart, srcLength);
04080   return doCompare(length() - srcLength, srcLength,
04081                    srcText, srcStart, srcLength) == 0;
04082 }
04083 
04084 inline UBool
04085 UnicodeString::endsWith(const UChar *srcChars,
04086             int32_t srcLength) const {
04087   if(srcLength < 0) {
04088     srcLength = u_strlen(srcChars);
04089   }
04090   return doCompare(length() - srcLength, srcLength,
04091                    srcChars, 0, srcLength) == 0;
04092 }
04093 
04094 inline UBool
04095 UnicodeString::endsWith(const UChar *srcChars,
04096             int32_t srcStart,
04097             int32_t srcLength) const {
04098   if(srcLength < 0) {
04099     srcLength = u_strlen(srcChars + srcStart);
04100   }
04101   return doCompare(length() - srcLength, srcLength,
04102                    srcChars, srcStart, srcLength) == 0;
04103 }
04104 
04105 //========================================
04106 // replace
04107 //========================================
04108 inline UnicodeString&
04109 UnicodeString::replace(int32_t start,
04110                int32_t _length,
04111                const UnicodeString& srcText)
04112 { return doReplace(start, _length, srcText, 0, srcText.length()); }
04113 
04114 inline UnicodeString&
04115 UnicodeString::replace(int32_t start,
04116                int32_t _length,
04117                const UnicodeString& srcText,
04118                int32_t srcStart,
04119                int32_t srcLength)
04120 { return doReplace(start, _length, srcText, srcStart, srcLength); }
04121 
04122 inline UnicodeString&
04123 UnicodeString::replace(int32_t start,
04124                int32_t _length,
04125                const UChar *srcChars,
04126                int32_t srcLength)
04127 { return doReplace(start, _length, srcChars, 0, srcLength); }
04128 
04129 inline UnicodeString&
04130 UnicodeString::replace(int32_t start,
04131                int32_t _length,
04132                const UChar *srcChars,
04133                int32_t srcStart,
04134                int32_t srcLength)
04135 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
04136 
04137 inline UnicodeString&
04138 UnicodeString::replace(int32_t start,
04139                int32_t _length,
04140                UChar srcChar)
04141 { return doReplace(start, _length, &srcChar, 0, 1); }
04142 
04143 inline UnicodeString&
04144 UnicodeString::replaceBetween(int32_t start,
04145                   int32_t limit,
04146                   const UnicodeString& srcText)
04147 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
04148 
04149 inline UnicodeString&
04150 UnicodeString::replaceBetween(int32_t start,
04151                   int32_t limit,
04152                   const UnicodeString& srcText,
04153                   int32_t srcStart,
04154                   int32_t srcLimit)
04155 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
04156 
04157 inline UnicodeString&
04158 UnicodeString::findAndReplace(const UnicodeString& oldText,
04159                   const UnicodeString& newText)
04160 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
04161             newText, 0, newText.length()); }
04162 
04163 inline UnicodeString&
04164 UnicodeString::findAndReplace(int32_t start,
04165                   int32_t _length,
04166                   const UnicodeString& oldText,
04167                   const UnicodeString& newText)
04168 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
04169             newText, 0, newText.length()); }
04170 
04171 // ============================
04172 // extract
04173 // ============================
04174 inline void
04175 UnicodeString::doExtract(int32_t start,
04176              int32_t _length,
04177              UnicodeString& target) const
04178 { target.replace(0, target.length(), *this, start, _length); }
04179 
04180 inline void
04181 UnicodeString::extract(int32_t start,
04182                int32_t _length,
04183                UChar *target,
04184                int32_t targetStart) const
04185 { doExtract(start, _length, target, targetStart); }
04186 
04187 inline void
04188 UnicodeString::extract(int32_t start,
04189                int32_t _length,
04190                UnicodeString& target) const
04191 { doExtract(start, _length, target); }
04192 
04193 #if !UCONFIG_NO_CONVERSION
04194 
04195 inline int32_t
04196 UnicodeString::extract(int32_t start,
04197                int32_t _length,
04198                char *dst,
04199                const char *codepage) const
04200 
04201 {
04202   // This dstSize value will be checked explicitly
04203   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
04204 }
04205 
04206 #endif
04207 
04208 inline void
04209 UnicodeString::extractBetween(int32_t start,
04210                   int32_t limit,
04211                   UChar *dst,
04212                   int32_t dstStart) const {
04213   pinIndex(start);
04214   pinIndex(limit);
04215   doExtract(start, limit - start, dst, dstStart);
04216 }
04217 
04218 inline UnicodeString
04219 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
04220     return tempSubString(start, limit - start);
04221 }
04222 
04223 inline UChar
04224 UnicodeString::doCharAt(int32_t offset) const
04225 {
04226   if((uint32_t)offset < (uint32_t)length()) {
04227     return getArrayStart()[offset];
04228   } else {
04229     return kInvalidUChar;
04230   }
04231 }
04232 
04233 inline UChar
04234 UnicodeString::charAt(int32_t offset) const
04235 { return doCharAt(offset); }
04236 
04237 inline UChar
04238 UnicodeString::operator[] (int32_t offset) const
04239 { return doCharAt(offset); }
04240 
04241 inline UBool
04242 UnicodeString::isEmpty() const {
04243   return fShortLength == 0;
04244 }
04245 
04246 //========================================
04247 // Write implementation methods
04248 //========================================
04249 inline void
04250 UnicodeString::setLength(int32_t len) {
04251   if(len <= 127) {
04252     fShortLength = (int8_t)len;
04253   } else {
04254     fShortLength = (int8_t)-1;
04255     fUnion.fFields.fLength = len;
04256   }
04257 }
04258 
04259 inline void
04260 UnicodeString::setToEmpty() {
04261   fShortLength = 0;
04262   fFlags = kShortString;
04263 }
04264 
04265 inline void
04266 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
04267   setLength(len);
04268   fUnion.fFields.fArray = array;
04269   fUnion.fFields.fCapacity = capacity;
04270 }
04271 
04272 inline const UChar *
04273 UnicodeString::getTerminatedBuffer() {
04274   if(!isWritable()) {
04275     return 0;
04276   } else {
04277     UChar *array = getArrayStart();
04278     int32_t len = length();
04279     if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
04280       /*
04281        * kRefCounted: Do not write the NUL if the buffer is shared.
04282        * That is mostly safe, except when the length of one copy was modified
04283        * without copy-on-write, e.g., via truncate(newLength) or remove(void).
04284        * Then the NUL would be written into the middle of another copy's string.
04285        */
04286       if(!(fFlags&kBufferIsReadonly)) {
04287         /*
04288          * We must not write to a readonly buffer, but it is known to be
04289          * NUL-terminated if len<capacity.
04290          * A shared, allocated buffer (refCount()>1) must not have its contents
04291          * modified, but the NUL at [len] is beyond the string contents,
04292          * and multiple string objects and threads writing the same NUL into the
04293          * same location is harmless.
04294          * In all other cases, the buffer is fully writable and it is anyway safe
04295          * to write the NUL.
04296          *
04297          * Note: An earlier version of this code tested whether there is a NUL
04298          * at [len] already, but, while safe, it generated lots of warnings from
04299          * tools like valgrind and Purify.
04300          */
04301         array[len] = 0;
04302       }
04303       return array;
04304     } else if(cloneArrayIfNeeded(len+1)) {
04305       array = getArrayStart();
04306       array[len] = 0;
04307       return array;
04308     } else {
04309       return 0;
04310     }
04311   }
04312 }
04313 
04314 inline UnicodeString&
04315 UnicodeString::operator= (UChar ch)
04316 { return doReplace(0, length(), &ch, 0, 1); }
04317 
04318 inline UnicodeString&
04319 UnicodeString::operator= (UChar32 ch)
04320 { return replace(0, length(), ch); }
04321 
04322 inline UnicodeString&
04323 UnicodeString::setTo(const UnicodeString& srcText,
04324              int32_t srcStart,
04325              int32_t srcLength)
04326 {
04327   unBogus();
04328   return doReplace(0, length(), srcText, srcStart, srcLength);
04329 }
04330 
04331 inline UnicodeString&
04332 UnicodeString::setTo(const UnicodeString& srcText,
04333              int32_t srcStart)
04334 {
04335   unBogus();
04336   srcText.pinIndex(srcStart);
04337   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
04338 }
04339 
04340 inline UnicodeString&
04341 UnicodeString::setTo(const UnicodeString& srcText)
04342 {
04343   return copyFrom(srcText);
04344 }
04345 
04346 inline UnicodeString&
04347 UnicodeString::setTo(const UChar *srcChars,
04348              int32_t srcLength)
04349 {
04350   unBogus();
04351   return doReplace(0, length(), srcChars, 0, srcLength);
04352 }
04353 
04354 inline UnicodeString&
04355 UnicodeString::setTo(UChar srcChar)
04356 {
04357   unBogus();
04358   return doReplace(0, length(), &srcChar, 0, 1);
04359 }
04360 
04361 inline UnicodeString&
04362 UnicodeString::setTo(UChar32 srcChar)
04363 {
04364   unBogus();
04365   return replace(0, length(), srcChar);
04366 }
04367 
04368 inline UnicodeString&
04369 UnicodeString::append(const UnicodeString& srcText,
04370               int32_t srcStart,
04371               int32_t srcLength)
04372 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
04373 
04374 inline UnicodeString&
04375 UnicodeString::append(const UnicodeString& srcText)
04376 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04377 
04378 inline UnicodeString&
04379 UnicodeString::append(const UChar *srcChars,
04380               int32_t srcStart,
04381               int32_t srcLength)
04382 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
04383 
04384 inline UnicodeString&
04385 UnicodeString::append(const UChar *srcChars,
04386               int32_t srcLength)
04387 { return doReplace(length(), 0, srcChars, 0, srcLength); }
04388 
04389 inline UnicodeString&
04390 UnicodeString::append(UChar srcChar)
04391 { return doReplace(length(), 0, &srcChar, 0, 1); }
04392 
04393 inline UnicodeString&
04394 UnicodeString::operator+= (UChar ch)
04395 { return doReplace(length(), 0, &ch, 0, 1); }
04396 
04397 inline UnicodeString&
04398 UnicodeString::operator+= (UChar32 ch) {
04399   return append(ch);
04400 }
04401 
04402 inline UnicodeString&
04403 UnicodeString::operator+= (const UnicodeString& srcText)
04404 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04405 
04406 inline UnicodeString&
04407 UnicodeString::insert(int32_t start,
04408               const UnicodeString& srcText,
04409               int32_t srcStart,
04410               int32_t srcLength)
04411 { return doReplace(start, 0, srcText, srcStart, srcLength); }
04412 
04413 inline UnicodeString&
04414 UnicodeString::insert(int32_t start,
04415               const UnicodeString& srcText)
04416 { return doReplace(start, 0, srcText, 0, srcText.length()); }
04417 
04418 inline UnicodeString&
04419 UnicodeString::insert(int32_t start,
04420               const UChar *srcChars,
04421               int32_t srcStart,
04422               int32_t srcLength)
04423 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
04424 
04425 inline UnicodeString&
04426 UnicodeString::insert(int32_t start,
04427               const UChar *srcChars,
04428               int32_t srcLength)
04429 { return doReplace(start, 0, srcChars, 0, srcLength); }
04430 
04431 inline UnicodeString&
04432 UnicodeString::insert(int32_t start,
04433               UChar srcChar)
04434 { return doReplace(start, 0, &srcChar, 0, 1); }
04435 
04436 inline UnicodeString&
04437 UnicodeString::insert(int32_t start,
04438               UChar32 srcChar)
04439 { return replace(start, 0, srcChar); }
04440 
04441 
04442 inline UnicodeString&
04443 UnicodeString::remove()
04444 {
04445   // remove() of a bogus string makes the string empty and non-bogus
04446   // we also un-alias a read-only alias to deal with NUL-termination
04447   // issues with getTerminatedBuffer()
04448   if(fFlags & (kIsBogus|kBufferIsReadonly)) {
04449     setToEmpty();
04450   } else {
04451     fShortLength = 0;
04452   }
04453   return *this;
04454 }
04455 
04456 inline UnicodeString&
04457 UnicodeString::remove(int32_t start,
04458              int32_t _length)
04459 {
04460     if(start <= 0 && _length == INT32_MAX) {
04461         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
04462         return remove();
04463     }
04464     return doReplace(start, _length, NULL, 0, 0);
04465 }
04466 
04467 inline UnicodeString&
04468 UnicodeString::removeBetween(int32_t start,
04469                 int32_t limit)
04470 { return doReplace(start, limit - start, NULL, 0, 0); }
04471 
04472 inline UnicodeString &
04473 UnicodeString::retainBetween(int32_t start, int32_t limit) {
04474   truncate(limit);
04475   return doReplace(0, start, NULL, 0, 0);
04476 }
04477 
04478 inline UBool
04479 UnicodeString::truncate(int32_t targetLength)
04480 {
04481   if(isBogus() && targetLength == 0) {
04482     // truncate(0) of a bogus string makes the string empty and non-bogus
04483     unBogus();
04484     return FALSE;
04485   } else if((uint32_t)targetLength < (uint32_t)length()) {
04486     setLength(targetLength);
04487     if(fFlags&kBufferIsReadonly) {
04488       fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
04489     }
04490     return TRUE;
04491   } else {
04492     return FALSE;
04493   }
04494 }
04495 
04496 inline UnicodeString&
04497 UnicodeString::reverse()
04498 { return doReverse(0, length()); }
04499 
04500 inline UnicodeString&
04501 UnicodeString::reverse(int32_t start,
04502                int32_t _length)
04503 { return doReverse(start, _length); }
04504 
04505 U_NAMESPACE_END
04506 
04507 #endif

Generated on 25 Nov 2014 for ICU 50.1.2 by  doxygen 1.4.7