00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef INDEXCHARS_H
00011 #define INDEXCHARS_H
00012
00013 #include "unicode/utypes.h"
00014 #include "unicode/uobject.h"
00015 #include "unicode/locid.h"
00016
00017
00018 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
00019
00025 U_CDECL_BEGIN
00026
/**
 * Label classification used by AlphabeticIndex: it is the return type of
 * AlphabeticIndex::getBucketLabelType() and the type of Bucket::labelType_.
 * Declared between U_CDECL_BEGIN / U_CDECL_END with explicit values 0..3.
 *
 * NOTE(review): the meaning given for each constant is inferred from its
 * name only; confirm against the implementation.
 */
00033 typedef enum UAlphabeticIndexLabelType {
/** Value 0. Presumably an ordinary label - confirm. */
00039 U_ALPHAINDEX_NORMAL = 0,
00040
/** Value 1. Presumably labels names that sort before every normal label - confirm. */
00046 U_ALPHAINDEX_UNDERFLOW = 1,
00047
/** Value 2. Presumably labels names that sort between groups of normal labels - confirm. */
00056 U_ALPHAINDEX_INFLOW = 2,
00057
/** Value 3. Presumably labels names that sort after every normal label - confirm. */
00063 U_ALPHAINDEX_OVERFLOW = 3
00064 } UAlphabeticIndexLabelType;
00065
00066
/* Forward declaration only: in this header UHashtable is used solely as a
 * pointer type (the AlphabeticIndex::alreadyIn_ member). */
00067 struct UHashtable;
00068 U_CDECL_END
00069
00070 U_NAMESPACE_BEGIN
00071
00072
00073
/*
 * Forward declarations. Within this header these classes appear only behind
 * pointers or references, so their full definitions are not required here.
 * NOTE(review): StringEnumeration is not referenced by any declaration
 * visible in this header.
 */
00074 class Collator;
00075 class RuleBasedCollator;
00076 class StringEnumeration;
00077 class UnicodeSet;
00078 class UVector;
00079
00080
00081
/**
 * AlphabeticIndex: a UObject subclass exported with U_I18N_API.
 *
 * The public declarations fall into three groups:
 *   - label configuration: addLabels(), the inflow / overflow / underflow
 *     label accessors, and the maximum label count;
 *   - record input: addRecord() and clearRecords();
 *   - two cursor-style iterators: nextBucket() and nextRecord(), each with
 *     accessors for the current element and a reset function.
 *
 * Only declarations are visible in this header. The member comments state
 * signatures as facts and mark anything inferred from names alone with
 * "presumably" or NOTE(review); confirm those against the implementation.
 */
00165 class U_I18N_API AlphabeticIndex: public UObject {
00166
00167 public:
00168
/**
 * Constructs an index for a locale.
 * @param locale  locale for the index; presumably selects the initial
 *                labels and the collation - confirm.
 * @param status  error code, passed by non-const reference so the callee
 *                can report failure.
 */
00181 AlphabeticIndex(const Locale &locale, UErrorCode &status);
00182
00183
00184
/**
 * Adds labels taken from a UnicodeSet.
 * @param additions  set supplying the additional labels.
 * @param status     error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this, to allow
 *         call chaining - confirm.
 */
00195 virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status);
00196
/**
 * Overload of addLabels() taking a Locale instead of a UnicodeSet;
 * presumably adds that locale's index labels - confirm.
 * @param locale  locale whose labels are to be added.
 * @param status  error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00210 virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status);
00211
/** Virtual destructor. */
00216 virtual ~AlphabeticIndex();
00217
00218
/**
 * Returns a const reference to a RuleBasedCollator.
 * NOTE(review): the collator members below are declared as Collator*, so
 * the implementation must convert to RuleBasedCollator - confirm how.
 */
00231 virtual const RuleBasedCollator &getCollator() const;
00232
00233
/** Returns the inflow label by const reference (see inflowLabel_). */
00242 virtual const UnicodeString &getInflowLabel() const;
00243
/**
 * Sets the inflow label.
 * @param inflowLabel  new label text.
 * @param status       error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00255 virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status);
00256
00257
00258
/** Returns the overflow label by const reference (see overflowLabel_). */
00266 virtual const UnicodeString &getOverflowLabel() const;
00267
00268
/**
 * Sets the overflow label.
 * @param overflowLabel  new label text.
 * @param status         error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00278 virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status);
00279
/** Returns the underflow label by const reference (see underflowLabel_). */
00287 virtual const UnicodeString &getUnderflowLabel() const;
00288
/**
 * Sets the underflow label.
 * @param underflowLabel  new label text.
 * @param status          error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00298 virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString &underflowLabel, UErrorCode &status);
00299
00300
/** Returns the maximum label count as an int32_t (see maxLabelCount_). */
00308 virtual int32_t getMaxLabelCount() const;
00309
/**
 * Sets the maximum label count.
 * @param maxLabelCount  new limit. NOTE(review): the valid range is not
 *                       visible in this header.
 * @param status         error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00322 virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status);
00323
00324
/**
 * Returns a string by const reference for the given lower limit.
 * Presumably a string suitable for comparing against overflow content
 * (see overflowComparisonString_) - confirm. Not a const member function.
 * @param lowerLimit  input string.
 * @param status      error code (non-const reference).
 */
00337 virtual const UnicodeString &getOverflowComparisonString(const UnicodeString &lowerLimit,
00338 UErrorCode &status);
00339
00340
/**
 * Adds a record made of a name and an opaque data pointer.
 * @param name    record name.
 * @param data    caller-supplied pointer, typed const void*.
 *                NOTE(review): ownership and lifetime are not visible here.
 * @param status  error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00357 virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status);
00358
/**
 * Presumably removes all records added so far - confirm.
 * @param status  error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00367 virtual AlphabeticIndex &clearRecords(UErrorCode &status);
00368
00369
/**
 * Returns the number of buckets.
 * Non-const and takes a status. NOTE(review): possibly because the index
 * is built on demand (see indexBuildRequired_) - confirm.
 */
00378 virtual int32_t getBucketCount(UErrorCode &status);
00379
00380
/**
 * Returns the number of records. Non-const and takes a status.
 */
00389 virtual int32_t getRecordCount(UErrorCode &status);
00390
00391
00392
/**
 * Returns a bucket index for the given item name.
 * @param itemName  name to look up.
 * @param status    error code (non-const reference).
 * @return an int32_t index. NOTE(review): base and not-found value are
 *         not visible here.
 */
00405 virtual int32_t getBucketIndex(const UnicodeString &itemName, UErrorCode &status);
00406
00407
/**
 * No-argument const overload; presumably the index of the bucket the
 * bucket iterator is currently positioned on - confirm.
 */
00414 virtual int32_t getBucketIndex() const;
00415
00416
/**
 * Advances the bucket iterator.
 * @param status  error code (non-const reference).
 * @return UBool; presumably TRUE while a bucket is available - confirm.
 */
00428 virtual UBool nextBucket(UErrorCode &status);
00429
/** Returns a label by const reference; presumably that of the current bucket - confirm. */
00438 virtual const UnicodeString &getBucketLabel() const;
00439
/** Returns a UAlphabeticIndexLabelType; presumably for the current bucket - confirm. */
00447 virtual UAlphabeticIndexLabelType getBucketLabelType() const;
00448
/** Returns a record count; presumably for the current bucket - confirm. */
00457 virtual int32_t getBucketRecordCount() const;
00458
00459
/**
 * Resets the bucket iterator.
 * @param status  error code (non-const reference).
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00468 virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status);
00469
/**
 * Advances the record iterator.
 * @param status  error code (non-const reference).
 * @return UBool; presumably TRUE while a record is available - confirm.
 */
00481 virtual UBool nextRecord(UErrorCode &status);
00482
/** Returns a record name by const reference; presumably that of the current record - confirm. */
00491 virtual const UnicodeString &getRecordName() const;
00492
00493
/** Returns a const void* data pointer; presumably the one passed to addRecord() for the current record - confirm. */
00502 virtual const void *getRecordData() const;
00503
00504
/**
 * Resets the record iterator. Unlike resetBucketIterator() it takes no
 * status argument.
 * @return reference to an AlphabeticIndex; presumably *this - confirm.
 */
00511 virtual AlphabeticIndex &resetRecordIterator();
00512
00513 private:
00514
/** Private override slot for the dynamic class ID. NOTE(review): presumably overrides UObject's - confirm. */
00515 virtual UClassID getDynamicClassID() const;
00516
/** Copy constructor, declared private; presumably to prevent copying by clients - confirm. */
00521 AlphabeticIndex(const AlphabeticIndex &other);
00522
/**
 * Private assignment operator. The inline body copies nothing and returns
 * *this.
 * NOTE(review): the ';' after the closing brace is redundant.
 */
00526 AlphabeticIndex &operator =(const AlphabeticIndex & ) { return *this;};
00527
/** Equality operator, declared private and virtual. */
00532 virtual UBool operator==(const AlphabeticIndex& other) const;
00533
/** Inequality operator, declared private and virtual. */
00538 virtual UBool operator!=(const AlphabeticIndex& other) const;
00539
00540
/** Per-instance initialization helper; reports errors through status. */
00541 void init(UErrorCode &status);
00542
00543
/** Static initialization helper; presumably sets up the static members declared at the end of the class - confirm. */
00544 static void staticInit(UErrorCode &status);
00545
00546
/**
 * Writes into dest a value derived from name, using collator coll.
 * NOTE(review): the transformation is not visible here; the "hack" and
 * PINYIN names below suggest a pinyin sorting workaround - confirm.
 */
00547 void hackName(UnicodeString &dest, const UnicodeString &name, const Collator *coll);
/** Pinyin-bounds initialization helper taking a collator and a status. */
00548 void initPinyinBounds(const Collator *coll, UErrorCode &status);
00549
00550 public:
00551 #ifndef U_HIDE_INTERNAL_API
00552
/** Public static cleanup hook; compiled only when U_HIDE_INTERNAL_API is not defined. */
00557 static void staticCleanup();
00558 #endif
00559 private:
00560
00561
00562
/** Fills dest with index exemplars for locale; errors are reported via status. */
00563 static void getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status);
00564
/** Returns a UVector pointer. NOTE(review): ownership of the result is not visible here. */
00565 UVector *firstStringsInScript(UErrorCode &status);
00566
/** Returns, by value, a UnicodeString derived from item. */
00567 static UnicodeString separated(const UnicodeString &item);
00568
/** Takes an output set and a code point string and returns a UnicodeSet pointer; presumably dest itself - confirm. */
00569 static UnicodeSet *getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status);
00570
/** Index-construction helpers. NOTE(review): call order is not visible here. */
00571 void buildIndex(UErrorCode &status);
00572 void buildBucketList(UErrorCode &status);
00573 void bucketRecords(UErrorCode &status);
00574
00575
00576 public:
00577
00578
00579
00580
00581
00582 #ifndef U_HIDE_INTERNAL_API
00583
/**
 * One input record. Declared only when U_HIDE_INTERNAL_API is not defined.
 * Holds a back-pointer to the owning index, the name (const), a separate
 * sorting name, the opaque data pointer and a serial number.
 */
00587 struct Record: public UMemory {
00588 AlphabeticIndex *alphaIndex_;
00589 const UnicodeString name_;
00590 UnicodeString sortingName_;
00591 const void *data_;
00592 int32_t serialNumber_;
00593 Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data);
00594 ~Record();
00595 };
00596 #endif
00597
/**
 * Public data member; presumably the vector of Record objects - confirm.
 * NOTE(review): unlike Record, this member is not inside the
 * U_HIDE_INTERNAL_API guard.
 */
00603 UVector *inputRecords_;
00604
/**
 * One bucket: a label, a lower-boundary string, a label type and a
 * vector pointer for its records. The constructor takes the first three
 * plus a status.
 * NOTE(review): not inside the U_HIDE_INTERNAL_API guard.
 */
00610 struct Bucket: public UMemory {
00611 UnicodeString label_;
00612 UnicodeString lowerBoundary_;
00613 UAlphabeticIndexLabelType labelType_;
00614 UVector *records_;
00615
00616 Bucket(const UnicodeString &label,
00617 const UnicodeString &lowerBoundary,
00618 UAlphabeticIndexLabelType type, UErrorCode &status);
00619 ~Bucket();
00620 };
00621
00622 public:
00623
/**
 * Language classification with three values and no explicit
 * initializers (so 0, 1, 2 in declaration order). NOTE(review): the names
 * suggest normal vs. simplified vs. traditional Chinese - confirm.
 */
00628 enum ELangType {
00630 kNormal,
00632 kSimplified,
00634 kTraditional
00635 };
00636
/** Maps a Locale to an ELangType. */
00641 static ELangType langTypeFromLocale(const Locale &loc);
00642
00643
00644 private:
00645
00646
00647
/** Presumably the list of Bucket objects - confirm. */
00648 UVector *bucketList_;
00649
/** Iterator state: two int32_t cursors and a pointer to the current bucket. */
00650 int32_t labelsIterIndex_;
00651 int32_t itemsIterIndex_;
00652 Bucket *currentBucket_;
00653
00654
00655
/** Flag; presumably set when the index must be (re)built before use - confirm. */
00656 UBool indexBuildRequired_;
00657
00658
00659
/** Backing field for getMaxLabelCount() / setMaxLabelCount(). */
00660 int32_t maxLabelCount_;
00661
/** Hash table pointer. NOTE(review): contents and purpose are not visible here. */
00662 UHashtable *alreadyIn_;
00663
/** Set pointer; presumably the labels gathered from the constructor and addLabels() - confirm. */
00664 UnicodeSet *initialLabels_;
00665
00666
00667
00668
/** Vector pointer; presumably the final label list - confirm. */
00669 UVector *labels_;
00670
00671
/** Set pointer. NOTE(review): semantics are not visible here. */
00672 UnicodeSet *noDistinctSorting_;
00673
00674
00675
00676
/** Set pointer. NOTE(review): semantics are not visible here. */
00677 UnicodeSet *notAlphabetic_;
00678
00679
00680
00681
00682
/** Vector pointer. NOTE(review): possibly the result of firstStringsInScript() - confirm. */
00683 UVector *firstScriptCharacters_;
00684
00685
/** Locale plus two collator pointers. NOTE(review): ownership of the collators is not visible here. */
00686 Locale locale_;
00687 Collator *collator_;
00688 Collator *collatorPrimaryOnly_;
00689
/** Label strings; the first three back the get/set label accessors above. */
00690 UnicodeString inflowLabel_;
00691 UnicodeString overflowLabel_;
00692 UnicodeString underflowLabel_;
00693 UnicodeString overflowComparisonString_;
00694
/** Language type of this instance. */
00695 ELangType langType_;
00696
00697
/** PinyinLookup is a const table of 24 rows by 3 UChar; two static tables of that type follow. */
00698 typedef const UChar PinyinLookup[24][3];
00699 static PinyinLookup HACK_PINYIN_LOOKUP_SHORT;
00700 static PinyinLookup HACK_PINYIN_LOOKUP_LONG;
00701
00702
00703
/** Static pointer to a PinyinLookup table, and a static pointer to const UChar. */
00704 static PinyinLookup *HACK_PINYIN_LOOKUP;
00705 static const UChar *PINYIN_LOWER_BOUNDS;
00706
00707
00708
/** Counter; presumably the source of Record::serialNumber_ - confirm. */
00709 int32_t recordCounter_;
00710
00711
00712
/** Shared static sets and a shared empty string, all held by pointer. */
00713 static UnicodeSet *ALPHABETIC;
00714 static UnicodeSet *CORE_LATIN;
00715 static UnicodeSet *ETHIOPIC;
00716 static UnicodeSet *HANGUL;
00717 static UnicodeSet *IGNORE_SCRIPTS;
00718 static UnicodeSet *TO_TRY;
00719 static UnicodeSet *UNIHAN;
00720 static const UnicodeString *EMPTY_STRING;
00721
00722 };
00723
00724 U_NAMESPACE_END
00725
00726 #endif
00727 #endif