alphaindex.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2011-2012 International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 */
00009 
00010 #ifndef INDEXCHARS_H
00011 #define INDEXCHARS_H
00012 
00013 #include "unicode/utypes.h"
00014 #include "unicode/uobject.h"
00015 #include "unicode/locid.h"
00016 
00017 
00018 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
00019 
00025 U_CDECL_BEGIN
00026 
/*
 * Kinds of label that a bucket of an alphabetic index can carry.
 * Declared inside U_CDECL_BEGIN / U_CDECL_END, with explicit numeric
 * values 0..3 for the four constants.
 *
 * NOTE(review): the per-constant descriptions below are inferred from the
 * constant names and from the inflow/overflow/underflow label accessors of
 * AlphabeticIndex; confirm the exact wording against the ICU API reference.
 */
00033 typedef enum UAlphabeticIndexLabelType {
    /* Ordinary label (presumably a regular index character -- confirm). */
00039     U_ALPHAINDEX_NORMAL    = 0,
00040 
    /* Underflow label (presumably for names sorting before the first normal label -- confirm). */
00046     U_ALPHAINDEX_UNDERFLOW = 1,
00047 
    /* Inflow label (presumably for names sorting between groups of normal labels -- confirm). */
00056     U_ALPHAINDEX_INFLOW    = 2,
00057 
    /* Overflow label (presumably for names sorting after the last normal label -- confirm). */
00063     U_ALPHAINDEX_OVERFLOW  = 3
00064 } UAlphabeticIndexLabelType;
00065 
00066 
00067 struct UHashtable;
00068 U_CDECL_END
00069 
00070 U_NAMESPACE_BEGIN
00071 
00072 // Forward Declarations
00073 
00074 class Collator;
00075 class RuleBasedCollator;
00076 class StringEnumeration;
00077 class UnicodeSet;
00078 class UVector;
00079 
00080 
00081 
// AlphabeticIndex: an index of named records that are grouped into labeled
// buckets (see the nested Record and Bucket structs below), configured from
// a Locale.
//
// The public interface covers:
//   - adding labels (from a UnicodeSet or from a Locale),
//   - getting/setting the inflow, overflow and underflow labels and the
//     maximum label count,
//   - adding and clearing records (a name plus an opaque data pointer),
//   - iterating over the buckets, and over the records of the current bucket.
//
// Most mutating functions return AlphabeticIndex & (presumably *this, to
// allow call chaining -- confirm in the implementation) and take a
// UErrorCode & status parameter.
//
// The copy constructor, assignment operator and the == / != operators are
// declared private, so client code cannot copy or compare index objects.
00165 class U_I18N_API AlphabeticIndex: public UObject {
00166 
00167   public:
00168 
     // Construct an index for the given locale.
     // status is the ICU error code, passed by reference.
00181      AlphabeticIndex(const Locale &locale, UErrorCode &status);
00182 
00183 
00184 
     // Add the labels contained in the set `additions` to this index.
00195      virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status);
00196 
     // Add labels associated with `locale` to this index (presumably that
     // locale's index exemplar characters, cf. getIndexExemplars() below --
     // confirm in the implementation).
00210      virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status);
00211 
     // Virtual destructor.
00216      virtual ~AlphabeticIndex();
00217 
00218 
    // Read-only access to the collator of this index, typed as a
    // RuleBasedCollator.
    // NOTE(review): the data members collator_ and collatorPrimaryOnly_ are
    // declared as Collator *; which of them is returned is not visible here.
00231     virtual const RuleBasedCollator &getCollator() const;
00232 
00233 
    // Get the inflow label (presumably backed by inflowLabel_ -- confirm).
00242     virtual const UnicodeString &getInflowLabel() const;
00243 
    // Set the inflow label.
00255     virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status);
00256 
00257 
00258 
    // Get the overflow label (presumably backed by overflowLabel_ -- confirm).
00266     virtual const UnicodeString &getOverflowLabel() const;
00267 
00268 
    // Set the overflow label.
00278     virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status);
00279 
    // Get the underflow label (presumably backed by underflowLabel_ -- confirm).
00287     virtual const UnicodeString &getUnderflowLabel() const;
00288 
    // Set the underflow label.
00298     virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString &underflowLabel, UErrorCode &status);
00299 
00300 
    // Get the limit on the number of labels permitted in the index
    // (see maxLabelCount_).
00308     virtual int32_t getMaxLabelCount() const;
00309 
    // Set the limit on the number of labels permitted in the index.
    // NOTE(review): the accepted range of maxLabelCount is not visible in
    // this header.
00322     virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status);
00323 
00324 
    // Return a string derived from `lowerLimit` for overflow comparison.
    // NOTE(review): exact semantics are not visible in this header; the
    // result is presumably held in overflowComparisonString_ -- confirm.
00337     virtual const UnicodeString &getOverflowComparisonString(const UnicodeString &lowerLimit,
00338                                                              UErrorCode &status);
00339 
00340 
    // Add a record to the index: a name plus an opaque, caller-supplied data
    // pointer (cf. Record::name_ and Record::data_ below).
00357     virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status);
00358 
    // Remove the records from the index (presumably all of them -- confirm).
00367     virtual AlphabeticIndex &clearRecords(UErrorCode &status);
00368 
00369 
    // Number of buckets in the index. Takes a status parameter, presumably
    // because the index may have to be (re)built first, cf.
    // indexBuildRequired_ -- confirm.
00378     virtual int32_t  getBucketCount(UErrorCode &status);
00379 
00380 
    // Number of records in the index.
00389     virtual int32_t  getRecordCount(UErrorCode &status);
00390 
00391 
00392 
    // Index of the bucket for the given item name (presumably the bucket the
    // name would be sorted into -- confirm).
00405     virtual int32_t  getBucketIndex(const UnicodeString &itemName, UErrorCode &status);
00406 
00407 
    // Bucket index of the current position of the bucket iteration
    // (presumably related to labelsIterIndex_ -- confirm).
00414     virtual int32_t  getBucketIndex() const;
00415 
00416 
    // Advance the bucket iteration. The UBool result presumably reports
    // whether a next bucket was available -- confirm.
00428     virtual UBool nextBucket(UErrorCode &status);
00429 
    // Label of the current bucket of the iteration (cf. Bucket::label_).
00438     virtual const UnicodeString &getBucketLabel() const;
00439 
    // Label type of the current bucket of the iteration (cf. Bucket::labelType_).
00447     virtual UAlphabeticIndexLabelType getBucketLabelType() const;
00448 
    // Number of records in the current bucket of the iteration.
00457     virtual int32_t getBucketRecordCount() const;
00458 
00459 
    // Reset the bucket iteration.
00468     virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status);
00469 
    // Advance the record iteration. The UBool result presumably reports
    // whether a next record was available in the current bucket -- confirm.
00481     virtual UBool nextRecord(UErrorCode &status);
00482 
    // Name of the current record of the iteration (cf. Record::name_).
00491     virtual const UnicodeString &getRecordName() const;
00492 
00493 
    // Opaque data pointer of the current record of the iteration
    // (cf. Record::data_).
00502     virtual const void *getRecordData() const;
00503 
00504 
    // Reset the record iteration.
00511     virtual AlphabeticIndex &resetRecordIterator();
00512 
00513 private:
00514     // No ICU "poor man's RTTI" for this class nor its subclasses.
00515     virtual UClassID getDynamicClassID() const;
00516 
     // Copy constructor. Private, so client code cannot copy an index.
00521      AlphabeticIndex(const AlphabeticIndex &other);
00522 
     // Assignment operator. Private; the inline body does nothing and
     // returns *this.
00526      AlphabeticIndex &operator =(const AlphabeticIndex & /*other*/) { return *this;};
00527 
     // Equality comparison. Private, so not usable by client code.
00532      virtual UBool operator==(const AlphabeticIndex& other) const;
00533 
     // Inequality comparison. Private, so not usable by client code.
00538      virtual UBool operator!=(const AlphabeticIndex& other) const;
00539 
00540      // Common initialization, for use from all constructors.
00541      void init(UErrorCode &status);
00542 
00543      // Initialize & destruct static constants used by this class.
00544      static void staticInit(UErrorCode &status);
00545 
00546      // Pinyin stuff.  If the input name is Chinese, add the Pinyin prefix to the dest string.
00547      void hackName(UnicodeString &dest, const UnicodeString &name, const Collator *coll);
00548      void initPinyinBounds(const Collator *coll, UErrorCode &status);
00549 
00550    public:
00551 #ifndef U_HIDE_INTERNAL_API
00552 
     // Internal API (hidden when U_HIDE_INTERNAL_API is defined).
     // Presumably the counterpart of staticInit(), releasing the static
     // constants of this class -- confirm in the implementation.
00557      static void staticCleanup();
00558 #endif  /* U_HIDE_INTERNAL_API */
00559    private:
00560 
00561      // Add index characters from the specified locale to the dest set.
00562      // Does not remove any previous contents from dest.
00563      static void getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status);
00564 
     // NOTE(review): returns a UVector *; ownership of the result is not
     // visible in this header. Cf. firstScriptCharacters_ below.
00565      UVector *firstStringsInScript(UErrorCode &status);
00566 
     // NOTE(review): semantics not visible here; takes an item string and
     // returns a UnicodeString by value.
00567      static UnicodeString separated(const UnicodeString &item);
00568 
     // NOTE(review): semantics not visible here; takes a destination set and
     // a string named codePoint, and returns a UnicodeSet * (presumably
     // &dest -- confirm).
00569      static UnicodeSet *getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status);
00570 
     // Index construction steps (cf. indexBuildRequired_ below).
00571      void buildIndex(UErrorCode &status);
00572      void buildBucketList(UErrorCode &status);
00573      void bucketRecords(UErrorCode &status);
00574 
00575 
00576   public:
00577 
00578     //  The following internal items are declared public only to allow access from
00579     //  implementation code written in plain C.  They are not intended for
00580     //  public use.
00581 
00582 #ifndef U_HIDE_INTERNAL_API
00583 
     // One record of the index: a name with its sorting name, the caller's
     // opaque data pointer, and a serial number.
00587      struct Record: public UMemory {
00588          AlphabeticIndex     *alphaIndex_;
00589          const UnicodeString  name_;
00590          UnicodeString        sortingName_;  // Usually the same as name_; different for Pinyin.
00591          const void           *data_;
00592          int32_t              serialNumber_;  // Defines sorting order for names that compare equal.
00593          Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data);
00594          ~Record();
00595      };
00596 #endif  /* U_HIDE_INTERNAL_API */
00597 
     // Records added to the index. Per the comment on Bucket::records_,
     // this vector owns the Record objects.
00603      UVector  *inputRecords_;
00604 
     // One bucket of the index: its label, lower boundary string, label type,
     // and the records that fall into it.
00610      struct Bucket: public UMemory {
00611          UnicodeString     label_;
00612          UnicodeString     lowerBoundary_;
00613          UAlphabeticIndexLabelType labelType_;
00614          UVector           *records_; // Records are owned by inputRecords_ vector.
00615 
00616          Bucket(const UnicodeString &label,   // Parameter strings are copied.
00617                 const UnicodeString &lowerBoundary,
00618                 UAlphabeticIndexLabelType type, UErrorCode &status);
00619          ~Bucket();
00620      };
00621 
00622   public:
00623 
    // Language type used by the Pinyin support (see langType_ below):
    // not Chinese (kNormal), simplified Chinese, or traditional Chinese.
00628     enum ELangType {
00630         kNormal,
00632         kSimplified,
00634         kTraditional
00635     };
00636 
    // Determine the ELangType for a locale.
00641     static ELangType  langTypeFromLocale(const Locale &loc);
00642 
00643 
00644    private:
00645 
00646      // Holds the contents of this index, buckets of user items.
00647      // UVector elements are of type (Bucket *)
00648      UVector *bucketList_;
00649 
00650      int32_t  labelsIterIndex_;      // Index of next item to return.
00651      int32_t  itemsIterIndex_;
00652      Bucket   *currentBucket_;       // While an iteration of the index is underway,
00653                                      //   point to the bucket for the current label.
00654                                      // NULL when no iteration underway.
00655 
00656      UBool    indexBuildRequired_;   //  Caller has made changes to the index that
00657                                      //  require rebuilding & bucketing before the
00658                                      //  contents can be iterated.
00659 
00660      int32_t    maxLabelCount_;      // Limit on # of labels permitted in the index.
00661 
00662      UHashtable *alreadyIn_;         // Key=UnicodeString, value=UnicodeSet
00663 
00664      UnicodeSet *initialLabels_;     // Initial (unprocessed) set of Labels.  Union
00665                                      //   of those explicitly set by the user plus
00666                                      //   those from locales.  Raw values, before
00667                                      //   crunching into bucket labels.
00668 
00669      UVector    *labels_;            // List of Labels, after processing, sorting.
00670                                      //   Contents are (UnicodeString *)
00671 
00672      UnicodeSet *noDistinctSorting_; // As the set of labels is built, strings may
00673                                      // be discarded from the exemplars. This contains
00674                                      // some of the discards, and is
00675                                      // intended for debugging.
00676 
00677      UnicodeSet *notAlphabetic_;     // As the set of labels is built, strings may
00678                                      // be discarded from the exemplars. This contains
00679                                      // some of the discards, and is
00680                                      // intended for debugging.
00681 
00682 
00683      UVector    *firstScriptCharacters_;  // The first character from each script,
00684                                           //   in collation order.
00685 
     // Locale of this index, and the collators used with it.
     // NOTE(review): going by its name, collatorPrimaryOnly_ presumably
     // compares at primary strength only -- confirm.
00686      Locale    locale_;
00687      Collator  *collator_;
00688      Collator  *collatorPrimaryOnly_;
00689 
     // Label strings (see the corresponding public getters/setters).
00690      UnicodeString  inflowLabel_;
00691      UnicodeString  overflowLabel_;
00692      UnicodeString  underflowLabel_;
00693      UnicodeString  overflowComparisonString_;
00694 
00695      ELangType      langType_;        // The language type, simplified Chinese, Traditional Chinese,
00696                                       //  or not Chinese (Normal).  Part of the Pinyin support
00697 
     // Pinyin lookup table type: 24 rows of 3 const UChars each.
00698      typedef const UChar PinyinLookup[24][3];
00699      static PinyinLookup   HACK_PINYIN_LOOKUP_SHORT;
00700      static PinyinLookup   HACK_PINYIN_LOOKUP_LONG;
00701      
00702      // These will be lazily set to the short or long tables based on which
00703      //   Chinese collation has been configured into the ICU library.
00704      static PinyinLookup   *HACK_PINYIN_LOOKUP;
00705      static const UChar    *PINYIN_LOWER_BOUNDS;
00706 
00707 
00708 
00709      int32_t    recordCounter_;         // Counts Records created.  For minting record serial numbers.
00710 
00711 // Constants.  Lazily initialized the first time an AlphabeticIndex object is created.
00712 
00713      static UnicodeSet *ALPHABETIC;
00714      static UnicodeSet *CORE_LATIN;
00715      static UnicodeSet *ETHIOPIC;
00716      static UnicodeSet *HANGUL;
00717      static UnicodeSet *IGNORE_SCRIPTS;
00718      static UnicodeSet *TO_TRY;
00719      static UnicodeSet *UNIHAN;
00720      static const UnicodeString *EMPTY_STRING;
00721 
00722 };
00723 
00724 U_NAMESPACE_END
00725 
00726 #endif /* UCONFIG_NO_COLLATION / UCONFIG_NO_NORMALIZATION */
00727 #endif

Generated on 27 Oct 2013 for ICU 50.1.2 by  doxygen 1.4.7