00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef __ZUnicode__
00026 #define __ZUnicode__ 1
00027 #include "zconfig.h"
00028
00029 #include "ZTypes.h"
00030 #include <string>
00031
00032
00033
00034 namespace ZUnicode {
00035
00036
00037 template <int s> struct Types_T {};
00038
00039 template <> struct Types_T<4>
00040 {
00041 typedef wchar_t utf32_t;
00042 typedef uint16 utf16_t;
00043 };
00044
00045 template <> struct Types_T<2>
00046 {
00047 typedef uint32 utf32_t;
00048 typedef wchar_t utf16_t;
00049 };
00050 }
00051
00052
00053 typedef ZUnicode::Types_T<sizeof(wchar_t)>::utf32_t UTF32;
00054 typedef ZUnicode::Types_T<sizeof(wchar_t)>::utf16_t UTF16;
00055 typedef char UTF8;
00056
00058 typedef std::basic_string<UTF32> string32;
00059
00061 typedef std::basic_string<UTF16> string16;
00062
00065 typedef std::basic_string<UTF8> string8;
00066
00067 namespace ZUnicode {
00070 extern const uint8 sUTF8SequenceLength[256];
00071 extern const uint8 sUTF8StartByteMark[7];
00072 extern const uint8 sUTF8StartByteMask[7];
00073
00075 static const UTF32 kCPEOF = UTF32(-1);
00076
00078 static const UTF32 kCPMaxUCS2 = 0xFFFFul;
00079
00081 static const UTF32 kCPMaxUTF = 0x10FFFFul;
00082
00084 static const UTF32 kCPMaxUCS4 = 0x7FFFFFFFul;
00085
00087 static const UTF32 kCPReplacement = 0xFFFDul;
00088
00090 static const UTF32 kCPSurrogateHighBegin = 0xD800ul;
00091 static const UTF32 kCPSurrogateHighEnd = 0xDC00ul;
00092 static const UTF32 kCPSurrogateLowBegin = 0xDC00ul;
00093 static const UTF32 kCPSurrogateLowEnd = 0xE000ul;
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110 inline bool sIsValidCP(uint32 iCP)
00111 {
00112 return iCP < kCPSurrogateHighBegin || (iCP >= kCPSurrogateLowEnd && iCP <= kCPMaxUTF);
00113 }
00114
00115 inline UTF32 sUTF32FromSurrogates(uint32 hi, uint32 lo)
00116 {
00117 static const int kSurrogateShift = 10;
00118 static const uint32 kSurrogateBase = 0x10000ul;
00119 return kSurrogateBase
00120 + ((hi - uint32(kCPSurrogateHighBegin)) << kSurrogateShift)
00121 + (lo - uint32(kCPSurrogateLowBegin));
00122 }
00123
00124 inline bool sIsLowSurrogate(uint32 iCU)
00125 {
00126 return iCU >= uint32(kCPSurrogateLowBegin) && iCU < uint32(kCPSurrogateLowEnd);
00127 }
00128
00129 inline bool sIsSmallNormal(uint32 iCU)
00130 {
00131 return iCU < uint32(kCPSurrogateHighBegin);
00132 }
00133
00134 inline bool sIsSmallNormalOrHighSurrogate(uint32 iCU)
00135 {
00136 return iCU < uint32(kCPSurrogateLowBegin);
00137 }
00138
00139 inline bool sIsBigNormalOrBeyond(uint32 iCU)
00140 {
00141 return iCU >= uint32(kCPSurrogateLowEnd);
00142 }
00143
00144 inline bool sIsContinuation(uint8 iCU)
00145 {
00146 return (iCU & 0xC0) == 0x80;
00147 }
00148
00149 inline void sAppendContinuation(uint32& ioCP, uint8 iContinuation)
00150 {
00151 ioCP = (ioCP << 6) + (iContinuation & 0x3F);
00152 }
00153 }
00154
00155
00156
00158 string16 operator+(UTF32 iCP, const string16& iString);
00159
00161 string16& operator+=(string16& ioString, UTF32 iCP);
00162
00164 inline string16 operator+(const string16& iString, UTF32 iCP)
00165 {
00166 string16 temp = iString;
00167 return temp += iCP;
00168 }
00169
00170
00172 string8 operator+(UTF32 iCP, const string8& iString);
00173
00175 string8& operator+=(string8& ioString, UTF32 iCP);
00176
00178 inline string8 operator+(const string8& iString, UTF32 iCP)
00179 {
00180 string8 temp = iString;
00181 return temp += iCP;
00182 }
00183
00184
00185 #pragma mark -
00186 #pragma mark * Include gnarly template stuff
00187
00188 #include "ZUnicodePriv.h"
00189
00190
00191 #pragma mark -
00192 #pragma mark * ZUnicode
00193
00194 namespace ZUnicode {
00195
00196
00197
00198
00199
00200
00201
00206 template <class I>
00207 inline size_t sCountCU(I iSource)
00208 { return Functions_CountCU_T<I>::sCountCU(iSource); }
00209
00212 template <class I>
00213 inline size_t sCountCP(I iSource)
00214 { return Functions_Count_T<I>::sCountCP(iSource); }
00215
00218 template <class I>
00219 inline void sCount(I iSource, size_t* oCountCU, size_t* oCountCP)
00220 { Functions_Count_T<I>::sCount(iSource, oCountCU, oCountCP); }
00222
00223
00228 template <class I>
00229 inline size_t sCPToCU(I iSource, size_t iCountCP)
00230 { return Functions_Count_T<I>::sCPToCU(iSource, iCountCP); }
00231
00235 template <class I>
00236 inline size_t sCPToCU(I iSource, size_t iCountCU, size_t iCountCP, size_t* oCountCP)
00237 { return Functions_Count_T<I>::sCPToCU(iSource, iCountCU, iCountCP, oCountCP); }
00238
00242 template <class I>
00243 inline size_t sCPToCU(I iSource, I iEnd, size_t iCountCP, size_t* oCountCP)
00244 { return Functions_Count_T<I>::sCPToCU(iSource, iSource, iCountCP, oCountCP); }
00245
00248 template <class I>
00249 inline size_t sCUToCP(I iSource, size_t iCountCU)
00250 { return Functions_Count_T<I>::sCUToCP(iSource, iCountCU); }
00251
00254 template <class I>
00255 inline size_t sCUToCP(I iSource, I iEnd)
00256 { return Functions_Count_T<I>::sCUToCP(iSource, iEnd); }
00258
00259
00264 template <class I>
00265 inline void sAlign(I& ioCurrent)
00266 { Functions_Read_T<I>::sAlign(ioCurrent); }
00267
00270 template <class I>
00271 inline void sAlign(I& ioCurrent, I iEnd)
00272 { Functions_Read_T<I>::sAlign(ioCurrent, iEnd); }
00274
00275
00279 template <class I>
00280 inline void sInc(I& ioCurrent)
00281 { return Functions_Read_T<I>::sInc(ioCurrent); }
00282
00286 template <class I>
00287 inline bool sInc(I& ioCurrent, I iEnd)
00288 { return Functions_Read_T<I>::sInc(ioCurrent, iEnd); }
00289
00291 template <class I>
00292 inline void sDec(I& ioCurrent)
00293 { return Functions_Read_T<I>::sDec(ioCurrent); }
00294
00299 template <class I>
00300 inline bool sDec(I iStart, I& ioCurrent, I iEnd)
00301 { return Functions_Read_T<I>::sDec(iStart, ioCurrent, iEnd); }
00302
00304 template <class I>
00305 inline UTF32 sRead(I iCurrent)
00306 { return Functions_Read_T<I>::sRead(iCurrent); }
00307
00310 template <class I>
00311 inline bool sRead(I iCurrent, I iEnd, UTF32& oCP)
00312 { return Functions_Read_T<I>::sRead(iCurrent, iEnd, oCP); }
00313
00317 template <class I>
00318 inline UTF32 sReadInc(I& ioCurrent)
00319 { return Functions_Read_T<I>::sReadInc(ioCurrent); }
00320
00325 template <class I>
00326 inline bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP)
00327 { return Functions_Read_T<I>::sReadInc(ioCurrent, iEnd, oCP); }
00328
00335 template <class I>
00336 inline bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped)
00337 { return Functions_Read_T<I>::sReadInc(ioCurrent, iEnd, oCP, ioCountSkipped); }
00338
00340 template <class I>
00341 inline UTF32 sDecRead(I& ioCurrent)
00342 { return Functions_Read_T<I>::sDecRead(ioCurrent); }
00343
00346 template <class I>
00347 inline bool sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP)
00348 { return Functions_Read_T<I>::sDecRead(iStart, ioCurrent, iEnd, oCP); }
00349
00353 template <class I>
00354 inline bool sWrite(I iDest, I iEnd, UTF32 iCP)
00355 { return Functions_Write_T<I>::sWrite(iDest, iEnd, iCP); }
00356
00361 template <class I>
00362 inline bool sWriteInc(I& ioDest, I iEnd, UTF32 iCP)
00363 { return Functions_Write_T<I>::sWriteInc(ioDest, iEnd, iCP); }
00365
00366
00369 template <class I>
00370 inline string32 sAsUTF32(I iSource, size_t iCountCU)
00371 { return Functions_Convert_T<I>::sAsUTF32(iSource, iCountCU); }
00372
00373 template <class I>
00374 inline string32 sAsUTF32(I iSource, I iEnd)
00375 { return Functions_Convert_T<I>::sAsUTF32(iSource, iEnd); }
00376
00377 inline string32 sAsUTF32(const UTF32* iString)
00378 { return string32(iString); }
00379
00380 inline string32 sAsUTF32(const UTF32* iString, size_t iCountCU)
00381 { return string32(iString, iCountCU); }
00382
00383 inline string32 sAsUTF32(const string32& iString)
00384 { return iString; }
00385
00386 inline string32 sAsUTF32(const string16& iString)
00387 {
00388 return Functions_Convert_T<string16::const_iterator>::sAsUTF32(iString.begin(), iString.size());
00389 }
00390
00391 inline string32 sAsUTF32(const string8& iString)
00392 {
00393 return Functions_Convert_T<string8::const_iterator>::sAsUTF32(iString.begin(), iString.size());
00394 }
00396
00397
00400 template <class I>
00401 inline string16 sAsUTF16(I iSource, size_t iCountCU)
00402 { return Functions_Convert_T<I>::sAsUTF16(iSource, iCountCU); }
00403
00404 template <class I>
00405 inline string16 sAsUTF16(I iSource, I iEnd)
00406 { return Functions_Convert_T<I>::sAsUTF16(iSource, iEnd); }
00407
00408 inline string16 sAsUTF16(const UTF16* iString)
00409 { return string16(iString); }
00410
00411 inline string16 sAsUTF16(const UTF16* iString, size_t iCountCU)
00412 { return string16(iString, iCountCU); }
00413
00414 inline string16 sAsUTF16(const string32& iString)
00415 {
00416 return Functions_Convert_T<string32::const_iterator>::sAsUTF16(iString.begin(), iString.size());
00417 }
00418
00419 inline string16 sAsUTF16(const string16& iString)
00420 { return iString; }
00421
00422 #if 1
00423 string16 sAsUTF16(const string8& iString);
00424 #else
00425 inline string16 sAsUTF16(const string8& iString)
00426 {
00427 return Functions_Convert_T<string8::const_iterator>::sAsUTF16(iString.begin(), iString.size());
00428 }
00429 #endif
00430
00431
00432
00435 template <class I>
00436 inline string8 sAsUTF8(I iSource, size_t iCountCU)
00437 { return Functions_Convert_T<I>::sAsUTF8(iSource, iCountCU); }
00438
00439 template <class I>
00440 inline string8 sAsUTF8(I iSource, I iEnd)
00441 { return Functions_Convert_T<I>::sAsUTF8(iSource, iEnd); }
00442
00443 inline string8 sAsUTF8(const UTF8* iString)
00444 { return string8(iString); }
00445
00446 inline string8 sAsUTF8(const UTF8* iString, size_t iCountCU)
00447 { return string8(iString, iCountCU); }
00448
00449 inline string8 sAsUTF8(const string32& iString)
00450 {
00451 return Functions_Convert_T<string32::const_iterator>::sAsUTF8(iString.begin(), iString.size());
00452 }
00453
00454 inline string8 sAsUTF8(const string16& iString)
00455 {
00456 return Functions_Convert_T<string16::const_iterator>::sAsUTF8(iString.begin(), iString.size());
00457 }
00458
00459 inline string8 sAsUTF8(const string8& iString)
00460 { return iString; }
00462
00463
00466 void sUTF32ToUTF8(const UTF32* iSource, size_t iSourceCount,
00467 size_t* oSourceCount,
00468 UTF8* iDest, size_t iDestCU,
00469 size_t* oDestCU, size_t* oCountCP);
00470
00471 void sUTF32ToUTF8(const UTF32* iSource, size_t iSourceCount,
00472 size_t* oSourceCount, size_t* oSourceCountSkipped,
00473 UTF8* iDest, size_t iDestCU,
00474 size_t* oDestCU, size_t* oCountCP);
00475
00476
00477 bool sUTF8ToUTF32(const UTF8* iSource, size_t iSourceCU,
00478 size_t* oSourceCU,
00479 UTF32* iDest, size_t iDestCount,
00480 size_t* oDestCount);
00481
00482 bool sUTF8ToUTF32(const UTF8* iSource, size_t iSourceCU,
00483 size_t* oSourceCU, size_t* oSourceCUSkipped,
00484 UTF32* iDest, size_t iDestCount,
00485 size_t* oDestCount);
00486
00487
00488 void sUTF32ToUTF16(const UTF32* iSource, size_t iSourceCount,
00489 size_t* oSourceCount,
00490 UTF16* iDest, size_t iDestCU,
00491 size_t* oDestCU, size_t* oCountCP);
00492
00493 void sUTF32ToUTF16(const UTF32* iSource, size_t iSourceCount,
00494 size_t* oSourceCount, size_t* oSourceCountSkipped,
00495 UTF16* iDest, size_t iDestCU,
00496 size_t* oDestCU, size_t* oCountCP);
00497
00498
00499 bool sUTF16ToUTF32(const UTF16* iSource, size_t iSourceCU,
00500 size_t* oSourceCU,
00501 UTF32* iDest, size_t iDestCount,
00502 size_t* oDestCount);
00503
00504 bool sUTF16ToUTF32(const UTF16* iSource, size_t iSourceCU,
00505 size_t* oSourceCU, size_t* oSourceCUSkipped,
00506 UTF32* iDest, size_t iDestCount,
00507 size_t* oDestCount);
00508
00509
00510 bool sUTF16ToUTF8(const UTF16* iSource, size_t iSourceCU,
00511 size_t* oSourceCU,
00512 UTF8* iDest, size_t iDestCU,
00513 size_t* oDestCU, size_t iMaxCP, size_t* oCountCP);
00514
00515 bool sUTF16ToUTF8(const UTF16* iSource, size_t iSourceCU,
00516 size_t* oSourceCU, size_t* oSourceCUSkipped,
00517 UTF8* iDest, size_t iDestCU,
00518 size_t* oDestCU, size_t iMaxCP, size_t* oCountCP);
00519
00520
00521 bool sUTF8ToUTF16(const UTF8* iSource, size_t iSourceCU,
00522 size_t* oSourceCU,
00523 UTF16* iDest, size_t iDestCU,
00524 size_t* oDestCU, size_t iMaxCP, size_t* oCountCP);
00525
00526 bool sUTF8ToUTF16(const UTF8* iSource, size_t iSourceCU,
00527 size_t* oSourceCU, size_t* oSourceCUSkipped,
00528 UTF16* iDest, size_t iDestCU,
00529 size_t* oDestCU, size_t iMaxCP, size_t* oCountCP);
00531
00532
00535 bool sIsValid(UTF32 iCP);
00536 bool sIsAlpha(UTF32 iCP);
00537 bool sIsDigit(UTF32 iCP);
00538 bool sIsAlphaDigit(UTF32 iCP);
00539 bool sIsWhitespace(UTF32 iCP);
00540 bool sIsEOL(UTF32 iCP);
00542
00543
00546 UTF32 sToUpper(UTF32 iCP);
00547 UTF32 sToLower(UTF32 iCP);
00548
00549 string8 sToLower(const string8& iString);
00550 string8 sToUpper(const string8& iString);
00551
00552 int sHexValue(UTF32 iCP);
00554
00555 }
00556
00557 #endif // __ZUnicode__