00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef __ZUnicodePrivB__
00026 #define __ZUnicodePrivB__ 1
00027 #include "zconfig.h"
00028
00029 #include "ZUnicode.h"
00030
00031 namespace ZUnicode {
00032
00033
00034 #pragma mark -
00035 #pragma mark * ZUnicode::Functions_CountCU
00036
00037 template <class I>
00038 size_t Functions_CountCU_T<I>::sCountCU(I iSource)
00039 {
00040 I localSource = iSource;
00041 while (0 != *localSource)
00042 ++localSource;
00043 return localSource - iSource;
00044 }
00045
00046
00047 #pragma mark -
00048 #pragma mark * ZUnicode::Functions_Count
00049
00050 template <class I>
00051 size_t Functions_Count_T<I>::sCountCP(I iSource)
00052 {
00053 size_t count = 0;
00054 while (Functions_Read_T<I>::sReadInc(iSource))
00055 ++count;
00056 return count;
00057 }
00058
00059 template <class I>
00060 void Functions_Count_T<I>::sCount(I iSource, size_t* oCountCU, size_t* oCountCP)
00061 {
00062 if (oCountCU)
00063 {
00064 if (oCountCP)
00065 {
00066 size_t count = 0;
00067 I localSource = iSource;
00068 while (Functions_Read_T<I>::sReadInc(localSource))
00069 ++count;
00070 *oCountCU = localSource - iSource;
00071 *oCountCP = count;
00072 }
00073 else
00074 {
00075 *oCountCU = Functions_CountCU_T<I>::sCountCU(iSource);
00076 }
00077 }
00078 else
00079 {
00080 if (oCountCP)
00081 *oCountCP = sCountCP(iSource);
00082 }
00083 }
00084
00085 template <class I>
00086 size_t Functions_Count_T<I>::sCPToCU(I iSource, size_t iCountCP)
00087 {
00088 I localSource = iSource;
00089 while (iCountCP--)
00090 Functions_Read_T<I>::sInc(localSource);
00091 return localSource - iSource;
00092 }
00093
00094 template <class I>
00095 size_t Functions_Count_T<I>::sCPToCU(I iSource, size_t iCountCU, size_t iCountCP, size_t* oCountCP)
00096 {
00097 I localSource = iSource;
00098 I localEnd = iSource + iCountCU;
00099 size_t countRemaining = iCountCP;
00100 if (countRemaining)
00101 {
00102 ++countRemaining;
00103 while (--countRemaining)
00104 {
00105 if (!Functions_Read_T<I>::sInc(localSource, localEnd))
00106 break;
00107 }
00108 }
00109 if (oCountCP)
00110 *oCountCP = iCountCP - countRemaining;
00111 return localSource - iSource;
00112 }
00113
00114 template <class I>
00115 size_t Functions_Count_T<I>::sCPToCU(I iSource, I iEnd, size_t iCountCP, size_t* oCountCP)
00116 {
00117 I localSource = iSource;
00118 size_t countRemaining = iCountCP;
00119 if (countRemaining)
00120 {
00121 ++countRemaining;
00122 while (--countRemaining)
00123 {
00124 if (!Functions_Read_T<I>::sInc(localSource, iEnd))
00125 break;
00126 }
00127 }
00128 if (oCountCP)
00129 *oCountCP = iCountCP - countRemaining;
00130 return localSource - iSource;
00131 }
00132
00133 template <class I>
00134 size_t Functions_Count_T<I>::sCUToCP(I iSource, size_t iCountCU)
00135 {
00136 size_t countCP = 0;
00137 I localEnd = iSource + iCountCU;
00138 for (;;)
00139 {
00140 if (!Functions_Read_T<I>::sInc(iSource, localEnd))
00141 break;
00142 ++countCP;
00143 }
00144 return countCP;
00145 }
00146
00147 template <class I>
00148 size_t Functions_Count_T<I>::sCUToCP(I iSource, I iEnd)
00149 {
00150 size_t countCP = 0;
00151 for (;;)
00152 {
00153 if (!Functions_Read_T<I>::sInc(iSource, iEnd))
00154 break;
00155 ++countCP;
00156 }
00157 return countCP;
00158 }
00159
00160
00161 #pragma mark -
00162 #pragma mark * ZUnicode::Functions_Read_T<I, UTF32>
00163
00164 template <class I>
00165 struct Functions_Read_T<I, UTF32>
00166 {
00167 static void sAlign(I& ioCurrent);
00168 static void sAlign(I& ioCurrent, I iEnd);
00169
00170 static void sInc(I& ioCurrent);
00171 static bool sInc(I& ioCurrent, I iEnd);
00172
00173 static void sDec(I& ioCurrent);
00174 static bool sDec(I iStart, I& ioCurrent, I iEnd);
00175
00176 static UTF32 sRead(I iCurrent);
00177 static bool sRead(I iCurrent, I iEnd, UTF32& oCP);
00178
00179 static UTF32 sReadInc(I& ioCurrent);
00180 static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP);
00181 static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped);
00182
00183 static UTF32 sDecRead(I& ioCurrent);
00184 static bool sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP);
00185 };
00186
00187 template <class I>
00188 void Functions_Read_T<I, UTF32>::sAlign(I& ioCurrent)
00189 {
00190 for (;;)
00191 {
00192 if (sIsValidCP(*ioCurrent))
00193 break;
00194 ++ioCurrent;
00195 }
00196 }
00197
00198 template <class I>
00199 void Functions_Read_T<I, UTF32>::sAlign(I& ioCurrent, I iEnd)
00200 {
00201 while (ioCurrent < iEnd)
00202 {
00203 if (sIsValidCP(*ioCurrent))
00204 break;
00205 ++ioCurrent;
00206 }
00207 }
00208
00209 template <class I>
00210 void Functions_Read_T<I, UTF32>::sInc(I& ioCurrent)
00211 {
00212 for (;;)
00213 {
00214 if (sIsValidCP(*ioCurrent++))
00215 return;
00216 }
00217 }
00218
00219 template <class I>
00220 bool Functions_Read_T<I, UTF32>::sInc(I& ioCurrent, I iEnd)
00221 {
00222 for (;;)
00223 {
00224 if (ioCurrent >= iEnd)
00225 {
00226
00227 return false;
00228 }
00229
00230 if (sIsValidCP(*ioCurrent++))
00231 return true;
00232 }
00233 }
00234
00235 template <class I>
00236 void Functions_Read_T<I, UTF32>::sDec(I& ioCurrent)
00237 {
00238 for (;;)
00239 {
00240 if (sIsValidCP(*--ioCurrent))
00241 return;
00242 }
00243 }
00244
00245 template <class I>
00246 bool Functions_Read_T<I, UTF32>::sDec(I iStart, I& ioCurrent, I iEnd)
00247 {
00248 for (;;)
00249 {
00250 if (iStart >= ioCurrent)
00251 {
00252
00253 return false;
00254 }
00255 if (sIsValidCP(*--ioCurrent))
00256 return true;
00257 }
00258 }
00259
00260 template <class I>
00261 UTF32 Functions_Read_T<I, UTF32>::sRead(I iCurrent)
00262 {
00263 return sReadInc(iCurrent);
00264 }
00265
00266 template <class I>
00267 bool Functions_Read_T<I, UTF32>::sRead(I iCurrent, I iEnd, UTF32& oCP)
00268 {
00269 return sReadInc(iCurrent, iEnd, oCP);
00270 }
00271
00272 template <class I>
00273 UTF32 Functions_Read_T<I, UTF32>::sReadInc(I& ioCurrent)
00274 {
00275 for (;;)
00276 {
00277 uint32 theCU = *ioCurrent++;
00278 if (sIsValidCP(theCU))
00279 return theCU;
00280 }
00281 }
00282
00283 template <class I>
00284 bool Functions_Read_T<I, UTF32>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP)
00285 {
00286 for (;;)
00287 {
00288 if (ioCurrent >= iEnd)
00289 {
00290
00291 return false;
00292 }
00293
00294 uint32 theCU = *ioCurrent++;
00295 if (sIsValidCP(theCU))
00296 {
00297 oCP = theCU;
00298 return true;
00299 }
00300 }
00301 }
00302
00303 template <class I>
00304 bool Functions_Read_T<I, UTF32>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped)
00305 {
00306 for (;;)
00307 {
00308 if (ioCurrent >= iEnd)
00309 {
00310
00311 return false;
00312 }
00313
00314 uint32 theCU = *ioCurrent++;
00315 if (sIsValidCP(theCU))
00316 {
00317 oCP = theCU;
00318 return true;
00319 }
00320 ++ioCountSkipped;
00321 }
00322 }
00323
00324 template <class I>
00325 UTF32 Functions_Read_T<I, UTF32>::sDecRead(I& ioCurrent)
00326 {
00327 for (;;)
00328 {
00329 uint32 theCP = *--ioCurrent;
00330 if (sIsValidCP(theCP))
00331 return theCP;
00332 }
00333 }
00334
00335 template <class I>
00336 bool Functions_Read_T<I, UTF32>::sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP)
00337 {
00338 for (;;)
00339 {
00340 if (iStart >= ioCurrent)
00341 {
00342
00343 return false;
00344 }
00345 uint32 theCP = *--ioCurrent;
00346 if (sIsValidCP(theCP))
00347 {
00348 oCP = theCP;
00349 return true;
00350 }
00351 }
00352 }
00353
00354
00355 #pragma mark -
00356 #pragma mark * ZUnicode::Functions_Write_T<I, UTF32>
00357
00358 template <class I>
00359 struct Functions_Write_T<I, UTF32>
00360 {
00361 static bool sWrite(I iDest, I iEnd, UTF32 iCP);
00362 static bool sWriteInc(I& ioDest, I iEnd, UTF32 iCP);
00363 };
00364
00365 template <class I>
00366 bool Functions_Write_T<I, UTF32>::sWrite(I iDest, I iEnd, UTF32 iCP)
00367 {
00368 return sWriteInc(iDest, iEnd, iCP);
00369 }
00370
00371 template <class I>
00372 bool Functions_Write_T<I, UTF32>::sWriteInc(I& ioDest, I iEnd, UTF32 iCP)
00373 {
00374 if (sIsValidCP(iCP))
00375 {
00376 if (ioDest >= iEnd)
00377 return false;
00378 *ioDest++ = iCP;
00379 }
00380 return true;
00381 }
00382
00383
00384 #pragma mark -
00385 #pragma mark * ZUnicode::Functions_Read_T<I, UTF16>
00386
00387 template <class I>
00388 struct Functions_Read_T<I, UTF16>
00389 {
00390 static void sAlign(I& ioCurrent);
00391 static void sAlign(I& ioCurrent, I iEnd);
00392
00393 static void sInc(I& ioCurrent);
00394 static bool sInc(I& ioCurrent, I iEnd);
00395
00396 static void sDec(I& ioCurrent);
00397 static bool sDec(I iStart, I& ioCurrent, I iEnd);
00398
00399 static UTF32 sRead(I iCurrent);
00400 static bool sRead(I iCurrent, I iEnd, UTF32& oCP);
00401
00402 static UTF32 sReadInc(I& ioCurrent);
00403 static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP);
00404 static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped);
00405
00406 static UTF32 sDecRead(I& ioCurrent);
00407 static bool sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP);
00408 };
00409
00410 template <class I>
00411 void Functions_Read_T<I, UTF16>::sAlign(I& ioCurrent)
00412 {
00413 for (;;)
00414 {
00415 uint16 theCU = *ioCurrent;
00416 if (sIsSmallNormal(theCU))
00417 {
00418 break;
00419 }
00420 else if (sIsSmallNormalOrHighSurrogate(theCU))
00421 {
00422
00423 if (sIsLowSurrogate(uint16(ioCurrent[1])))
00424 break;
00425
00426 ++ioCurrent;
00427 }
00428 else if (sIsBigNormalOrBeyond(theCU))
00429 {
00430 break;
00431 }
00432 else
00433 {
00434
00435 ++ioCurrent;
00436 }
00437 }
00438 }
00439
00440 template <class I>
00441 void Functions_Read_T<I, UTF16>::sAlign(I& ioCurrent, I iEnd)
00442 {
00443 while (ioCurrent < iEnd)
00444 {
00445 uint16 theCU = *ioCurrent;
00446 if (sIsSmallNormal(theCU))
00447 {
00448 break;
00449 }
00450 else if (sIsSmallNormalOrHighSurrogate(theCU))
00451 {
00452
00453 if (ioCurrent + 1 >= iEnd)
00454 {
00455
00456 break;
00457 }
00458
00459 if (sIsLowSurrogate(uint16(ioCurrent[1])))
00460 break;
00461
00462
00463 ++ioCurrent;
00464 }
00465 else if (sIsBigNormalOrBeyond(theCU))
00466 {
00467 break;
00468 }
00469 else
00470 {
00471
00472 ++ioCurrent;
00473 }
00474 }
00475 }
00476
00477 template <class I>
00478 void Functions_Read_T<I, UTF16>::sInc(I& ioCurrent)
00479 {
00480 for (;;)
00481 {
00482 uint16 theCU = uint16(*ioCurrent++);
00483 if (sIsSmallNormal(theCU))
00484 {
00485 return;
00486 }
00487 else if (sIsSmallNormalOrHighSurrogate(theCU))
00488 {
00489
00490 if (sIsLowSurrogate(uint16(*ioCurrent++)))
00491 return;
00492 --ioCurrent;
00493 }
00494 else if (sIsBigNormalOrBeyond(theCU))
00495 {
00496 return;
00497 }
00498 else
00499 {
00500
00501 }
00502 }
00503 }
00504
00505 template <class I>
00506 bool Functions_Read_T<I, UTF16>::sInc(I& ioCurrent, I iEnd)
00507 {
00508 for (;;)
00509 {
00510 if (ioCurrent >= iEnd)
00511 {
00512
00513 return false;
00514 }
00515
00516 uint16 theCU = uint16(*ioCurrent++);
00517 if (sIsSmallNormal(theCU))
00518 {
00519 return true;
00520 }
00521 else if (sIsSmallNormalOrHighSurrogate(theCU))
00522 {
00523
00524 if (ioCurrent >= iEnd)
00525 {
00526
00527 --ioCurrent;
00528 return false;
00529 }
00530
00531 if (sIsLowSurrogate(uint16(*ioCurrent++)))
00532 return true;
00533 --ioCurrent;
00534 }
00535 else if (sIsBigNormalOrBeyond(theCU))
00536 {
00537 return true;
00538 }
00539 else
00540 {
00541
00542 }
00543 }
00544 }
00545
00546 template <class I>
00547 void Functions_Read_T<I, UTF16>::sDec(I& ioCurrent)
00548 {
00549 bool lastWasLowSurrogate = false;
00550 for (;;)
00551 {
00552 uint16 theCU = uint16(*--ioCurrent);
00553 if (sIsSmallNormal(theCU))
00554 {
00555
00556 break;
00557 }
00558 else if (sIsSmallNormalOrHighSurrogate(theCU))
00559 {
00560
00561 if (lastWasLowSurrogate)
00562 break;
00563 lastWasLowSurrogate = false;
00564 }
00565 else if (sIsBigNormalOrBeyond(theCU))
00566 {
00567
00568 break;
00569 }
00570 else
00571 {
00572
00573 lastWasLowSurrogate = true;
00574 }
00575 }
00576 }
00577
00578 template <class I>
00579 bool Functions_Read_T<I, UTF16>::sDec(I iStart, I& ioCurrent, I iEnd)
00580 {
00581 bool lastWasLowSurrogate = false;
00582 for (;;)
00583 {
00584 if (iStart >= ioCurrent)
00585 {
00586
00587 return false;
00588 }
00589
00590 uint16 theCU = uint16(*--ioCurrent);
00591 if (sIsSmallNormal(theCU))
00592 {
00593
00594 return true;
00595 }
00596 else if (sIsSmallNormalOrHighSurrogate(theCU))
00597 {
00598
00599 if (lastWasLowSurrogate)
00600 return true;
00601 lastWasLowSurrogate = false;
00602 }
00603 else if (sIsBigNormalOrBeyond(theCU))
00604 {
00605
00606 return true;
00607 }
00608 else
00609 {
00610
00611 lastWasLowSurrogate = true;
00612 }
00613 }
00614 }
00615
00616 template <class I>
00617 UTF32 Functions_Read_T<I, UTF16>::sRead(I iCurrent)
00618 {
00619 return sReadInc(iCurrent);
00620 }
00621
00622 template <class I>
00623 bool Functions_Read_T<I, UTF16>::sRead(I iCurrent, I iEnd, UTF32& oCP)
00624 {
00625 return sReadInc(iCurrent, iEnd, oCP);
00626 }
00627
00628 template <class I>
00629 UTF32 Functions_Read_T<I, UTF16>::sReadInc(I& ioCurrent)
00630 {
00631 for (;;)
00632 {
00633 uint16 theCU = *ioCurrent++;
00634 if (sIsSmallNormal(theCU))
00635 {
00636 return theCU;
00637 }
00638 else if (sIsSmallNormalOrHighSurrogate(theCU))
00639 {
00640
00641 uint16 theCU2 = *ioCurrent++;
00642 if (sIsLowSurrogate(theCU2))
00643 return sUTF32FromSurrogates(theCU, theCU2);
00644 --ioCurrent;
00645 }
00646 else if (sIsBigNormalOrBeyond(theCU))
00647 {
00648 return theCU;
00649 }
00650 else
00651 {
00652
00653 }
00654 }
00655 }
00656
00657 template <class I>
00658 bool Functions_Read_T<I, UTF16>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP)
00659 {
00660 for (;;)
00661 {
00662 if (ioCurrent >= iEnd)
00663 {
00664
00665 return false;
00666 }
00667
00668 uint16 theCU = *ioCurrent++;
00669 if (sIsSmallNormal(theCU))
00670 {
00671 oCP = theCU;
00672 return true;
00673 }
00674 else if (sIsSmallNormalOrHighSurrogate(theCU))
00675 {
00676
00677 if (ioCurrent >= iEnd)
00678 {
00679
00680 --ioCurrent;
00681 return false;
00682 }
00683
00684 uint16 theCU2 = *ioCurrent++;
00685 if (sIsLowSurrogate(theCU2))
00686 {
00687 oCP = sUTF32FromSurrogates(theCU, theCU2);
00688 return true;
00689 }
00690 --ioCurrent;
00691 }
00692 else if (sIsBigNormalOrBeyond(theCU))
00693 {
00694 oCP = theCU;
00695 return true;
00696 }
00697 else
00698 {
00699
00700 }
00701 }
00702 }
00703
00704 template <class I>
00705 bool Functions_Read_T<I, UTF16>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped)
00706 {
00707 for (;;)
00708 {
00709 if (ioCurrent >= iEnd)
00710 {
00711
00712 return false;
00713 }
00714
00715 uint16 theCU = *ioCurrent++;
00716 if (sIsSmallNormal(theCU))
00717 {
00718 oCP = theCU;
00719 return true;
00720 }
00721 else if (sIsSmallNormalOrHighSurrogate(theCU))
00722 {
00723
00724 if (ioCurrent >= iEnd)
00725 {
00726
00727 --ioCurrent;
00728 return false;
00729 }
00730
00731 uint16 theCU2 = *ioCurrent++;
00732 if (sIsLowSurrogate(theCU2))
00733 {
00734 oCP = sUTF32FromSurrogates(theCU, theCU2);
00735 return true;
00736 }
00737
00738 ++ioCountSkipped;
00739 --ioCurrent;
00740 }
00741 else if (sIsBigNormalOrBeyond(theCU))
00742 {
00743 oCP = theCU;
00744 return true;
00745 }
00746 else
00747 {
00748
00749 ++ioCountSkipped;
00750 }
00751 }
00752 }
00753
00754 template <class I>
00755 UTF32 Functions_Read_T<I, UTF16>::sDecRead(I& ioCurrent)
00756 {
00757 uint16 priorLowSurrogate = 0;
00758 for (;;)
00759 {
00760 uint16 theCU = *--ioCurrent;
00761 if (sIsSmallNormal(theCU))
00762 {
00763
00764 return theCU;
00765 }
00766 else if (sIsSmallNormalOrHighSurrogate(theCU))
00767 {
00768
00769 if (priorLowSurrogate)
00770 return sUTF32FromSurrogates(theCU, priorLowSurrogate);
00771 priorLowSurrogate = 0;
00772 }
00773 else if (sIsBigNormalOrBeyond(theCU))
00774 {
00775 return theCU;
00776 }
00777 else
00778 {
00779
00780 priorLowSurrogate = theCU;
00781 }
00782 }
00783 }
00784
00785 template <class I>
00786 bool Functions_Read_T<I, UTF16>::sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP)
00787 {
00788 uint16 priorLowSurrogate = 0;
00789 for (;;)
00790 {
00791 if (iStart >= ioCurrent)
00792 {
00793
00794 return false;
00795 }
00796
00797 uint16 theCU = *--ioCurrent;
00798 if (sIsSmallNormal(theCU))
00799 {
00800
00801 oCP = theCU;
00802 return true;
00803 }
00804 else if (sIsSmallNormalOrHighSurrogate(theCU))
00805 {
00806
00807 if (priorLowSurrogate)
00808 {
00809 oCP = sUTF32FromSurrogates(theCU, priorLowSurrogate);
00810 return true;
00811 }
00812 priorLowSurrogate = 0;
00813 }
00814 else if (sIsBigNormalOrBeyond(theCU))
00815 {
00816 oCP = theCU;
00817 return true;
00818 }
00819 else
00820 {
00821
00822 priorLowSurrogate = theCU;
00823 }
00824 }
00825 }
00826
00827
00828 #pragma mark -
00829 #pragma mark * ZUnicode::Functions_Write_T<I, UTF16>
00830
00831 template <class I>
00832 struct Functions_Write_T<I, UTF16>
00833 {
00834 static bool sWrite(I iDest, I iEnd, UTF32 iCP);
00835 static bool sWriteInc(I& ioDest, I iEnd, UTF32 iCP);
00836 };
00837
00838 template <class I>
00839 bool Functions_Write_T<I, UTF16>::sWrite(I iDest, I iEnd, UTF32 iCP)
00840 {
00841 return sWriteInc(iDest, iEnd, iCP);
00842 }
00843
00844 template <class I>
00845 bool Functions_Write_T<I, UTF16>::sWriteInc(I& ioDest, I iEnd, UTF32 iCP)
00846 {
00847 if (sIsValidCP(iCP))
00848 {
00849 if (iCP > kCPMaxUCS2)
00850 {
00851
00852 if (ioDest + 1 >= iEnd)
00853 {
00854
00855 return false;
00856 }
00857 iCP -= 0x10000;
00858 *ioDest++ = iCP / 0x400 + kCPSurrogateHighBegin;
00859 *ioDest++ = iCP & 0x3FF + kCPSurrogateLowBegin;
00860 }
00861 else
00862 {
00863 if (ioDest >= iEnd)
00864 return false;
00865 *ioDest++ = iCP;
00866 }
00867 }
00868 return true;
00869 }
00870
00871
00872 #pragma mark -
00873 #pragma mark * ZUnicode::Functions_Read_T<I, UTF8>
00874 template <class I>
00875 struct Functions_Read_T<I, UTF8>
00876 {
00877 static void sAlign(I& ioCurrent);
00878 static void sAlign(I& ioCurrent, I iEnd);
00879
00880 static void sInc(I& ioCurrent);
00881 static bool sInc(I& ioCurrent, I iEnd);
00882
00883 static void sDec(I& ioCurrent);
00884 static bool sDec(I iStart, I& ioCurrent, I iEnd);
00885
00886 static UTF32 sRead(I iCurrent);
00887 static bool sRead(I iCurrent, I iEnd, UTF32& oCP);
00888
00889 static UTF32 sReadInc(I& ioCurrent);
00890 static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP);
00891 static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped);
00892
00893 static UTF32 sDecRead(I& ioCurrent);
00894 static bool sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP);
00895 };
00896
00897 template <class I>
00898 void Functions_Read_T<I, UTF8>::sAlign(I& ioCurrent)
00899 {
00900 for (;;)
00901 {
00902 I localSource = ioCurrent;
00903 const uint8 firstByte = *localSource++;
00904 size_t sequenceLength = sUTF8SequenceLength[firstByte];
00905 if (sequenceLength == 1)
00906 {
00907
00908 break;
00909 }
00910 else if (sequenceLength == 0)
00911 {
00912
00913 }
00914 else
00915 {
00916 bool okay = true;
00917 while (--sequenceLength)
00918 {
00919 if (!sIsContinuation(*localSource++))
00920 {
00921 --localSource;
00922 okay = false;
00923 break;
00924 }
00925 }
00926 if (okay)
00927 return;
00928 }
00929 ioCurrent = localSource;
00930 }
00931 }
00932
00933 template <class I>
00934 void Functions_Read_T<I, UTF8>::sAlign(I& ioCurrent, I iEnd)
00935 {
00936 while (ioCurrent < iEnd)
00937 {
00938 I localSource = ioCurrent;
00939 const uint8 firstByte = *localSource++;
00940 size_t sequenceLength = sUTF8SequenceLength[firstByte];
00941 if (sequenceLength == 1)
00942 {
00943
00944 break;
00945 }
00946 else if (sequenceLength == 0)
00947 {
00948
00949 }
00950 else
00951 {
00952 bool okay = true;
00953 while (--sequenceLength && localSource < iEnd)
00954 {
00955 if (!sIsContinuation(*ioCurrent++))
00956 {
00957 --ioCurrent;
00958 okay = false;
00959 break;
00960 }
00961 }
00962 if (okay)
00963 return;
00964 }
00965 ioCurrent = localSource;
00966 }
00967 }
00968
00969 template <class I>
00970 void Functions_Read_T<I, UTF8>::sInc(I& ioCurrent)
00971 {
00972 for (;;)
00973 {
00974 const uint8 firstByte = *ioCurrent++;
00975 size_t sequenceLength = sUTF8SequenceLength[firstByte];
00976 if (sequenceLength == 1)
00977 {
00978 return;
00979 }
00980 else if (sequenceLength == 0)
00981 {
00982
00983 }
00984 else
00985 {
00986 bool okay = true;
00987 while (--sequenceLength)
00988 {
00989 if (!sIsContinuation(*ioCurrent++))
00990 {
00991
00992 --ioCurrent;
00993 okay = false;
00994 break;
00995 }
00996 }
00997 if (okay)
00998 return;
00999 }
01000 }
01001 }
01002
01003 template <class I>
01004 bool Functions_Read_T<I, UTF8>::sInc(I& ioCurrent, I iEnd)
01005 {
01006 for (;;)
01007 {
01008 if (ioCurrent >= iEnd)
01009 {
01010
01011 return false;
01012 }
01013
01014 const uint8 firstByte = *ioCurrent++;
01015 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01016 if (sequenceLength == 1)
01017 {
01018 return true;
01019 }
01020 else if (sequenceLength == 0)
01021 {
01022
01023 }
01024 else
01025 {
01026 if (ioCurrent + sequenceLength - 1 > iEnd)
01027 {
01028 --ioCurrent;
01029 return false;
01030 }
01031
01032 bool okay = true;
01033 while (--sequenceLength)
01034 {
01035 if (!sIsContinuation(*ioCurrent++))
01036 {
01037
01038 --ioCurrent;
01039 okay = false;
01040 break;
01041 }
01042 }
01043 if (okay)
01044 return true;
01045 }
01046 }
01047 }
01048
01049 template <class I>
01050 void Functions_Read_T<I, UTF8>::sDec(I& ioCurrent)
01051 {
01052 size_t continuationCount = 0;
01053 bool seenNonContinuation = false;
01054 for (;;)
01055 {
01056 const uint8 firstByte = *--ioCurrent;
01057 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01058 if (sequenceLength == 0)
01059 {
01060 if (sIsContinuation(firstByte))
01061 {
01062
01063 ++continuationCount;
01064 }
01065 else
01066 {
01067
01068 continuationCount = 0;
01069 seenNonContinuation = true;
01070 }
01071 }
01072 else
01073 {
01074
01075 if (continuationCount + 1 >= sequenceLength)
01076 {
01077
01078 return;
01079 }
01080 else
01081 {
01082
01083 if (!seenNonContinuation)
01084 {
01085
01086 I current = ioCurrent + 1;
01087 while (--sequenceLength)
01088 {
01089 if (!sIsContinuation(*current++))
01090 break;
01091 }
01092 if (sequenceLength == 0)
01093 {
01094
01095
01096 return;
01097 }
01098 }
01099 continuationCount = 0;
01100 seenNonContinuation = true;
01101 }
01102 }
01103 }
01104 }
01105
01106 template <class I>
01107 bool Functions_Read_T<I, UTF8>::sDec(I iStart, I& ioCurrent, I iEnd)
01108 {
01109 size_t continuationCount = 0;
01110 bool seenNonContinuation = false;
01111 for (;;)
01112 {
01113 if (iStart >= ioCurrent)
01114 {
01115
01116 return false;
01117 }
01118
01119 const uint8 firstByte = *--ioCurrent;
01120 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01121 if (sequenceLength == 0)
01122 {
01123 if (sIsContinuation(firstByte))
01124 {
01125
01126 ++continuationCount;
01127 }
01128 else
01129 {
01130
01131 continuationCount = 0;
01132 seenNonContinuation = true;
01133 }
01134 }
01135 else
01136 {
01137
01138 if (continuationCount + 1 >= sequenceLength)
01139 {
01140
01141 return true;
01142 }
01143 else
01144 {
01145
01146 if (!seenNonContinuation)
01147 {
01148
01149 if (ioCurrent + sequenceLength <= iEnd)
01150 {
01151
01152
01153 I current = ioCurrent + 1;
01154 while (--sequenceLength)
01155 {
01156 if (!sIsContinuation(*current++))
01157 break;
01158 }
01159 if (sequenceLength == 0)
01160 {
01161
01162
01163 return true;
01164 }
01165 }
01166 }
01167 continuationCount = 0;
01168 seenNonContinuation = true;
01169 }
01170 }
01171 }
01172 }
01173
01174 template <class I>
01175 UTF32 Functions_Read_T<I, UTF8>::sRead(I iCurrent)
01176 {
01177 return sReadInc(iCurrent);
01178 }
01179
01180 template <class I>
01181 bool Functions_Read_T<I, UTF8>::sRead(I iCurrent, I iEnd, UTF32& oCP)
01182 {
01183 return sReadInc(iCurrent, iEnd, oCP);
01184 }
01185
01186 template <class I>
01187 UTF32 Functions_Read_T<I, UTF8>::sReadInc(I& ioCurrent)
01188 {
01189 for (;;)
01190 {
01191 const uint8 firstByte = *ioCurrent++;
01192 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01193 if (sequenceLength == 1)
01194 {
01195 return firstByte;
01196 }
01197 else if (sequenceLength == 0)
01198 {
01199
01200 }
01201 else
01202 {
01203 uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01204 bool okay = true;
01205 while (--sequenceLength)
01206 {
01207 const uint8 curByte = *ioCurrent++;
01208 if (!sIsContinuation(curByte))
01209 {
01210
01211 --ioCurrent;
01212 okay = false;
01213 break;
01214 }
01215 sAppendContinuation(result, curByte);
01216 }
01217 if (okay)
01218 return result;
01219 }
01220 }
01221 }
01222
01223
01224 template <class I>
01225 bool Functions_Read_T<I, UTF8>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP)
01226 {
01227 for (;;)
01228 {
01229 if (ioCurrent >= iEnd)
01230 {
01231
01232 return false;
01233 }
01234
01235 const uint8 firstByte = *ioCurrent++;
01236 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01237 if (sequenceLength == 1)
01238 {
01239 oCP = firstByte;
01240 return true;
01241 }
01242 else if (sequenceLength == 0)
01243 {
01244
01245 }
01246 else
01247 {
01248 if (ioCurrent + sequenceLength - 1 > iEnd)
01249 {
01250 --ioCurrent;
01251 return false;
01252 }
01253
01254 uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01255 bool okay = true;
01256 while (--sequenceLength)
01257 {
01258 const uint8 curByte = *ioCurrent++;
01259 if (!sIsContinuation(curByte))
01260 {
01261
01262 --ioCurrent;
01263 okay = false;
01264 break;
01265 }
01266 sAppendContinuation(result, curByte);
01267 }
01268 if (okay)
01269 {
01270 oCP = result;
01271 return true;
01272 }
01273 }
01274 }
01275 }
01276
01277 template <class I>
01278 bool Functions_Read_T<I, UTF8>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped)
01279 {
01280 for (;;)
01281 {
01282 if (ioCurrent >= iEnd)
01283 {
01284
01285 return false;
01286 }
01287
01288 const uint8 firstByte = *ioCurrent++;
01289 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01290 if (sequenceLength == 1)
01291 {
01292 oCP = firstByte;
01293 return true;
01294 }
01295 else if (sequenceLength == 0)
01296 {
01297
01298 ++ioCountSkipped;
01299 }
01300 else
01301 {
01302 if (ioCurrent + sequenceLength - 1 > iEnd)
01303 {
01304 --ioCurrent;
01305 return false;
01306 }
01307
01308 uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01309 bool okay = true;
01310 for (size_t countConsumed = 1; countConsumed < sequenceLength; ++countConsumed)
01311 {
01312 const uint8 curByte = *ioCurrent++;
01313 if (!sIsContinuation(curByte))
01314 {
01315
01316 --ioCurrent;
01317 okay = false;
01318 ioCountSkipped += countConsumed + 1;
01319 break;
01320 }
01321 sAppendContinuation(result, curByte);
01322 }
01323 if (okay)
01324 {
01325 oCP = result;
01326 return true;
01327 }
01328 }
01329 }
01330 }
01331
01332 template <class I>
01333 UTF32 Functions_Read_T<I, UTF8>::sDecRead(I& ioCurrent)
01334 {
01335 for (;;)
01336 {
01337 const uint8 firstByte = *--ioCurrent;
01338 if (size_t sequenceLength = sUTF8SequenceLength[firstByte])
01339 {
01340
01341 if (sequenceLength == 1)
01342 return firstByte;
01343 uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01344 I localCurrent = ioCurrent;
01345 while (--sequenceLength)
01346 {
01347 const uint8 curByte = *++localCurrent;
01348 if (sIsContinuation(curByte))
01349 sAppendContinuation(result, curByte);
01350 else
01351 break;
01352 }
01353 if (sequenceLength == 0)
01354 return result;
01355 }
01356 }
01357 }
01358
01359 template <class I>
01360 bool Functions_Read_T<I, UTF8>::sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP)
01361 {
01362 for (;;)
01363 {
01364 if (iStart >= ioCurrent)
01365 {
01366
01367 return false;
01368 }
01369
01370 const uint8 firstByte = *--ioCurrent;
01371 if (size_t sequenceLength = sUTF8SequenceLength[firstByte])
01372 {
01373
01374 if (ioCurrent + sequenceLength <= iEnd)
01375 {
01376
01377 if (sequenceLength == 1)
01378 {
01379
01380 oCP = firstByte;
01381 return true;
01382 }
01383
01384 uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01385 I localCurrent = ioCurrent;
01386 while (--sequenceLength)
01387 {
01388 const uint8 curByte = *++localCurrent;
01389 if (sIsContinuation(curByte))
01390 sAppendContinuation(result, curByte);
01391 else
01392 break;
01393 }
01394 if (sequenceLength == 0)
01395 {
01396
01397 oCP = result;
01398 return true;
01399 }
01400 }
01401 }
01402 }
01403 }
01404
01405
01406 #pragma mark -
01407 #pragma mark * ZUnicode::Functions_Write_T<I, UTF8>
01408
01409 template <class I>
01410 struct Functions_Write_T<I, UTF8>
01411 {
01412 static bool sWrite(I iDest, I iEnd, UTF32 iCP);
01413 static bool sWriteInc(I& ioDest, I iEnd, UTF32 iCP);
01414 };
01415
01416 template <class I>
01417 bool Functions_Write_T<I, UTF8>::sWrite(I iDest, I iEnd, UTF32 iCP)
01418 {
01419 if (sIsValidCP(iCP))
01420 {
01421 size_t bytesToWrite;
01422 if (iCP < 0x80)
01423 {
01424 if (iDest <= iEnd)
01425 {
01426 *iDest = iCP;
01427 return true;
01428 }
01429 return false;
01430 }
01431 else if (iCP < 0x800) bytesToWrite = 2;
01432 else if (iCP < 0x10000) bytesToWrite = 3;
01433 else if (iCP < 0x200000) bytesToWrite = 4;
01434 else if (iCP < 0x4000000) bytesToWrite = 5;
01435 else bytesToWrite = 6;
01436
01437 iDest += bytesToWrite;
01438 if (iDest >= iEnd)
01439 return false;
01440
01441 const UTF32 byteMask = 0xBF;
01442 const UTF32 byteMark = 0x80;
01443 switch (bytesToWrite)
01444 {
01445
01446 case 6: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01447 case 5: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01448 case 4: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01449 case 3: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01450 case 2: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01451 }
01452 *--iDest = iCP | sUTF8StartByteMark[bytesToWrite];
01453 }
01454 return true;
01455 }
01456
01457 template <class I>
01458 bool Functions_Write_T<I, UTF8>::sWriteInc(I& ioDest, I iEnd, UTF32 iCP)
01459 {
01460 if (sIsValidCP(iCP))
01461 {
01462 size_t bytesToWrite;
01463 if (iCP < 0x80)
01464 {
01465 if (ioDest <= iEnd)
01466 {
01467 *ioDest++ = iCP;
01468 return true;
01469 }
01470 return false;
01471 }
01472 else if (iCP < 0x800) bytesToWrite = 2;
01473 else if (iCP < 0x10000) bytesToWrite = 3;
01474 else if (iCP < 0x200000) bytesToWrite = 4;
01475 else if (iCP < 0x4000000) bytesToWrite = 5;
01476 else bytesToWrite = 6;
01477
01478 I localDest = ioDest + bytesToWrite;
01479 if (localDest >= iEnd)
01480 return false;
01481 ioDest = localDest;
01482
01483 const UTF32 byteMask = 0xBF;
01484 const UTF32 byteMark = 0x80;
01485 switch (bytesToWrite)
01486 {
01487
01488 case 6: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01489 case 5: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01490 case 4: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01491 case 3: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01492 case 2: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01493 }
01494 *--localDest = iCP | sUTF8StartByteMark[bytesToWrite];
01495 }
01496 return true;
01497 }
01498
01499
01500 #pragma mark -
01501 #pragma mark * ZUnicode::Functions_Convert
01502
01503 template <class I>
01504 string32 Functions_Convert_T<I>::sAsUTF32(I iSource)
01505 {
01506 string32 result;
01507 for (;;)
01508 {
01509 if (UTF32 theCP = sReadInc(iSource))
01510 result += theCP;
01511 else
01512 break;
01513 }
01514 return result;
01515 }
01516
01517 template <class I>
01518 string32 Functions_Convert_T<I>::sAsUTF32(I iSource, size_t iCountCU)
01519 {
01520 #if 1
01521 string32 result;
01522 result.resize(iCountCU);
01523
01524 string32::iterator iter = result.begin();
01525 I theEnd = iSource + iCountCU;
01526 for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); )
01527 *iter++ = theCP;
01528 result.resize(iter - result.begin());
01529 return result;
01530 #else
01531 string32 result;
01532 result.reserve(iCountCU);
01533
01534 I theEnd = iSource + iCountCU;
01535 for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); result += theCP)
01536 {}
01537 return result;
01538 #endif
01539 }
01540
01541 template <class I>
01542 string32 Functions_Convert_T<I>::sAsUTF32(I iSource, I iEnd)
01543 {
01544 string32 result;
01545 for (UTF32 theCP; sReadInc(iSource, iEnd, theCP); result += theCP)
01546 {}
01547 return result;
01548 }
01549
01550 template <class I>
01551 string16 Functions_Convert_T<I>::sAsUTF16(I iSource)
01552 {
01553 string16 result;
01554 for (;;)
01555 {
01556 if (UTF32 theCP = sReadInc(iSource))
01557 result += theCP;
01558 else
01559 break;
01560 }
01561 return result;
01562 }
01563
01564 template <class I>
01565 string16 Functions_Convert_T<I>::sAsUTF16(I iSource, size_t iCountCU)
01566 {
01567 #if 1
01568 string16 result;
01569
01570 result.resize(iCountCU * 2);
01571 string16::iterator iter = result.begin();
01572 I theEnd = iSource + iCountCU;
01573 for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); )
01574 {
01575 uint32 realCP = theCP;
01576 if (realCP <= ZUnicode::kCPMaxUCS2)
01577 {
01578 *iter++ = UTF16(realCP);
01579 }
01580 else
01581 {
01582 realCP -= 0x10000;
01583 *iter++ = UTF16(realCP / 0x400 + ZUnicode::kCPSurrogateHighBegin);
01584 *iter++ += UTF16(realCP & 0x3FF + ZUnicode::kCPSurrogateLowBegin);
01585 }
01586 }
01587 result.resize(iter - result.begin());
01588 return result;
01589 #else
01590 string16 result;
01591 result.reserve(iCountCU);
01592
01593 I theEnd = iSource + iCountCU;
01594 for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); result += theCP)
01595 {}
01596 return result;
01597 #endif
01598 }
01599
01600 template <class I>
01601 string16 Functions_Convert_T<I>::sAsUTF16(I iSource, I iEnd)
01602 {
01603 string16 result;
01604 for (UTF32 theCP; sReadInc(iSource, iEnd, theCP); result += theCP)
01605 {}
01606 return result;
01607 }
01608
01609 template <class I>
01610 string8 Functions_Convert_T<I>::sAsUTF8(I iSource)
01611 {
01612 string8 result;
01613 for (;;)
01614 {
01615 if (UTF32 theCP = sReadInc(iSource))
01616 result += theCP;
01617 else
01618 break;
01619 }
01620 return result;
01621 }
01622
01623 template <class I>
01624 string8 Functions_Convert_T<I>::sAsUTF8(I iSource, size_t iCountCU)
01625 {
01626 string8 result;
01627 result.reserve(iCountCU);
01628
01629 I theEnd = iSource + iCountCU;
01630 for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); result += theCP)
01631 {}
01632
01633 return result;
01634 }
01635
01636 template <class I>
01637 string8 Functions_Convert_T<I>::sAsUTF8(I iSource, I iEnd)
01638 {
01639 string8 result;
01640 for (UTF32 theCP; sReadInc(iSource, iEnd, theCP); result += theCP)
01641 {}
01642 return result;
01643 }
01644
01645 }
01646
01647 #endif // __ZUnicodePrivB__