src/foundation/ZUnicodePrivB.h

00001 /*  @(#) $Id: ZUnicodePrivB.h,v 1.19 2007/05/04 18:59:34 agreen Exp $ */
00002 
00003 /* ------------------------------------------------------------
00004 Copyright (c) 2004 Andrew Green and Learning in Motion, Inc.
00005 http://www.zoolib.org
00006 
00007 Permission is hereby granted, free of charge, to any person obtaining a copy
00008 of this software and associated documentation files (the "Software"), to deal
00009 in the Software without restriction, including without limitation the rights
00010 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00011 copies of the Software, and to permit persons to whom the Software is
00012 furnished to do so, subject to the following conditions:
00013 
00014 The above copyright notice and this permission notice shall be included in
00015 all copies or substantial portions of the Software.
00016 
00017 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00018 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00019 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00020 COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
00021 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
00022 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00023 ------------------------------------------------------------ */
00024 
00025 #ifndef __ZUnicodePrivB__
00026 #define __ZUnicodePrivB__ 1
00027 #include "zconfig.h"
00028 
00029 #include "ZUnicode.h"
00030 
00031 namespace ZUnicode {
00032 
00033 // =================================================================================================
00034 #pragma mark -
00035 #pragma mark * ZUnicode::Functions_CountCU
00036 
00037 template <class I>
00038 size_t Functions_CountCU_T<I>::sCountCU(I iSource)
00039         {
00040         I localSource = iSource;
00041         while (0 != *localSource)
00042                 ++localSource;
00043         return localSource - iSource;
00044         }
00045 
00046 // =================================================================================================
00047 #pragma mark -
00048 #pragma mark * ZUnicode::Functions_Count
00049 
00050 template <class I>
00051 size_t Functions_Count_T<I>::sCountCP(I iSource)
00052         {
00053         size_t count = 0;
00054         while (Functions_Read_T<I>::sReadInc(iSource))
00055                 ++count;
00056         return count;
00057         }
00058 
00059 template <class I>
00060 void Functions_Count_T<I>::sCount(I iSource, size_t* oCountCU, size_t* oCountCP)
00061         {
00062         if (oCountCU)
00063                 {
00064                 if (oCountCP)
00065                         {
00066                         size_t count = 0;
00067                         I localSource = iSource;
00068                         while (Functions_Read_T<I>::sReadInc(localSource))
00069                                 ++count;
00070                         *oCountCU = localSource - iSource;
00071                         *oCountCP = count;
00072                         }
00073                 else
00074                         {
00075                         *oCountCU = Functions_CountCU_T<I>::sCountCU(iSource);
00076                         }
00077                 }
00078         else
00079                 {
00080                 if (oCountCP)
00081                         *oCountCP = sCountCP(iSource);
00082                 }
00083         }
00084 
00085 template <class I>
00086 size_t Functions_Count_T<I>::sCPToCU(I iSource, size_t iCountCP)
00087         {
00088         I localSource = iSource;
00089         while (iCountCP--)
00090                 Functions_Read_T<I>::sInc(localSource);
00091         return localSource - iSource;
00092         }
00093 
00094 template <class I>
00095 size_t Functions_Count_T<I>::sCPToCU(I iSource, size_t iCountCU, size_t iCountCP, size_t* oCountCP)
00096         {
00097         I localSource = iSource;
00098         I localEnd = iSource + iCountCU;
00099         size_t countRemaining = iCountCP;
00100         if (countRemaining)
00101                 {
00102                 ++countRemaining;
00103                 while (--countRemaining)
00104                         {
00105                         if (!Functions_Read_T<I>::sInc(localSource, localEnd))
00106                                 break;
00107                         }
00108                 }
00109         if (oCountCP)
00110                 *oCountCP = iCountCP - countRemaining;
00111         return localSource - iSource;
00112         }
00113 
00114 template <class I>
00115 size_t Functions_Count_T<I>::sCPToCU(I iSource, I iEnd, size_t iCountCP, size_t* oCountCP)
00116         {
00117         I localSource = iSource;
00118         size_t countRemaining = iCountCP;
00119         if (countRemaining)
00120                 {
00121                 ++countRemaining;
00122                 while (--countRemaining)
00123                         {
00124                         if (!Functions_Read_T<I>::sInc(localSource, iEnd))
00125                                 break;
00126                         }
00127                 }
00128         if (oCountCP)
00129                 *oCountCP = iCountCP - countRemaining;
00130         return localSource - iSource;
00131         }
00132 
00133 template <class I>
00134 size_t Functions_Count_T<I>::sCUToCP(I iSource, size_t iCountCU)
00135         {
00136         size_t countCP = 0;
00137         I localEnd = iSource + iCountCU;
00138         for (;;)
00139                 {
00140                 if (!Functions_Read_T<I>::sInc(iSource, localEnd))
00141                         break;
00142                 ++countCP;
00143                 }
00144         return countCP;
00145         }
00146 
00147 template <class I>
00148 size_t Functions_Count_T<I>::sCUToCP(I iSource, I iEnd)
00149         {
00150         size_t countCP = 0;
00151         for (;;)
00152                 {
00153                 if (!Functions_Read_T<I>::sInc(iSource, iEnd))
00154                         break;
00155                 ++countCP;
00156                 }
00157         return countCP;
00158         }
00159 
00160 // =================================================================================================
00161 #pragma mark -
00162 #pragma mark * ZUnicode::Functions_Read_T<I, UTF32>
00163 
00164 template <class I>
00165 struct Functions_Read_T<I, UTF32>
00166         {
00167         static void sAlign(I& ioCurrent);
00168         static void sAlign(I& ioCurrent, I iEnd);
00169 
00170         static void sInc(I& ioCurrent);
00171         static bool sInc(I& ioCurrent, I iEnd);
00172 
00173         static void sDec(I& ioCurrent);
00174         static bool sDec(I iStart, I& ioCurrent, I iEnd);
00175 
00176         static UTF32 sRead(I iCurrent);
00177         static bool sRead(I iCurrent, I iEnd, UTF32& oCP);
00178 
00179         static UTF32 sReadInc(I& ioCurrent);
00180         static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP);
00181         static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped);
00182 
00183         static UTF32 sDecRead(I& ioCurrent);
00184         static bool sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP);
00185         };
00186 
00187 template <class I>
00188 void Functions_Read_T<I, UTF32>::sAlign(I& ioCurrent)
00189         {
00190         for (;;)
00191                 {
00192                 if (sIsValidCP(*ioCurrent))
00193                         break;
00194                 ++ioCurrent;
00195                 }
00196         }
00197 
00198 template <class I>
00199 void Functions_Read_T<I, UTF32>::sAlign(I& ioCurrent, I iEnd)
00200         {
00201         while (ioCurrent < iEnd)
00202                 {
00203                 if (sIsValidCP(*ioCurrent))
00204                         break;
00205                 ++ioCurrent;
00206                 }
00207         }
00208 
00209 template <class I>
00210 void Functions_Read_T<I, UTF32>::sInc(I& ioCurrent)
00211         {
00212         for (;;)
00213                 {
00214                 if (sIsValidCP(*ioCurrent++))
00215                         return;
00216                 }
00217         }
00218 
00219 template <class I>
00220 bool Functions_Read_T<I, UTF32>::sInc(I& ioCurrent, I iEnd)
00221         {
00222         for (;;)
00223                 {
00224                 if (ioCurrent >= iEnd)
00225                         {
00226                         // We've run off the end.
00227                         return false;
00228                         }
00229 
00230                 if (sIsValidCP(*ioCurrent++))
00231                         return true;
00232                 }
00233         }
00234 
00235 template <class I>
00236 void Functions_Read_T<I, UTF32>::sDec(I& ioCurrent)
00237         {
00238         for (;;)
00239                 {
00240                 if (sIsValidCP(*--ioCurrent))
00241                         return;
00242                 }
00243         }
00244 
00245 template <class I>
00246 bool Functions_Read_T<I, UTF32>::sDec(I iStart, I& ioCurrent, I iEnd)
00247         {
00248         for (;;)
00249                 {
00250                 if (iStart >= ioCurrent)
00251                         {
00252                         // We'd run off the start.
00253                         return false;
00254                         }
00255                 if (sIsValidCP(*--ioCurrent))
00256                         return true;
00257                 }
00258         }
00259 
00260 template <class I>
00261 UTF32 Functions_Read_T<I, UTF32>::sRead(I iCurrent)
00262         {
00263         return sReadInc(iCurrent);
00264         }
00265 
00266 template <class I>
00267 bool Functions_Read_T<I, UTF32>::sRead(I iCurrent, I iEnd, UTF32& oCP)
00268         {
00269         return sReadInc(iCurrent, iEnd, oCP);
00270         }
00271 
00272 template <class I>
00273 UTF32 Functions_Read_T<I, UTF32>::sReadInc(I& ioCurrent)
00274         {
00275         for (;;)
00276                 {
00277                 uint32 theCU = *ioCurrent++;
00278                 if (sIsValidCP(theCU))
00279                         return theCU;
00280                 }
00281         }
00282 
00283 template <class I>
00284 bool Functions_Read_T<I, UTF32>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP)
00285         {
00286         for (;;)
00287                 {
00288                 if (ioCurrent >= iEnd)
00289                         {
00290                         // We've run off the end.
00291                         return false;
00292                         }
00293 
00294                 uint32 theCU = *ioCurrent++;
00295                 if (sIsValidCP(theCU))
00296                         {
00297                         oCP = theCU;
00298                         return true;
00299                         }
00300                 }
00301         }
00302 
00303 template <class I>
00304 bool Functions_Read_T<I, UTF32>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped)
00305         {
00306         for (;;)
00307                 {
00308                 if (ioCurrent >= iEnd)
00309                         {
00310                         // We've run off the end.
00311                         return false;
00312                         }
00313 
00314                 uint32 theCU = *ioCurrent++;
00315                 if (sIsValidCP(theCU))
00316                         {
00317                         oCP = theCU;
00318                         return true;
00319                         }
00320                 ++ioCountSkipped;
00321                 }
00322         }
00323 
00324 template <class I>
00325 UTF32 Functions_Read_T<I, UTF32>::sDecRead(I& ioCurrent)
00326         {
00327         for (;;)
00328                 {
00329                 uint32 theCP = *--ioCurrent;
00330                 if (sIsValidCP(theCP))
00331                         return theCP;
00332                 }
00333         }
00334 
00335 template <class I>
00336 bool Functions_Read_T<I, UTF32>::sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP)
00337         {
00338         for (;;)
00339                 {
00340                 if (iStart >= ioCurrent)
00341                         {
00342                         // We'd run off the start.
00343                         return false;
00344                         }
00345                 uint32 theCP = *--ioCurrent;
00346                 if (sIsValidCP(theCP))
00347                         {
00348                         oCP = theCP;
00349                         return true;
00350                         }
00351                 }
00352         }
00353 
00354 // =================================================================================================
00355 #pragma mark -
00356 #pragma mark * ZUnicode::Functions_Write_T<I, UTF32>
00357 
00358 template <class I>
00359 struct Functions_Write_T<I, UTF32>
00360         {
00361         static bool sWrite(I iDest, I iEnd, UTF32 iCP);
00362         static bool sWriteInc(I& ioDest, I iEnd, UTF32 iCP);
00363         };
00364 
00365 template <class I>
00366 bool Functions_Write_T<I, UTF32>::sWrite(I iDest, I iEnd, UTF32 iCP)
00367         {
00368         return sWriteInc(iDest, iEnd, iCP);
00369         }
00370 
00371 template <class I>
00372 bool Functions_Write_T<I, UTF32>::sWriteInc(I& ioDest, I iEnd, UTF32 iCP)
00373         {
00374         if (sIsValidCP(iCP))
00375                 {
00376                 if (ioDest >= iEnd)
00377                         return false;
00378                 *ioDest++ = iCP;
00379                 }
00380         return true;
00381         }
00382 
00383 // =================================================================================================
00384 #pragma mark -
00385 #pragma mark * ZUnicode::Functions_Read_T<I, UTF16>
00386 
00387 template <class I>
00388 struct Functions_Read_T<I, UTF16>
00389         {
00390         static void sAlign(I& ioCurrent);
00391         static void sAlign(I& ioCurrent, I iEnd);
00392 
00393         static void sInc(I& ioCurrent);
00394         static bool sInc(I& ioCurrent, I iEnd);
00395 
00396         static void sDec(I& ioCurrent);
00397         static bool sDec(I iStart, I& ioCurrent, I iEnd);
00398 
00399         static UTF32 sRead(I iCurrent);
00400         static bool sRead(I iCurrent, I iEnd, UTF32& oCP);
00401 
00402         static UTF32 sReadInc(I& ioCurrent);
00403         static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP);
00404         static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped);
00405 
00406         static UTF32 sDecRead(I& ioCurrent);
00407         static bool sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP);
00408         };
00409 
00410 template <class I>
00411 void Functions_Read_T<I, UTF16>::sAlign(I& ioCurrent)
00412         {
00413         for (;;)
00414                 {
00415                 uint16 theCU = *ioCurrent;
00416                 if (sIsSmallNormal(theCU))
00417                         {
00418                         break;
00419                         }
00420                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00421                         {
00422                         // Must be a high surrogate as it's not a small normal.
00423                         if (sIsLowSurrogate(uint16(ioCurrent[1])))
00424                                 break;
00425                         // It's not followed by a low surrogate, so move on.
00426                         ++ioCurrent;
00427                         }
00428                 else if (sIsBigNormalOrBeyond(theCU))
00429                         {
00430                         break;
00431                         }
00432                 else
00433                         {
00434                         // Must be an out of order low surrogate.
00435                         ++ioCurrent;
00436                         }
00437                 }
00438         }
00439 
00440 template <class I>
00441 void Functions_Read_T<I, UTF16>::sAlign(I& ioCurrent, I iEnd)
00442         {
00443         while (ioCurrent < iEnd)
00444                 {
00445                 uint16 theCU = *ioCurrent;
00446                 if (sIsSmallNormal(theCU))
00447                         {
00448                         break;
00449                         }
00450                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00451                         {
00452                         // Must be a high surrogate as it's not a small normal.
00453                         if (ioCurrent + 1 >= iEnd)
00454                                 {
00455                                 // But we'd run off the end.
00456                                 break;
00457                                 }
00458 
00459                         if (sIsLowSurrogate(uint16(ioCurrent[1])))
00460                                 break;
00461 
00462                         // It's not followed by a low surrogate, so move on.
00463                         ++ioCurrent;
00464                         }
00465                 else if (sIsBigNormalOrBeyond(theCU))
00466                         {
00467                         break;
00468                         }
00469                 else
00470                         {
00471                         // Must be an out of order low surrogate.
00472                         ++ioCurrent;
00473                         }
00474                 }
00475         }
00476 
00477 template <class I>
00478 void Functions_Read_T<I, UTF16>::sInc(I& ioCurrent)
00479         {
00480         for (;;)
00481                 {
00482                 uint16 theCU = uint16(*ioCurrent++);
00483                 if (sIsSmallNormal(theCU))
00484                         {
00485                         return;
00486                         }
00487                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00488                         {
00489                         // Must be a high surrogate as it's not a small normal.
00490                         if (sIsLowSurrogate(uint16(*ioCurrent++)))
00491                                 return;
00492                         --ioCurrent;
00493                         }
00494                 else if (sIsBigNormalOrBeyond(theCU))
00495                         {
00496                         return;
00497                         }
00498                 else
00499                         {
00500                         // Must be an out of order low surrogate.
00501                         }
00502                 }
00503         }
00504 
00505 template <class I>
00506 bool Functions_Read_T<I, UTF16>::sInc(I& ioCurrent, I iEnd)
00507         {
00508         for (;;)
00509                 {
00510                 if (ioCurrent >= iEnd)
00511                         {
00512                         // We've run off the end.
00513                         return false;
00514                         }
00515 
00516                 uint16 theCU = uint16(*ioCurrent++);
00517                 if (sIsSmallNormal(theCU))
00518                         {
00519                         return true;
00520                         }
00521                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00522                         {
00523                         // Must be a high surrogate as it's not a small normal.
00524                         if (ioCurrent >= iEnd)
00525                                 {
00526                                 // But we'd run off the end. Restore the value of ioCurrent to indicate this.
00527                                 --ioCurrent;
00528                                 return false;
00529                                 }
00530 
00531                         if (sIsLowSurrogate(uint16(*ioCurrent++)))
00532                                 return true;
00533                         --ioCurrent;
00534                         }
00535                 else if (sIsBigNormalOrBeyond(theCU))
00536                         {
00537                         return true;
00538                         }
00539                 else
00540                         {
00541                         // Must be an out of order low surrogate.
00542                         }
00543                 }
00544         }
00545 
00546 template <class I>
00547 void Functions_Read_T<I, UTF16>::sDec(I& ioCurrent)
00548         {
00549         bool lastWasLowSurrogate = false;
00550         for (;;)
00551                 {
00552                 uint16 theCU = uint16(*--ioCurrent);
00553                 if (sIsSmallNormal(theCU))
00554                         {
00555                         // We've moved on to a small normal.
00556                         break;
00557                         }
00558                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00559                         {
00560                         // It's not a small normal, so must be a high surrogate.
00561                         if (lastWasLowSurrogate)
00562                                 break;
00563                         lastWasLowSurrogate = false;
00564                         }
00565                 else if (sIsBigNormalOrBeyond(theCU))
00566                         {
00567                         // We've moved on to a big normal.
00568                         break;
00569                         }
00570                 else
00571                         {
00572                         // We must be on a low surrogate. Remember the fact.
00573                         lastWasLowSurrogate = true;
00574                         }
00575                 }
00576         }
00577 
00578 template <class I>
00579 bool Functions_Read_T<I, UTF16>::sDec(I iStart, I& ioCurrent, I iEnd)
00580         {
00581         bool lastWasLowSurrogate = false;
00582         for (;;)
00583                 {
00584                 if (iStart >= ioCurrent)
00585                         {
00586                         // We'd run off the start.
00587                         return false;
00588                         }
00589 
00590                 uint16 theCU = uint16(*--ioCurrent);
00591                 if (sIsSmallNormal(theCU))
00592                         {
00593                         // We've moved on to a small normal.
00594                         return true;
00595                         }
00596                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00597                         {
00598                         // It's not a small normal, so must be a high surrogate.
00599                         if (lastWasLowSurrogate)
00600                                 return true;
00601                         lastWasLowSurrogate = false;
00602                         }
00603                 else if (sIsBigNormalOrBeyond(theCU))
00604                         {
00605                         // We've moved on to a big normal.
00606                         return true;
00607                         }
00608                 else
00609                         {
00610                         // We must be on a low surrogate. Remember the fact.
00611                         lastWasLowSurrogate = true;
00612                         }
00613                 }
00614         }
00615 
00616 template <class I>
00617 UTF32 Functions_Read_T<I, UTF16>::sRead(I iCurrent)
00618         {
00619         return sReadInc(iCurrent);
00620         }
00621 
00622 template <class I>
00623 bool Functions_Read_T<I, UTF16>::sRead(I iCurrent, I iEnd, UTF32& oCP)
00624         {
00625         return sReadInc(iCurrent, iEnd, oCP);
00626         }
00627 
00628 template <class I>
00629 UTF32 Functions_Read_T<I, UTF16>::sReadInc(I& ioCurrent)
00630         {
00631         for (;;)
00632                 {
00633                 uint16 theCU = *ioCurrent++;
00634                 if (sIsSmallNormal(theCU))
00635                         {
00636                         return theCU;
00637                         }
00638                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00639                         {
00640                         // Must be a high surrogate as it's not a small normal.
00641                         uint16 theCU2 = *ioCurrent++;
00642                         if (sIsLowSurrogate(theCU2))
00643                                 return sUTF32FromSurrogates(theCU, theCU2);
00644                         --ioCurrent;
00645                         }
00646                 else if (sIsBigNormalOrBeyond(theCU))
00647                         {
00648                         return theCU;
00649                         }
00650                 else
00651                         {
00652                         // Must be an out of order low surrogate.
00653                         }
00654                 }
00655         }
00656 
00657 template <class I>
00658 bool Functions_Read_T<I, UTF16>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP)
00659         {
00660         for (;;)
00661                 {
00662                 if (ioCurrent >= iEnd)
00663                         {
00664                         // We've run off the end.
00665                         return false;
00666                         }
00667 
00668                 uint16 theCU = *ioCurrent++;
00669                 if (sIsSmallNormal(theCU))
00670                         {
00671                         oCP = theCU;
00672                         return true;
00673                         }
00674                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00675                         {
00676                         // Must be a high surrogate as it's not a small normal.
00677                         if (ioCurrent >= iEnd)
00678                                 {
00679                                 // But we'd run off the end. Restore the value of ioCurrent to indicate this.
00680                                 --ioCurrent;
00681                                 return false;
00682                                 }
00683 
00684                         uint16 theCU2 = *ioCurrent++;
00685                         if (sIsLowSurrogate(theCU2))
00686                                 {
00687                                 oCP = sUTF32FromSurrogates(theCU, theCU2);
00688                                 return true;
00689                                 }
00690                         --ioCurrent;
00691                         }
00692                 else if (sIsBigNormalOrBeyond(theCU))
00693                         {
00694                         oCP = theCU;
00695                         return true;
00696                         }
00697                 else
00698                         {
00699                         // Must be an out of order low surrogate.
00700                         }
00701                 }
00702         }
00703 
00704 template <class I>
00705 bool Functions_Read_T<I, UTF16>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped)
00706         {
00707         for (;;)
00708                 {
00709                 if (ioCurrent >= iEnd)
00710                         {
00711                         // We've run off the end.
00712                         return false;
00713                         }
00714 
00715                 uint16 theCU = *ioCurrent++;
00716                 if (sIsSmallNormal(theCU))
00717                         {
00718                         oCP = theCU;
00719                         return true;
00720                         }
00721                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00722                         {
00723                         // Must be a high surrogate as it's not a small normal.
00724                         if (ioCurrent >= iEnd)
00725                                 {
00726                                 // But we'd run off the end. Restore the value of ioCurrent to indicate this.
00727                                 --ioCurrent;
00728                                 return false;
00729                                 }
00730 
00731                         uint16 theCU2 = *ioCurrent++;
00732                         if (sIsLowSurrogate(theCU2))
00733                                 {
00734                                 oCP = sUTF32FromSurrogates(theCU, theCU2);
00735                                 return true;
00736                                 }
00737                         // It's a high surrogate with no following low surrogate.
00738                         ++ioCountSkipped;
00739                         --ioCurrent;
00740                         }
00741                 else if (sIsBigNormalOrBeyond(theCU))
00742                         {
00743                         oCP = theCU;
00744                         return true;
00745                         }
00746                 else
00747                         {
00748                         // Must be an out of order low surrogate.
00749                         ++ioCountSkipped;
00750                         }
00751                 }
00752         }
00753 
00754 template <class I>
00755 UTF32 Functions_Read_T<I, UTF16>::sDecRead(I& ioCurrent)
00756         {
00757         uint16 priorLowSurrogate = 0;
00758         for (;;)
00759                 {
00760                 uint16 theCU = *--ioCurrent;
00761                 if (sIsSmallNormal(theCU))
00762                         {
00763                         // We've moved on to a small normal.
00764                         return theCU;
00765                         }
00766                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00767                         {
00768                         // It's not a small normal, so must be a high surrogate.
00769                         if (priorLowSurrogate)
00770                                 return sUTF32FromSurrogates(theCU, priorLowSurrogate);
00771                         priorLowSurrogate = 0;
00772                         }
00773                 else if (sIsBigNormalOrBeyond(theCU))
00774                         {
00775                         return theCU;
00776                         }
00777                 else
00778                         {
00779                         // We must be on a low surrogate. Remember the fact.
00780                         priorLowSurrogate = theCU;
00781                         }
00782                 }
00783         }
00784 
00785 template <class I>
00786 bool Functions_Read_T<I, UTF16>::sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP)
00787         {
00788         uint16 priorLowSurrogate = 0;
00789         for (;;)
00790                 {
00791                 if (iStart >= ioCurrent)
00792                         {
00793                         // We'd run off the start.
00794                         return false;
00795                         }
00796 
00797                 uint16 theCU = *--ioCurrent;
00798                 if (sIsSmallNormal(theCU))
00799                         {
00800                         // We've moved on to a small normal.
00801                         oCP = theCU;
00802                         return true;
00803                         }
00804                 else if (sIsSmallNormalOrHighSurrogate(theCU))
00805                         {
00806                         // It's not a small normal, so must be a high surrogate.
00807                         if (priorLowSurrogate)
00808                                 {
00809                                 oCP = sUTF32FromSurrogates(theCU, priorLowSurrogate);
00810                                 return true;
00811                                 }
00812                         priorLowSurrogate = 0;
00813                         }
00814                 else if (sIsBigNormalOrBeyond(theCU))
00815                         {
00816                         oCP = theCU;
00817                         return true;
00818                         }
00819                 else
00820                         {
00821                         // We must be on a low surrogate. Remember the fact.
00822                         priorLowSurrogate = theCU;
00823                         }
00824                 }
00825         }
00826 
00827 // =================================================================================================
00828 #pragma mark -
00829 #pragma mark * ZUnicode::Functions_Write_T<I, UTF16>
00830 
00831 template <class I>
00832 struct Functions_Write_T<I, UTF16>
00833         {
00834         static bool sWrite(I iDest, I iEnd, UTF32 iCP);
00835         static bool sWriteInc(I& ioDest, I iEnd, UTF32 iCP);
00836         };
00837 
00838 template <class I>
00839 bool Functions_Write_T<I, UTF16>::sWrite(I iDest, I iEnd, UTF32 iCP)
00840         {
00841         return sWriteInc(iDest, iEnd, iCP);
00842         }
00843 
00844 template <class I>
00845 bool Functions_Write_T<I, UTF16>::sWriteInc(I& ioDest, I iEnd, UTF32 iCP)
00846         {
00847         if (sIsValidCP(iCP))
00848                 {
00849                 if (iCP > kCPMaxUCS2)
00850                         {
00851                         // It's beyond the BMP, so we need to write surrogates.
00852                         if (ioDest + 1 >= iEnd)
00853                                 {
00854                                 // There's no room.
00855                                 return false;
00856                                 }
00857                         iCP -= 0x10000;
00858                         *ioDest++ = iCP / 0x400 + kCPSurrogateHighBegin;
00859                         *ioDest++ = iCP & 0x3FF + kCPSurrogateLowBegin;
00860                         }
00861                 else
00862                         {
00863                         if (ioDest >= iEnd)
00864                                 return false;
00865                         *ioDest++ = iCP;
00866                         }
00867                 }
00868         return true;
00869         }
00870 
00871 // =================================================================================================
00872 #pragma mark -
00873 #pragma mark * ZUnicode::Functions_Read_T<I, UTF8>
00874 template <class I>
00875 struct Functions_Read_T<I, UTF8>
00876         {
00877         static void sAlign(I& ioCurrent);
00878         static void sAlign(I& ioCurrent, I iEnd);
00879 
00880         static void sInc(I& ioCurrent);
00881         static bool sInc(I& ioCurrent, I iEnd);
00882 
00883         static void sDec(I& ioCurrent);
00884         static bool sDec(I iStart, I& ioCurrent, I iEnd);
00885 
00886         static UTF32 sRead(I iCurrent);
00887         static bool sRead(I iCurrent, I iEnd, UTF32& oCP);
00888 
00889         static UTF32 sReadInc(I& ioCurrent);
00890         static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP);
00891         static bool sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped);
00892 
00893         static UTF32 sDecRead(I& ioCurrent);
00894         static bool sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP);
00895         };
00896 
00897 template <class I>
00898 void Functions_Read_T<I, UTF8>::sAlign(I& ioCurrent)
00899         {
00900         for (;;)
00901                 {
00902                 I localSource = ioCurrent;
00903                 const uint8 firstByte = *localSource++;
00904                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
00905                 if (sequenceLength == 1)
00906                         {
00907                         // It's a standalone character, we can return.
00908                         break;
00909                         }
00910                 else if (sequenceLength == 0)
00911                         {
00912                         // It's a continuation or illegal, ignore it.
00913                         }
00914                 else
00915                         {
00916                         bool okay = true;
00917                         while (--sequenceLength)
00918                                 {
00919                                 if (!sIsContinuation(*localSource++))
00920                                         {
00921                                         --localSource;
00922                                         okay = false;
00923                                         break;
00924                                         }
00925                                 }
00926                         if (okay)
00927                                 return;
00928                         }
00929                 ioCurrent = localSource;
00930                 }
00931         }
00932 
00933 template <class I>
00934 void Functions_Read_T<I, UTF8>::sAlign(I& ioCurrent, I iEnd)
00935         {
00936         while (ioCurrent < iEnd)
00937                 {
00938                 I localSource = ioCurrent;
00939                 const uint8 firstByte = *localSource++;
00940                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
00941                 if (sequenceLength == 1)
00942                         {
00943                         // It's a standalone character, we can return.
00944                         break;
00945                         }
00946                 else if (sequenceLength == 0)
00947                         {
00948                         // It's a continuation or illegal, ignore it.
00949                         }
00950                 else
00951                         {
00952                         bool okay = true;
00953                         while (--sequenceLength && localSource < iEnd)
00954                                 {
00955                                 if (!sIsContinuation(*ioCurrent++))
00956                                         {
00957                                         --ioCurrent;
00958                                         okay = false;
00959                                         break;
00960                                         }
00961                                 }
00962                         if (okay)
00963                                 return;
00964                         }
00965                 ioCurrent = localSource;
00966                 }
00967         }
00968 
00969 template <class I>
00970 void Functions_Read_T<I, UTF8>::sInc(I& ioCurrent)
00971         {
00972         for (;;)
00973                 {
00974                 const uint8 firstByte = *ioCurrent++;
00975                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
00976                 if (sequenceLength == 1)
00977                         {
00978                         return;
00979                         }
00980                 else if (sequenceLength == 0)
00981                         {
00982                         // It's a continuation or illegal, ignore it.
00983                         }
00984                 else
00985                         {
00986                         bool okay = true;
00987                         while (--sequenceLength)
00988                                 {
00989                                 if (!sIsContinuation(*ioCurrent++))
00990                                         {
00991                                         // It's not a legal continuation byte.
00992                                         --ioCurrent;
00993                                         okay = false;
00994                                         break;
00995                                         }
00996                                 }
00997                         if (okay)
00998                                 return;
00999                         }
01000                 }
01001         }
01002 
01003 template <class I>
01004 bool Functions_Read_T<I, UTF8>::sInc(I& ioCurrent, I iEnd)
01005         {
01006         for (;;)
01007                 {
01008                 if (ioCurrent >= iEnd)
01009                         {
01010                         // We've run off the end.
01011                         return false;
01012                         }
01013 
01014                 const uint8 firstByte = *ioCurrent++;
01015                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01016                 if (sequenceLength == 1)
01017                         {
01018                         return true;
01019                         }
01020                 else if (sequenceLength == 0)
01021                         {
01022                         // It's a continuation or illegal, ignore it.
01023                         }
01024                 else
01025                         {
01026                         if (ioCurrent + sequenceLength - 1 > iEnd)
01027                                 {
01028                                 --ioCurrent;
01029                                 return false;
01030                                 }
01031 
01032                         bool okay = true;
01033                         while (--sequenceLength)
01034                                 {
01035                                 if (!sIsContinuation(*ioCurrent++))
01036                                         {
01037                                         // It's not a legal continuation byte.
01038                                         --ioCurrent;
01039                                         okay = false;
01040                                         break;
01041                                         }
01042                                 }
01043                         if (okay)
01044                                 return true;
01045                         }
01046                 }
01047         }
01048 
01049 template <class I>
01050 void Functions_Read_T<I, UTF8>::sDec(I& ioCurrent)
01051         {
01052         size_t continuationCount = 0;
01053         bool seenNonContinuation = false;
01054         for (;;)
01055                 {
01056                 const uint8 firstByte = *--ioCurrent;
01057                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01058                 if (sequenceLength == 0)
01059                         {
01060                         if (sIsContinuation(firstByte))
01061                                 {
01062                                 // It's a continuation.
01063                                 ++continuationCount;
01064                                 }
01065                         else
01066                                 {
01067                                 // It's illegal.
01068                                 continuationCount = 0;
01069                                 seenNonContinuation = true;
01070                                 }
01071                         }
01072                 else
01073                         {
01074                         // It's a start byte or is normal.
01075                         if (continuationCount + 1 >= sequenceLength)
01076                                 {
01077                                 // We've seen enough continuation bytes.
01078                                 return;
01079                                 }
01080                         else
01081                                 {
01082                                 // We haven't seen enough continuation bytes.
01083                                 if (!seenNonContinuation)
01084                                         {
01085                                         // We've only seen continuation bytes.
01086                                         I current = ioCurrent + 1;
01087                                         while (--sequenceLength)
01088                                                 {
01089                                                 if (!sIsContinuation(*current++))
01090                                                         break;
01091                                                 }
01092                                         if (sequenceLength == 0)
01093                                                 {
01094                                                 // We found sequenceLength continuation bytes subsequent to the
01095                                                 // start byte at ioCurrent.
01096                                                 return;
01097                                                 }
01098                                         }
01099                                 continuationCount = 0;
01100                                 seenNonContinuation = true;
01101                                 }
01102                         }
01103                 }
01104         }
01105 
01106 template <class I>
01107 bool Functions_Read_T<I, UTF8>::sDec(I iStart, I& ioCurrent, I iEnd)
01108         {
01109         size_t continuationCount = 0;
01110         bool seenNonContinuation = false;
01111         for (;;)
01112                 {
01113                 if (iStart >= ioCurrent)
01114                         {
01115                         // We'd run off the start.
01116                         return false;
01117                         }
01118 
01119                 const uint8 firstByte = *--ioCurrent;
01120                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01121                 if (sequenceLength == 0)
01122                         {
01123                         if (sIsContinuation(firstByte))
01124                                 {
01125                                 // It's a continuation.
01126                                 ++continuationCount;
01127                                 }
01128                         else
01129                                 {
01130                                 // It's illegal.
01131                                 continuationCount = 0;
01132                                 seenNonContinuation = true;
01133                                 }
01134                         }
01135                 else
01136                         {
01137                         // It's a start byte or is normal.
01138                         if (continuationCount + 1 >= sequenceLength)
01139                                 {
01140                                 // We've seen enough continuation bytes.
01141                                 return true;
01142                                 }
01143                         else
01144                                 {
01145                                 // We haven't seen enough continuation bytes.
01146                                 if (!seenNonContinuation)
01147                                         {
01148                                         // We've only seen continuation bytes.
01149                                         if (ioCurrent + sequenceLength <= iEnd)
01150                                                 {
01151                                                 // If there were enough continuation bytes, they would not extend
01152                                                 // beyond the end of the buffer.
01153                                                 I current = ioCurrent + 1;
01154                                                 while (--sequenceLength)
01155                                                         {
01156                                                         if (!sIsContinuation(*current++))
01157                                                                 break;
01158                                                         }
01159                                                 if (sequenceLength == 0)
01160                                                         {
01161                                                         // We found sequenceLength continuation bytes subsequent to the
01162                                                         // start byte at ioCurrent.
01163                                                         return true;
01164                                                         }
01165                                                 }
01166                                         }
01167                                 continuationCount = 0;
01168                                 seenNonContinuation = true;
01169                                 }
01170                         }
01171                 }
01172         }
01173 
01174 template <class I>
01175 UTF32 Functions_Read_T<I, UTF8>::sRead(I iCurrent)
01176         {
01177         return sReadInc(iCurrent);
01178         }
01179 
01180 template <class I>
01181 bool Functions_Read_T<I, UTF8>::sRead(I iCurrent, I iEnd, UTF32& oCP)
01182         {
01183         return sReadInc(iCurrent, iEnd, oCP);
01184         }
01185 
01186 template <class I>
01187 UTF32 Functions_Read_T<I, UTF8>::sReadInc(I& ioCurrent)
01188         {
01189         for (;;)
01190                 {
01191                 const uint8 firstByte = *ioCurrent++;
01192                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01193                 if (sequenceLength == 1)
01194                         {
01195                         return firstByte;
01196                         }
01197                 else if (sequenceLength == 0)
01198                         {
01199                         // It's a continuation or illegal, ignore it.
01200                         }
01201                 else
01202                         {
01203                         uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01204                         bool okay = true;
01205                         while (--sequenceLength)
01206                                 {
01207                                 const uint8 curByte = *ioCurrent++;
01208                                 if (!sIsContinuation(curByte))
01209                                         {
01210                                         // It's not a legal continuation byte.
01211                                         --ioCurrent;
01212                                         okay = false;
01213                                         break;
01214                                         }
01215                                 sAppendContinuation(result, curByte);
01216                                 }
01217                         if (okay)
01218                                 return result;
01219                         }
01220                 }
01221         }
01222 
01223 
01224 template <class I>
01225 bool Functions_Read_T<I, UTF8>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP)
01226         {
01227         for (;;)
01228                 {
01229                 if (ioCurrent >= iEnd)
01230                         {
01231                         // We've run off the end.
01232                         return false;
01233                         }
01234 
01235                 const uint8 firstByte = *ioCurrent++;
01236                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01237                 if (sequenceLength == 1)
01238                         {
01239                         oCP = firstByte;
01240                         return true;
01241                         }
01242                 else if (sequenceLength == 0)
01243                         {
01244                         // It's a continuation or illegal, ignore it.
01245                         }
01246                 else
01247                         {
01248                         if (ioCurrent + sequenceLength - 1 > iEnd)
01249                                 {
01250                                 --ioCurrent;
01251                                 return false;
01252                                 }
01253 
01254                         uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01255                         bool okay = true;
01256                         while (--sequenceLength)
01257                                 {
01258                                 const uint8 curByte = *ioCurrent++;
01259                                 if (!sIsContinuation(curByte))
01260                                         {
01261                                         // It's not a legal continuation byte.
01262                                         --ioCurrent;
01263                                         okay = false;
01264                                         break;
01265                                         }
01266                                 sAppendContinuation(result, curByte);
01267                                 }
01268                         if (okay)
01269                                 {
01270                                 oCP = result;
01271                                 return true;
01272                                 }
01273                         }
01274                 }
01275         }
01276 
01277 template <class I>
01278 bool Functions_Read_T<I, UTF8>::sReadInc(I& ioCurrent, I iEnd, UTF32& oCP, size_t& ioCountSkipped)
01279         {
01280         for (;;)
01281                 {
01282                 if (ioCurrent >= iEnd)
01283                         {
01284                         // We've run off the end.
01285                         return false;
01286                         }
01287 
01288                 const uint8 firstByte = *ioCurrent++;
01289                 size_t sequenceLength = sUTF8SequenceLength[firstByte];
01290                 if (sequenceLength == 1)
01291                         {
01292                         oCP = firstByte;
01293                         return true;
01294                         }
01295                 else if (sequenceLength == 0)
01296                         {
01297                         // It's a continuation or illegal, ignore it.
01298                         ++ioCountSkipped;
01299                         }
01300                 else
01301                         {
01302                         if (ioCurrent + sequenceLength - 1 > iEnd)
01303                                 {
01304                                 --ioCurrent;
01305                                 return false;
01306                                 }
01307 
01308                         uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01309                         bool okay = true;
01310                         for (size_t countConsumed = 1; countConsumed < sequenceLength; ++countConsumed)
01311                                 {
01312                                 const uint8 curByte = *ioCurrent++;
01313                                 if (!sIsContinuation(curByte))
01314                                         {
01315                                         // It's not a legal continuation byte.
01316                                         --ioCurrent;
01317                                         okay = false;
01318                                         ioCountSkipped += countConsumed + 1;
01319                                         break;
01320                                         }
01321                                 sAppendContinuation(result, curByte);
01322                                 }
01323                         if (okay)
01324                                 {
01325                                 oCP = result;
01326                                 return true;
01327                                 }
01328                         }
01329                 }
01330         }
01331 
01332 template <class I>
01333 UTF32 Functions_Read_T<I, UTF8>::sDecRead(I& ioCurrent)
01334         {
01335         for (;;)
01336                 {
01337                 const uint8 firstByte = *--ioCurrent;
01338                 if (size_t sequenceLength = sUTF8SequenceLength[firstByte])
01339                         {
01340                         // It's a start byte or is normal.
01341                         if (sequenceLength == 1)
01342                                 return firstByte;
01343                         uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01344                         I localCurrent = ioCurrent;
01345                         while (--sequenceLength)
01346                                 {
01347                                 const uint8 curByte = *++localCurrent;
01348                                 if (sIsContinuation(curByte))
01349                                         sAppendContinuation(result, curByte);
01350                                 else
01351                                         break;
01352                                 }
01353                         if (sequenceLength == 0)
01354                                 return result;
01355                         }
01356                 }
01357         }
01358 
01359 template <class I>
01360 bool Functions_Read_T<I, UTF8>::sDecRead(I iStart, I& ioCurrent, I iEnd, UTF32& oCP)
01361         {
01362         for (;;)
01363                 {
01364                 if (iStart >= ioCurrent)
01365                         {
01366                         // We'd run off the start.
01367                         return false;
01368                         }
01369 
01370                 const uint8 firstByte = *--ioCurrent;
01371                 if (size_t sequenceLength = sUTF8SequenceLength[firstByte])
01372                         {
01373                         // It's a start byte or is normal.
01374                         if (ioCurrent + sequenceLength <= iEnd)
01375                                 {
01376                                 // And the sequence fits within the available space.
01377                                 if (sequenceLength == 1)
01378                                         {
01379                                         // Special case single bytes.
01380                                         oCP = firstByte;
01381                                         return true;
01382                                         }
01383                                         
01384                                 uint32 result = firstByte & sUTF8StartByteMask[sequenceLength];
01385                                 I localCurrent = ioCurrent;
01386                                 while (--sequenceLength)
01387                                         {
01388                                         const uint8 curByte = *++localCurrent;
01389                                         if (sIsContinuation(curByte))
01390                                                 sAppendContinuation(result, curByte);
01391                                         else
01392                                                 break;
01393                                         }
01394                                 if (sequenceLength == 0)
01395                                         {
01396                                         // We found enough continuations.
01397                                         oCP = result;
01398                                         return true;
01399                                         }
01400                                 }
01401                         }
01402                 }
01403         }
01404 
01405 // =================================================================================================
01406 #pragma mark -
01407 #pragma mark * ZUnicode::Functions_Write_T<I, UTF8>
01408 
01409 template <class I>
01410 struct Functions_Write_T<I, UTF8>
01411         {
01412         static bool sWrite(I iDest, I iEnd, UTF32 iCP);
01413         static bool sWriteInc(I& ioDest, I iEnd, UTF32 iCP);
01414         };
01415 
01416 template <class I>
01417 bool Functions_Write_T<I, UTF8>::sWrite(I iDest, I iEnd, UTF32 iCP)
01418         {
01419         if (sIsValidCP(iCP))
01420                 {
01421                 size_t bytesToWrite;
01422                 if (iCP < 0x80)
01423                         {
01424                         if (iDest <= iEnd)
01425                                 {
01426                                 *iDest = iCP;
01427                                 return true;
01428                                 }
01429                         return false;
01430                         }
01431                 else if (iCP < 0x800) bytesToWrite = 2;
01432                 else if (iCP < 0x10000) bytesToWrite = 3;
01433                 else if (iCP < 0x200000) bytesToWrite = 4;
01434                 else if (iCP < 0x4000000) bytesToWrite = 5;
01435                 else bytesToWrite = 6;
01436 
01437                 iDest += bytesToWrite;
01438                 if (iDest >= iEnd)
01439                         return false;
01440 
01441                 const UTF32 byteMask = 0xBF;
01442                 const UTF32 byteMark = 0x80;
01443                 switch (bytesToWrite)
01444                         {
01445                         // note: code falls through cases
01446                         case 6: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01447                         case 5: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01448                         case 4: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01449                         case 3: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01450                         case 2: *--iDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01451                         }
01452                 *--iDest = iCP | sUTF8StartByteMark[bytesToWrite];
01453                 }
01454         return true;
01455         }
01456 
01457 template <class I>
01458 bool Functions_Write_T<I, UTF8>::sWriteInc(I& ioDest, I iEnd, UTF32 iCP)
01459         {
01460         if (sIsValidCP(iCP))
01461                 {
01462                 size_t bytesToWrite;
01463                 if (iCP < 0x80)
01464                         {
01465                         if (ioDest <= iEnd)
01466                                 {
01467                                 *ioDest++ = iCP;
01468                                 return true;
01469                                 }
01470                         return false;
01471                         }
01472                 else if (iCP < 0x800) bytesToWrite = 2;
01473                 else if (iCP < 0x10000) bytesToWrite = 3;
01474                 else if (iCP < 0x200000) bytesToWrite = 4;
01475                 else if (iCP < 0x4000000) bytesToWrite = 5;
01476                 else bytesToWrite = 6;
01477 
01478                 I localDest = ioDest + bytesToWrite;
01479                 if (localDest >= iEnd)
01480                         return false;
01481                 ioDest = localDest;
01482 
01483                 const UTF32 byteMask = 0xBF;
01484                 const UTF32 byteMark = 0x80;
01485                 switch (bytesToWrite)
01486                         {
01487                         // note: code falls through cases
01488                         case 6: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01489                         case 5: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01490                         case 4: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01491                         case 3: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01492                         case 2: *--localDest = (iCP | byteMark) & byteMask; iCP >>= 6;
01493                         }
01494                 *--localDest = iCP | sUTF8StartByteMark[bytesToWrite];
01495                 }
01496         return true;
01497         }
01498 
01499 // =================================================================================================
01500 #pragma mark -
01501 #pragma mark * ZUnicode::Functions_Convert
01502 
01503 template <class I>
01504 string32 Functions_Convert_T<I>::sAsUTF32(I iSource)
01505         {
01506         string32 result;
01507         for (;;)
01508                 {
01509                 if (UTF32 theCP = sReadInc(iSource))
01510                         result += theCP;
01511                 else
01512                         break;
01513                 }
01514         return result;
01515         }
01516 
01517 template <class I>
01518 string32 Functions_Convert_T<I>::sAsUTF32(I iSource, size_t iCountCU)
01519         {
01520 #if 1
01521         string32 result;
01522         result.resize(iCountCU);
01523 
01524         string32::iterator iter = result.begin();
01525         I theEnd = iSource + iCountCU;
01526         for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); /*no inc*/)
01527                 *iter++ = theCP;                
01528         result.resize(iter - result.begin());
01529         return result;  
01530 #else
01531         string32 result;
01532         result.reserve(iCountCU);
01533 
01534         I theEnd = iSource + iCountCU;
01535         for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); result += theCP)
01536                 {}
01537         return result;
01538 #endif
01539         }
01540 
01541 template <class I>
01542 string32 Functions_Convert_T<I>::sAsUTF32(I iSource, I iEnd)
01543         {
01544         string32 result;
01545         for (UTF32 theCP; sReadInc(iSource, iEnd, theCP); result += theCP)
01546                 {}
01547         return result;
01548         }
01549 
01550 template <class I>
01551 string16 Functions_Convert_T<I>::sAsUTF16(I iSource)
01552         {
01553         string16 result;
01554         for (;;)
01555                 {
01556                 if (UTF32 theCP = sReadInc(iSource))
01557                         result += theCP;
01558                 else
01559                         break;
01560                 }
01561         return result;
01562         }
01563 
01564 template <class I>
01565 string16 Functions_Convert_T<I>::sAsUTF16(I iSource, size_t iCountCU)
01566         {
01567 #if 1
01568         string16 result;
01569         // V. conservative, for UTF32->UTF16
01570         result.resize(iCountCU * 2);
01571         string16::iterator iter = result.begin();
01572         I theEnd = iSource + iCountCU;
01573         for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); /*no inc*/)
01574                 {
01575                 uint32 realCP = theCP;
01576                 if (realCP <= ZUnicode::kCPMaxUCS2)
01577                         {
01578                         *iter++ = UTF16(realCP);
01579                         }
01580                 else
01581                         {
01582                         realCP -= 0x10000;
01583                         *iter++ = UTF16(realCP / 0x400 + ZUnicode::kCPSurrogateHighBegin);
01584                         *iter++ += UTF16(realCP & 0x3FF + ZUnicode::kCPSurrogateLowBegin);
01585                         }
01586                 }
01587         result.resize(iter - result.begin());
01588         return result;  
01589 #else
01590         string16 result;
01591         result.reserve(iCountCU);
01592 
01593         I theEnd = iSource + iCountCU;
01594         for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); result += theCP)
01595                 {}
01596         return result;
01597 #endif
01598         }
01599 
01600 template <class I>
01601 string16 Functions_Convert_T<I>::sAsUTF16(I iSource, I iEnd)
01602         {
01603         string16 result;
01604         for (UTF32 theCP; sReadInc(iSource, iEnd, theCP); result += theCP)
01605                 {}
01606         return result;
01607         }
01608 
01609 template <class I>
01610 string8 Functions_Convert_T<I>::sAsUTF8(I iSource)
01611         {
01612         string8 result;
01613         for (;;)
01614                 {
01615                 if (UTF32 theCP = sReadInc(iSource))
01616                         result += theCP;
01617                 else
01618                         break;
01619                 }
01620         return result;
01621         }
01622 
01623 template <class I>
01624 string8 Functions_Convert_T<I>::sAsUTF8(I iSource, size_t iCountCU)
01625         {
01626         string8 result;
01627         result.reserve(iCountCU);
01628 
01629         I theEnd = iSource + iCountCU;
01630         for (UTF32 theCP; sReadInc(iSource, theEnd, theCP); result += theCP)
01631                 {}
01632 
01633         return result;
01634         }
01635 
01636 template <class I>
01637 string8 Functions_Convert_T<I>::sAsUTF8(I iSource, I iEnd)
01638         {
01639         string8 result;
01640         for (UTF32 theCP; sReadInc(iSource, iEnd, theCP); result += theCP)
01641                 {}
01642         return result;
01643         }
01644 
01645 } // namespace ZUnicode
01646 
01647 #endif // __ZUnicodePrivB__

Generated on Thu Jul 26 11:21:52 2007 for ZooLib by  doxygen 1.4.7