documentation/opennurbs__unicode_8h_source.html

 /* $NoKeywords: $ */

 /*

 //

 // Copyright (c) 1993-2012 Robert McNeel & Associates. All rights reserved.

 // OpenNURBS, Rhinoceros, and Rhino3D are registered trademarks of Robert

 // McNeel & Associates.

 //

 // THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.

 // ALL IMPLIED WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND OF

 // MERCHANTABILITY ARE HEREBY DISCLAIMED.

 //

 // For complete openNURBS copyright information see <http://www.opennurbs.org>.

 //

 ////////////////////////////////////////////////////////////////

 */


 #if !defined(OPENNURBS_UNICODE_INC_)

 #define OPENNURBS_UNICODE_INC_


 ON_BEGIN_EXTERNC


 /*

 Error reporting and error masking parameters. A pointer to this

 struct is the "e" parameter of ON_DecodeUTF8(), ON_DecodeUTF16()

 and ON_DecodeSwapByteUTF16().

 */

 struct ON_UnicodeErrorParameters

 {

   /*

   If an error occurs, then bits of m_error_status are

   set to indicate what type of error occurred.


   Error types:

     1: The input parameters were invalid.

        This error cannot be masked.


     2: The output buffer was not large enough to hold the converted

        string. As much conversion as possible is performed in this

        case and the error cannot be masked.


     4: When parsing a UTF-8 or UTF-32 string, the values of two

        consecutive encoding sequences formed a valid UTF-16

        surrogate pair.


        This error is masked if 0 != (4 & m_error_mask).

        If the error is masked, then the surrogate pair is

        decoded, the value of the resulting unicode code point

        is used, and parsing continues.


     8: An overlong UTF-8 encoding sequence was encountered and

        the value of the overlong UTF-8 sequence was a valid unicode

        code point.


        This error is masked if 0 != (8 & m_error_mask).

        If the error is masked, then the unicode code point is

        used and parsing continues.


    16: An illegal UTF-8 encoding sequence occurred or an invalid

        unicode code point value resulted from decoding a

        UTF-8 sequence.


        This error is masked if 0 != (16 & m_error_mask).

        If the error is masked and the value of m_error_code_point is

        a valid unicode code point, then m_error_code_point is used

        and parsing continues.

   */

   unsigned int m_error_status;


   /*

   If 0 != (m_error_mask & 4), then type 4 errors are masked.

   If 0 != (m_error_mask & 8), then type 8 errors are masked.

   If 0 != (m_error_mask & 16) and m_error_code_point is a valid unicode

   code point value, then type 16 errors are masked.

   */

   unsigned int m_error_mask;


   /*

   Unicode code point value to use when masking type 16 errors.

   If 0 == (m_error_mask & 16), then this field is ignored.

   0xFFFD is a popular choice for the m_error_code_point value.

   */

   ON__UINT32 m_error_code_point;

 };


 /*

 Description:

   Test a value to determine if it is a valid unicode code point value.

 Parameters:

   u - [in] value to test

 Returns:

   true: u is a valid unicode code point

   false: u is not a valid unicode code point

 Remarks:

   Valid unicode code points are

   (0 <= u && u <= 0xD7FF) || (0xE000 <= u && u <= 0x10FFFF)

 */

 ON_DECL

 int ON_IsValidUnicodeCodePoint( ON__UINT32 u ); /* true/false result returned as an int */


 /*

 Description:

   Convert an integer to its UTF-8 form.

 Parameters:

   u - [in]

     Integer in the CPU's native byte order that can be

     converted to UTF-8 form.

     Valid values are in the interval [0,2147483647].

   sUTF8 - [out]

     sUTF8 is a buffer of 6 ON__UINT8 elements and the UTF-8 form

     is returned in sUTF8[]. The returned value specifies how

     many elements of sUTF8[] are set.

 Returns:

   0: u is too large (>=2^31) to be encoded as a UTF-8 string.

      No changes are made to the sUTF8[] values.

   1: the UTF-8 form of u is 1 byte returned in sUTF8[0].

   2: the UTF-8 form of u is 2 bytes returned in sUTF8[0],sUTF8[1].

   3: the UTF-8 form of u is 3 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2].

   4: the UTF-8 form of u is 4 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3].

   5: the UTF-8 form of u is 5 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4].

   6: the UTF-8 form of u is 6 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4],sUTF8[5].

   For return values requiring less than 6 bytes, no changes

   are made to the unused bytes in sUTF8[].

 Remarks:

   Any integer in the range 0 to 2^31 - 1 can be encoded as a UTF-8 string.

   When a unicode string is being encoded take steps to ensure that

   u is a valid unicode code point value.  The function ON_IsValidUnicodeCodePoint()

   can be used to determine if u is a valid unicode code point value.

 */

 ON_DECL

 int ON_EncodeUTF8( ON__UINT32 u, ON__UINT8 sUTF8[6] ); /* returns 0 on failure, otherwise the number (1 to 6) of sUTF8[] elements set */


 /*

 Description:

   Decode a UTF-8 encoded string to get a single unicode code point.

 Parameters:

   sUTF8 - [in]

     UTF-8 string to convert.


   sUTF8_count - [in]

     number of ON__UINT8 elements in sUTF8[].


   e - [in/out]

     If e is null, errors are not masked and parsing is performed

     to the point where the first error occurs.

     If e is not null, all errors are reported by setting the appropriate

     e->m_error_status bits and errors are handled as described in the

     definition of the ON_UnicodeErrorParameters struct.


   unicode_code_point - [out]

     The unicode_code_point pointer must not be null.

     If a nonzero value is returned, then *unicode_code_point is

     a valid unicode code point value.

 Returns:

   Number of elements of sUTF8 that were parsed.

   0 indicates failure.

 */

 ON_DECL

 int ON_DecodeUTF8(                          /* returns the number of sUTF8[] elements parsed; 0 indicates failure */

     const ON__UINT8* sUTF8,                 /* [in] UTF-8 string to decode */

     int sUTF8_count,                        /* [in] number of ON__UINT8 elements in sUTF8[] */

     struct ON_UnicodeErrorParameters* e,    /* [in/out] may be null; error status bits and masking options */

     ON__UINT32* unicode_code_point          /* [out] must not be null; receives the decoded code point */

     );


 /*

 Description:

   Convert a 4 byte unicode code point value to its UTF-16 form.

 Parameters:

   unicode_code_point - [in]

     4 byte unicode code point value in the CPU's native byte order.

     Valid values are in the interval [0,0xD7FF] or the

     interval [0xE000,0x10FFFF].

   sUTF16 - [out]

     sUTF16 is a buffer of 2 ON__UINT16 elements. If the UTF-16 form

     is a single value, it is returned in sUTF16[0]. If the UTF-16 form

     is a surrogate pair, the first code unit (high surrogate)

     is returned in sUTF16[0] and the second unit (low surrogate) is

     returned in sUTF16[1].  The returned values are in

     the CPU's native byte order.

 Returns:

   0: unicode_code_point is not a valid Unicode code point. No changes

      are made to the sUTF16[] values.

   1: unicode_code_point is a valid Unicode code point with a UTF-16 form

      consisting of the single value returned in sUTF16[0].

   2: unicode_code_point is a valid Unicode code point with a UTF-16 form

      consisting of a surrogate pair returned in sUTF16[0] and sUTF16[1].

 */

 ON_DECL

 int ON_EncodeUTF16( ON__UINT32 unicode_code_point, ON__UINT16 sUTF16[2] ); /* returns 0 on failure, otherwise the number (1 or 2) of sUTF16[] elements set */


 /*

 Description:

   Decode a UTF-16 string to get a single unicode code point.

 Parameters:

   sUTF16 - [in]

     UTF-16 string to convert.


   sUTF16_count - [in]

     number of ON__UINT16 elements in sUTF16[].


   e - [in/out]

     If e is null, errors are not masked and parsing is performed

     to the point where the first error occurs.

     If e is not null, all errors are reported by setting the appropriate

     e->m_error_status bits and errors are handled as described in the

     definition of the ON_UnicodeErrorParameters struct.


   unicode_code_point - [out]

     The unicode_code_point pointer must not be null.

     If a nonzero value is returned, then *unicode_code_point is

     a valid unicode code point value in the CPU's native byte order.

 Returns:

   Number of elements of sUTF16 that were parsed.

   0 indicates failure.

 */

 ON_DECL

 int ON_DecodeUTF16(                         /* returns the number of sUTF16[] elements parsed; 0 indicates failure */

     const ON__UINT16* sUTF16,               /* [in] UTF-16 string to decode */

     int sUTF16_count,                       /* [in] number of ON__UINT16 elements in sUTF16[] */

     struct ON_UnicodeErrorParameters* e,    /* [in/out] may be null; error status bits and masking options */

     ON__UINT32* unicode_code_point          /* [out] must not be null; receives the decoded code point */

     );


 /*

 Description:

   Decode a UTF-16 encoded string whose elements have byte order

   opposite the native CPU's to get a single unicode code point.

 Parameters:

   sUTF16 - [in]

     UTF-16 string to convert with byte order opposite the

     CPU's native byte order.


   sUTF16_count - [in]

     number of ON__UINT16 elements in sUTF16[].


   e - [in/out]

     If e is null, errors are not masked and parsing is performed

     to the point where the first error occurs.

     If e is not null, all errors are reported by setting the appropriate

     e->m_error_status bits and errors are handled as described in the

     definition of the ON_UnicodeErrorParameters struct.


   unicode_code_point - [out]

     The unicode_code_point pointer must not be null.

     If a nonzero value is returned, then *unicode_code_point is

     a valid unicode code point value in the CPU's native byte order.

 Returns:

   Number of elements of sUTF16 that were parsed.

   0 indicates failure.

 */

 ON_DECL

 int ON_DecodeSwapByteUTF16(                 /* returns the number of sUTF16[] elements parsed; 0 indicates failure */

     const ON__UINT16* sUTF16,               /* [in] UTF-16 string with byte order opposite the CPU's native byte order */

     int sUTF16_count,                       /* [in] number of ON__UINT16 elements in sUTF16[] */

     struct ON_UnicodeErrorParameters* e,    /* [in/out] may be null; error status bits and masking options */

     ON__UINT32* unicode_code_point          /* [out] must not be null; receives the code point in native byte order */

     );


 /*

 Description:

   Convert a unicode string from a UTF-8 encoded ON__UINT8 array

   into a UTF-16 encoded ON__UINT16 array.


 Parameters:

   sUTF8 - [in]

     UTF-8 string to convert.


   sUTF8_count - [in]

     If sUTF8_count >= 0, then it specifies the number of

     ON__UINT8 elements in sUTF8[] to convert.


     If sUTF8_count == -1, then sUTF8 must be a null terminated

     string and all the elements up to the first null element are

     converted.


   sUTF16 - [out]

     If sUTF16 is not null and sUTF16_count > 0, then the UTF-16

     encoded string is returned in this buffer. If there is room

     for the null terminator, the converted string will be null

     terminated. The null terminator is never included in the count

     returned by this function. The converted string is in the

     CPU's native byte order. No byte order mark is prepended.


   sUTF16_count - [in]

     If sUTF16_count > 0, then it specifies the number of available

     ON__UINT16 elements in the sUTF16[] buffer.


     If sUTF16_count == 0, then the sUTF16 parameter is ignored.


   error_status - [out]

     If error_status is not null, then bits of *error_status are

     set to indicate the success or failure of the conversion.

     When the error_mask parameter is used to mask some

     conversion errors, multiple bits may be set.

        0: Successful conversion with no errors.

        1: Invalid input parameters. This error cannot be masked.

        2: The sUTF16 output buffer was not large enough to hold

           the converted string. This error cannot be masked.

        4: The values of two UTF-8 encoding sequences formed a valid

           UTF-16 surrogate pair. This error can be masked.  If the

           error is masked, then the surrogate pair is added

           to the UTF-16 output string and parsing continues.

        8: An overlong UTF-8 encoding sequence was encountered.

           The value of the overlong sequence was a valid unicode

           code point. This error can be masked. If the error is masked,

           then the unicode code point is encoded and added to the

           UTF-16 output string and parsing continues.

       16: An illegal UTF-8 encoding sequence occurred or an invalid

           unicode code point value resulted from decoding a

           UTF-8 sequence. This error can be masked. If the error is

           masked and error_code_point is a valid unicode code point,

           then its UTF-16 encoding is added to the UTF-16 output

           string and parsing continues.


   error_mask - [in]

     If 0 != (error_mask & 4), then type 4 errors are masked.

     If 0 != (error_mask & 8), then type 8 errors are masked.

     If 0 != (error_mask & 16) and error_code_point is a valid unicode

     code point value, then type 16 errors are masked.


   error_code_point - [in]

     Unicode code point value to use when masking type 16 errors.

     If 0 == (error_mask & 16), then this parameter is ignored.

     0xFFFD is a popular choice for the error_code_point value.


   sNextUTF8 - [out]

     If sNextUTF8 is not null, then *sNextUTF8 points to the first

     element in the input sUTF8[] buffer that was not converted.


     If an error occurs and is not masked, then *sNextUTF8 points to

     the element of sUTF8[] where the conversion failed.  If no errors

     occur or all errors are masked, then *sNextUTF8 points to

     sUTF8 + sUTF8_count.


 Returns:

   If sUTF16_count > 0, the return value is the number of ON__UINT16

   elements written to sUTF16[].  When the return value < sUTF16_count,

   a null terminator is written to sUTF16[return value].


   If sUTF16_count == 0, the return value is the minimum number of

   ON__UINT16 elements that are needed to hold the converted string.

   The return value does not include room for a null terminator.

   Increment the return value by one if you want to have an element

   to use for a null terminator.

 */

 ON_DECL

 int ON_ConvertUTF8ToUTF16(

     const ON__UINT8* sUTF8,          /* [in] UTF-8 string to convert */

     int sUTF8_count,                 /* [in] element count, or -1 when sUTF8 is null terminated */

     ON__UINT16* sUTF16,              /* [out] UTF-16 output buffer; ignored when sUTF16_count is 0 */

     int sUTF16_count,                /* [in] available elements in sUTF16[]; 0 requests the required length */

     unsigned int* error_status,      /* [out] may be null; receives the error bits */

     unsigned int error_mask,         /* [in] bits 4, 8 and 16 select the error types to mask */

     ON__UINT32 error_code_point,     /* [in] code point used when type 16 errors are masked */

     const ON__UINT8** sNextUTF8      /* [out] may be null; first sUTF8[] element that was not converted */

     );


 /*

 Description:

   Convert a unicode string from a UTF-8 encoded ON__UINT8 array

   into a UTF-32 encoded ON__UINT32 array.


 Parameters:

   sUTF8 - [in]

     UTF-8 string to convert.


   sUTF8_count - [in]

     If sUTF8_count >= 0, then it specifies the number of

     ON__UINT8 elements in sUTF8[] to convert.


     If sUTF8_count == -1, then sUTF8 must be a null terminated

     string and all the elements up to the first null element are

     converted.


   sUTF32 - [out]

     If sUTF32 is not null and sUTF32_count > 0, then the UTF-32

     encoded string is returned in this buffer. If there is room

     for the null terminator, the converted string will be null

     terminated. The null terminator is never included in the count

     returned by this function. The converted string is in the

     CPU's native byte order. No byte order mark is prepended.


   sUTF32_count - [in]

     If sUTF32_count > 0, then it specifies the number of available

     ON__UINT32 elements in the sUTF32[] buffer.


     If sUTF32_count == 0, then the sUTF32 parameter is ignored.


   error_status - [out]

     If error_status is not null, then bits of *error_status are

     set to indicate the success or failure of the conversion.

     When the error_mask parameter is used to mask some

     conversion errors, multiple bits may be set.

        0: Successful conversion with no errors.

        1: Invalid input parameters. This error cannot be masked.

        2: The sUTF32 output buffer was not large enough to hold

           the converted string. This error cannot be masked.

        4: The values of two UTF-8 encoding sequences formed a valid

           UTF-16 surrogate pair. This error can be masked.  If the

           error is masked, then the surrogate pair is decoded,

           the code point value is added to the UTF-32 output

           string and parsing continues.

        8: An overlong UTF-8 encoding sequence was encountered.

           The value of the overlong sequence was a valid unicode

           code point. This error can be masked. If the error is masked,

           then the unicode code point is added to the UTF-32

           output string and parsing continues.

       16: An illegal UTF-8 encoding sequence occurred or an invalid

           unicode code point value resulted from decoding a

           UTF-8 sequence. This error can be masked. If the error is

           masked and error_code_point is a valid unicode code point,

           then its value is added to the UTF-32 output string and

           parsing continues.


   error_mask - [in]

     If 0 != (error_mask & 4), then type 4 errors are masked.

     If 0 != (error_mask & 8), then type 8 errors are masked.

     If 0 != (error_mask & 16) and error_code_point is a valid unicode

     code point value, then type 16 errors are masked.


   error_code_point - [in]

     Unicode code point value to use when masking type 16 errors.

     If 0 == (error_mask & 16), then this parameter is ignored.

     0xFFFD is a popular choice for the error_code_point value.


   sNextUTF8 - [out]

     If sNextUTF8 is not null, then *sNextUTF8 points to the first

     element in the input sUTF8[] buffer that was not converted.


     If an error occurs and is not masked, then *sNextUTF8 points to

     the element of sUTF8[] where the conversion failed.  If no errors

     occur or all errors are masked, then *sNextUTF8 points to

     sUTF8 + sUTF8_count.


 Returns:

   If sUTF32_count > 0, the return value is the number of ON__UINT32

   elements written to sUTF32[].  When the return value < sUTF32_count,

   a null terminator is written to sUTF32[return value].


   If sUTF32_count == 0, the return value is the minimum number of

   ON__UINT32 elements that are needed to hold the converted string.

   The return value does not include room for a null terminator.

   Increment the return value by one if you want to have an element

   to use for a null terminator.

 */

 ON_DECL

 int ON_ConvertUTF8ToUTF32(

     const ON__UINT8* sUTF8,          /* [in] UTF-8 string to convert */

     int sUTF8_count,                 /* [in] element count, or -1 when sUTF8 is null terminated */

     ON__UINT32* sUTF32,              /* [out] UTF-32 output buffer; ignored when sUTF32_count is 0 */

     int sUTF32_count,                /* [in] available elements in sUTF32[]; 0 requests the required length */

     unsigned int* error_status,      /* [out] may be null; receives the error bits */

     unsigned int error_mask,         /* [in] bits 4, 8 and 16 select the error types to mask */

     ON__UINT32 error_code_point,     /* [in] code point used when type 16 errors are masked */

     const ON__UINT8** sNextUTF8      /* [out] may be null; first sUTF8[] element that was not converted */

     );


 /*

 Description:

   Convert a unicode string from a UTF-16 encoded ON__UINT16 array

   into a UTF-8 encoded ON__UINT8 array.


 Parameters:

   bTestByteOrder - [in]

     If bTestByteOrder is true and the first element of sUTF16[]

     is 0xFEFF, then this element is ignored.


     If bTestByteOrder is true and the first element of sUTF16[]

     is 0xFFFE, then this element is ignored and the subsequent

     elements of sUTF16[] have their bytes swapped before the

     conversion is calculated.


     In all other cases the first element of sUTF16[] is

     converted and no byte swapping is performed.


   sUTF16 - [in]

     UTF-16 string to convert.


     If bTestByteOrder is true and the first element of sUTF16[]

     is 0xFEFF, then this element is skipped and it is assumed

     that sUTF16[] is in the CPU's native byte order.


     If bTestByteOrder is true and the first element of sUTF16[]

     is 0xFFFE, then this element is skipped and it is assumed

     that sUTF16[] is not in the CPU's native byte order and bytes

     are swapped before characters are converted.


     If bTestByteOrder is false or the first character of sUTF16[]

     is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match

     the CPU's byte order.


   sUTF16_count - [in]

     If sUTF16_count >= 0, then it specifies the number of

     ON__UINT16 elements in sUTF16[] to convert.


     If sUTF16_count == -1, then sUTF16 must be a null terminated

     string and all the elements up to the first null element are

     converted.


   sUTF8 - [out]

     If sUTF8 is not null and sUTF8_count > 0, then the UTF-8

     encoded string is returned in this buffer. If there is room

     for the null terminator, the converted string will be null

     terminated. The null terminator is never included in the count

     returned by this function. The converted string is in the

     CPU's native byte order. No byte order mark is prepended.


   sUTF8_count - [in]

     If sUTF8_count > 0, then it specifies the number of available

     ON__UINT8 elements in the sUTF8[] buffer.


     If sUTF8_count == 0, then the sUTF8 parameter is ignored.


   error_status - [out]

     If error_status is not null, then bits of *error_status are

     set to indicate the success or failure of the conversion.

     When the error_mask parameter is used to mask some

     conversion errors, multiple bits may be set.

        0: Successful conversion with no errors.

        1: Invalid input parameters. This error cannot be masked.

        2: The sUTF8 output buffer was not large enough to hold

           the converted string. This error cannot be masked.

       16: An illegal UTF-16 encoding sequence occurred or an invalid

           unicode code point value resulted from decoding a

           UTF-16 sequence. This error can be masked. If the error is

           masked and error_code_point is a valid unicode code point,

           then its UTF-8 encoding is added to the UTF-8 output

           string and parsing continues.


   error_mask - [in]

     If 0 != (error_mask & 16) and error_code_point is a valid unicode

     code point value, then type 16 errors are masked.


   error_code_point - [in]

     Unicode code point value to use when masking type 16 errors.

     If 0 == (error_mask & 16), then this parameter is ignored.

     0xFFFD is a popular choice for the error_code_point value.


   sNextUTF16 - [out]

     If sNextUTF16 is not null, then *sNextUTF16 points to the first

     element in the input sUTF16[] buffer that was not converted.


     If an error occurs and is not masked, then *sNextUTF16 points to

     the element of sUTF16[] where the conversion failed.  If no errors

     occur or all errors are masked, then *sNextUTF16 points to

     sUTF16 + sUTF16_count.


 Returns:

   If sUTF8_count > 0, the return value is the number of ON__UINT8

   elements written to sUTF8[].  When the return value < sUTF8_count,

   a null terminator is written to sUTF8[return value].


   If sUTF8_count == 0, the return value is the minimum number of

   ON__UINT8 elements that are needed to hold the converted string.

   The return value does not include room for a null terminator.

   Increment the return value by one if you want to have an element

   to use for a null terminator.

 */

 ON_DECL

 int ON_ConvertUTF16ToUTF8(

     int bTestByteOrder,              /* [in] when true, a leading 0xFEFF or 0xFFFE element is treated as a byte order mark */

     const ON__UINT16* sUTF16,        /* [in] UTF-16 string to convert */

     int sUTF16_count,                /* [in] element count, or -1 when sUTF16 is null terminated */

     ON__UINT8* sUTF8,                /* [out] UTF-8 output buffer; ignored when sUTF8_count is 0 */

     int sUTF8_count,                 /* [in] available elements in sUTF8[]; 0 requests the required length */

     unsigned int* error_status,      /* [out] may be null; receives the error bits */

     unsigned int error_mask,         /* [in] bit 16 selects masking of type 16 errors */

     ON__UINT32 error_code_point,     /* [in] code point used when type 16 errors are masked */

     const ON__UINT16** sNextUTF16    /* [out] may be null; first sUTF16[] element that was not converted */

     );


 /*

 Description:

   Convert a unicode string from a UTF-16 encoded ON__UINT16 array

   into a UTF-32 encoded ON__UINT32 array.


 Parameters:

   bTestByteOrder - [in]

     If bTestByteOrder is true and the first element of sUTF16[]

     is 0xFEFF, then this element is ignored.


     If bTestByteOrder is true and the first element of sUTF16[]

     is 0xFFFE, then this element is ignored and the subsequent

     elements of sUTF16[] have their bytes swapped before the

     conversion is calculated.


     In all other cases the first element of sUTF16[] is

     converted and no byte swapping is performed.


   sUTF16 - [in]

     UTF-16 string to convert.


     If bTestByteOrder is true and the first element of sUTF16[]

     is 0xFEFF, then this element is skipped and it is assumed

     that sUTF16[] is in the CPU's native byte order.


     If bTestByteOrder is true and the first element of sUTF16[]

     is 0xFFFE, then this element is skipped and it is assumed

     that sUTF16[] is not in the CPU's native byte order and bytes

     are swapped before characters are converted.


     If bTestByteOrder is false or the first character of sUTF16[]

     is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match

     the CPU's byte order.


   sUTF16_count - [in]

     If sUTF16_count >= 0, then it specifies the number of

     ON__UINT16 elements in sUTF16[] to convert.


     If sUTF16_count == -1, then sUTF16 must be a null terminated

     string and all the elements up to the first null element are

     converted.


   sUTF32 - [out]

     If sUTF32 is not null and sUTF32_count > 0, then the UTF-32

     encoded string is returned in this buffer. If there is room

     for the null terminator, the converted string will be null

     terminated. The null terminator is never included in the count

     returned by this function. The converted string is in the

     CPU's native byte order. No byte order mark is prepended.


   sUTF32_count - [in]

     If sUTF32_count > 0, then it specifies the number of available

     ON__UINT32 elements in the sUTF32[] buffer.


     If sUTF32_count == 0, then the sUTF32 parameter is ignored.


   error_status - [out]

     If error_status is not null, then bits of *error_status are

     set to indicate the success or failure of the conversion.

     When the error_mask parameter is used to mask some

     conversion errors, multiple bits may be set.

        0: Successful conversion with no errors.

        1: Invalid input parameters. This error cannot be masked.

        2: The sUTF32 output buffer was not large enough to hold

           the converted string. This error cannot be masked.

       16: An illegal UTF-16 encoding sequence occurred or an invalid

           unicode code point value resulted from decoding a

           UTF-16 sequence. This error can be masked. If the error is

           masked and error_code_point is a valid unicode code point,

           then its value is added to the UTF-32 output string and

           parsing continues.


   error_mask - [in]

     If 0 != (error_mask & 16) and error_code_point is a valid unicode

     code point value, then type 16 errors are masked.


   error_code_point - [in]

     Unicode code point value to use when masking type 16 errors.

     If 0 == (error_mask & 16), then this parameter is ignored.

     0xFFFD is a popular choice for the error_code_point value.


   sNextUTF16 - [out]

     If sNextUTF16 is not null, then *sNextUTF16 points to the first

     element in the input sUTF16[] buffer that was not converted.


     If an error occurs and is not masked, then *sNextUTF16 points to

     the element of sUTF16[] where the conversion failed.  If no errors

     occur or all errors are masked, then *sNextUTF16 points to

     sUTF16 + sUTF16_count.


 Returns:

   If sUTF32_count > 0, the return value is the number of ON__UINT32

   elements written to sUTF32[].  When the return value < sUTF32_count,

   a null terminator is written to sUTF32[return value].


   If sUTF32_count == 0, the return value is the minimum number of

   ON__UINT32 elements that are needed to hold the converted string.

   The return value does not include room for a null terminator.

   Increment the return value by one if you want to have an element

   to use for a null terminator.

 */

 ON_DECL

 int ON_ConvertUTF16ToUTF32(

     int bTestByteOrder,              /* [in] when true, a leading 0xFEFF or 0xFFFE element is treated as a byte order mark */

     const ON__UINT16* sUTF16,        /* [in] UTF-16 string to convert */

     int sUTF16_count,                /* [in] element count, or -1 when sUTF16 is null terminated */

     /* NOTE(review): the documentation describes sUTF32[] as ON__UINT32 elements and the
        other converters declare ON__UINT32*; this one uses unsigned int*.
        Presumably ON__UINT32 is a typedef of unsigned int - confirm. */

     unsigned int* sUTF32,            /* [out] UTF-32 output buffer; ignored when sUTF32_count is 0 */

     int sUTF32_count,                /* [in] available elements in sUTF32[]; 0 requests the required length */

     unsigned int* error_status,      /* [out] may be null; receives the error bits */

     unsigned int error_mask,         /* [in] bit 16 selects masking of type 16 errors */

     ON__UINT32 error_code_point,     /* [in] code point used when type 16 errors are masked */

     const ON__UINT16** sNextUTF16    /* [out] may be null; first sUTF16[] element that was not converted */

     );


 /*

 Description:

   Convert a unicode string from a UTF-32 encoded ON__UINT32 array

   into a UTF-8 encoded ON__UINT8 array.


 Parameters:

   bTestByteOrder - [in]

     If bTestByteOrder is true and the first element of sUTF32[]

     is 0x0000FEFF, then this element is ignored.


     If bTestByteOrder is true and the first element of sUTF32[]

     is 0xFFFE0000, then this element is ignored and the subsequent

     elements of sUTF32[] have their bytes swapped before the

     conversion is calculated.


     In all other cases the first element of sUTF32[] is

     converted and no byte swapping is performed.


   sUTF32 - [in]

     UTF-32 string to convert.


     If bTestByteOrder is true and the first element of sUTF32[]

     is 0x0000FEFF, then this element is skipped and it is assumed

     that sUTF32[] is in the CPU's native byte order.


     If bTestByteOrder is true and the first element of sUTF32[]

     is 0xFFFE0000, then this element is skipped and it is assumed

     that sUTF32[] is not in the CPU's native byte order and bytes

     are swapped before characters are converted.


     If bTestByteOrder is false or the first character of sUTF32[]

     is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string

     must match the CPU's byte order.


   sUTF32_count - [in]

     If sUTF32_count >= 0, then it specifies the number of

     ON__UINT32 elements in sUTF32[] to convert.


     If sUTF32_count == -1, then sUTF32 must be a null terminated

     string and all the elements up to the first null element are

     converted.


   sUTF8 - [out]

     If sUTF8 is not null and sUTF8_count > 0, then the UTF-8

     encoded string is returned in this buffer. If there is room

     for the null terminator, the converted string will be null

     terminated. The null terminator is never included in the count

     returned by this function. The converted string is in the

     CPU's native byte order. No byte order mark is prepended.


   sUTF8_count - [in]

     If sUTF8_count > 0, then it specifies the number of available

     ON__UINT8 elements in the sUTF8[] buffer.


     If sUTF8_count == 0, then the sUTF8 parameter is ignored.


   error_status - [out]

     If error_status is not null, then bits of *error_status are

     set to indicate the success or failure of the conversion.

     When the error_mask parameter is used to mask some

     conversion errors, multiple bits may be set.

        0: Successful conversion with no errors.

        1: Invalid input parameters. This error cannot be masked.

        2: The sUTF8 output buffer was not large enough to hold

           the converted string. This error cannot be masked.

        4: The values of two UTF-32 elements form a valid

           UTF-16 surrogate pair. This error can be masked. If the

           error is masked, then the surrogate pair is converted

           to a valid unicode code point, its UTF-8 encoding is

           added to the UTF-8 output string and parsing continues.

       16: An invalid unicode code point occurred in sUTF32[].

           This error can be masked. If the error is masked and

           error_code_point is a valid unicode code point,

           then its UTF-8 encoding is added to the UTF-8 output

           string and parsing continues.


   error_mask - [in]

     If 0 != (error_mask & 4), then type 4 errors are masked.

     If 0 != (error_mask & 16) and error_code_point is a valid unicode

     code point value, then type 16 errors are masked.


   error_code_point - [in]

     Unicode code point value to use when masking type 16 errors.

     If 0 == (error_mask & 16), then this parameter is ignored.

     0xFFFD is a popular choice for the error_code_point value.


   sNextUTF32 - [out]

     If sNextUTF32 is not null, then *sNextUTF32 points to the first

     element in the input sUTF32[] buffer that was not converted.


     If an error occurs and is not masked, then *sNextUTF32 points to

     the element of sUTF32[] where the conversion failed.  If no errors

     occur or all errors are masked, then *sNextUTF32 points to

     sUTF32 + sUTF32_count.


 Returns:

   If sUTF8_count > 0, the return value is the number of ON__UINT8

   elements written to sUTF8[].  When the return value < sUTF8_count,

   a null terminator is written to sUTF8[return value].


   If sUTF8_count == 0, the return value is the minimum number of

   ON__UINT8 elements that are needed to hold the converted string.

   The return value does not include room for a null terminator.

   Increment the return value by one if you want to have an element

   to use for a null terminator.

 */

 ON_DECL

 int ON_ConvertUTF32ToUTF8(

     int bTestByteOrder,              /* [in] when true, a leading 0x0000FEFF or 0xFFFE0000 element is treated as a byte order mark */

     const ON__UINT32* sUTF32,        /* [in] UTF-32 string to convert */

     int sUTF32_count,                /* [in] element count, or -1 when sUTF32 is null terminated */

     ON__UINT8* sUTF8,                /* [out] UTF-8 output buffer; ignored when sUTF8_count is 0 */

     int sUTF8_count,                 /* [in] available elements in sUTF8[]; 0 requests the required length */

     unsigned int* error_status,      /* [out] may be null; receives the error bits */

     unsigned int error_mask,         /* [in] bits 4 and 16 select the error types to mask */

     ON__UINT32 error_code_point,     /* [in] code point used when type 16 errors are masked */

     const ON__UINT32** sNextUTF32    /* [out] may be null; first sUTF32[] element that was not converted */

     );


 /*

 Description:

   Convert a unicode string from a UTF-32 encoded ON__UINT32 array

   into a UTF-16 encoded ON__UINT16 array.


 Parameters:

   bTestByteOrder - [in]

     If bTestByteOrder is true and the first element of sUTF32[]

     is 0x0000FEFF, then this element is ignored.


     If bTestByteOrder is true and the first element of sUTF32[]

     is 0xFFFE0000, then this element is ignored and the subsequent

     elements of sUTF32[] have their bytes swapped before the

     conversion is calculated.


     In all other cases the first element of sUTF32[] is

     converted and no byte swapping is performed.


   sUTF32 - [in]

     UTF-32 string to convert.


     If bTestByteOrder is true and the first element of sUTF32[]

     is 0x0000FEFF, then this element is skipped and it is assumed

     that sUTF32[] is in the CPU's native byte order.


     If bTestByteOrder is true and the first element of sUTF32[]

     is 0xFFFE0000, then this element is skipped and it is assumed

     that sUTF32[] is not in the CPU's native byte order and bytes

     are swapped before characters are converted.


     If bTestByteOrder is false or the first character of sUTF32[]

     is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string

     must match the CPU's byte order.


   sUTF32_count - [in]

     If sUTF32_count >= 0, then it specifies the number of

     ON__UINT32 elements in sUTF32[] to convert.


     If sUTF32_count == -1, then sUTF32 must be a null terminated

     string and all the elements up to the first null element are

     converted.


   sUTF16 - [out]

     If sUTF16 is not null and sUTF16_count > 0, then the UTF-16

     encoded string is returned in this buffer. If there is room

     for the null terminator, the converted string will be null

     terminated. The null terminator is never included in the count

     returned by this function. The converted string is in the

     CPU's native byte order. No byte order mark is prepended.


   sUTF16_count - [in]

     If sUTF16_count > 0, then it specifies the number of available

     ON__UINT16 elements in the sUTF16[] buffer.


     If sUTF16_count == 0, then the sUTF16 parameter is ignored.


   error_status - [out]

     If error_status is not null, then bits of *error_status are

     set to indicate the success or failure of the conversion.

     When the error_mask parameter is used to mask some

     conversion errors, multiple bits may be set.

        0: Successful conversion with no errors.

        1: Invalid input parameters. This error cannot be masked.

        2: The sUTF16 output buffer was not large enough to hold

           the converted string. This error cannot be masked.

        4: The values of two UTF-32 elements form a valid

           UTF-16 surrogate pair. This error can be masked. If the

           error is masked, then the surrogate pair is added to

           the UTF-16 output string and parsing continues.

       16: An invalid unicode code point occurred in sUTF32[].

           This error can be masked. If the error is masked and

           error_code_point is a valid unicode code point,

           then its UTF-16 encoding is added to the UTF-16 output

           string and parsing continues.


   error_mask - [in]

     If 0 != (error_mask & 4), then type 4 errors are masked.

     If 0 != (error_mask & 16) and error_code_point is a valid unicode

     code point value, then type 16 errors are masked.


   error_code_point - [in]

     Unicode code point value to use when masking type 16 errors.

     If 0 == (error_mask & 16), then this parameter is ignored.

     0xFFFD is a popular choice for the error_code_point value.


   sNextUTF32 - [out]

     If sNextUTF32 is not null, then *sNextUTF32 points to the first

     element in the input sUTF32[] buffer that was not converted.


     If an error occurs and is not masked, then *sNextUTF32 points to

     the element of sUTF32[] where the conversion failed.  If no errors

     occur or all errors are masked, then *sNextUTF32 points to

     sUTF32 + sUTF32_count.


 Returns:

   If sUTF16_count > 0, the return value is the number of ON__UINT16

   elements written to sUTF16[].  When the return value < sUTF16_count,

   a null terminator is written to sUTF16[return value].


   If sUTF16_count == 0, the return value is the minimum number of

   ON__UINT16 elements that are needed to hold the converted string.

   The return value does not include room for a null terminator.

   Increment the return value by one if you want to have an element

   to use for a null terminator.

 */

 /* Declaration only: UTF-32 (sUTF32[]) to native byte order UTF-16 (sUTF16[]) conversion. */
 ON_DECL

 int ON_ConvertUTF32ToUTF16(

     int bTestByteOrder,           /* [in] if true, a leading 0x0000FEFF or 0xFFFE0000 element is consumed as a byte order mark */

     const ON__UINT32* sUTF32,     /* [in] UTF-32 string to convert */

     int sUTF32_count,             /* [in] number of elements to convert, or -1 for a null terminated string */

     ON__UINT16* sUTF16,           /* [out] receives the UTF-16 string; ignored when sUTF16_count == 0 */

     int sUTF16_count,             /* [in] available elements in sUTF16[]; 0 makes the return value the required length */

     unsigned int* error_status,   /* [out] if not null, receives error bits 1, 2, 4, 16 (0 means success) */

     unsigned int error_mask,      /* [in] bits 4 and 16 select which error types are masked */

     ON__UINT32 error_code_point,  /* [in] code point used when masking type 16 errors; ignored if bit 16 of error_mask is clear */

     const ON__UINT32** sNextUTF32 /* [out] optional; presumably set to the first sUTF32[] element not converted - confirm */

     );


 /*

 Description:

   Convert a wchar_t string using the native platform's most common

   encoding into a unicode string encoded as a UTF-8 char array.


   If 2 = sizeof(wchar_t), then the wchar_t array is assumed to be

   a UTF-16 encoded string. This is the case with current versions

   of Microsoft Windows.


   If 4 = sizeof(wchar_t), then the wchar_t array is assumed to be

   a UTF-32 encoded string. This is the case with current versions

   of Apple OSX.


 Parameters:

   bTestByteOrder - [in]

     If bTestByteOrder is true and the first element of sWideChar[]

     is 0xFEFF, then this element is ignored.


     If bTestByteOrder is true and the first element of sWideChar[]

     is 0xFFFE, then this element is ignored and the subsequent

     elements of sWideChar[] have their bytes swapped before the

     conversion is calculated.


     In all other cases the first element of sWideChar[] is

     converted and no byte swapping is performed.


   sWideChar - [in]

     wchar_t string to convert.


     If bTestByteOrder is true and the first element of sWideChar[]

     is 0xFEFF, then this element is skipped and it is assumed

     that sWideChar[] is in the CPU's native byte order.


     If bTestByteOrder is true and the first element of sWideChar[]

     is 0xFFFE, then this element is skipped and it is assumed

     that sWideChar[] is not in the CPU's native byte order and bytes

     are swapped before characters are converted.


     If bTestByteOrder is false or the first character of sWideChar[]

     is neither 0xFEFF nor 0xFFFE, then the sWideChar string must match

     the CPU's byte order.


   sWideChar_count - [in]

     If sWideChar_count >= 0, then it specifies the number of

     wchar_t elements in sWideChar[] to convert.


     If sWideChar_count == -1, then sWideChar must be a null terminated

     string and all the elements up to the first null element are

     converted.


   sUTF8 - [out]

     If sUTF8 is not null and sUTF8_count > 0, then the UTF-8

     encoded string is returned in this buffer. If there is room

     for the null terminator, the converted string will be null

     terminated. The null terminator is never included in the count

     returned by this function. The converted string is in the

     CPU's native byte order. No byte order mark is prepended.


   sUTF8_count - [in]

     If sUTF8_count > 0, then it specifies the number of available

     ON__UINT8 elements in the sUTF8[] buffer.


     If sUTF8_count == 0, then the sUTF8 parameter is ignored.


   error_status - [out]

     If error_status is not null, then bits of *error_status are

     set to indicate the success or failure of the conversion.

     When the error_mask parameter is used to mask some

     conversion errors, multiple bits may be set.

        0: Successful conversion with no errors.

        1: Invalid input parameters. This error cannot be masked.

        2: The sUTF8 output buffer was not large enough to hold

           the converted string. This error cannot be masked.

       16: An illegal wchar_t encoding sequence occurred or an invalid

           unicode code point value resulted from decoding a

           wchar_t sequence. This error can be masked. If the error is

           masked and error_code_point is a valid unicode code point,

           then its UTF-8 encoding is added to the UTF-8 output

           string and parsing continues.


   error_mask - [in]

     If 0 != (error_mask & 16) and error_code_point is a valid unicode

     code point value, then type 16 errors are masked.


   error_code_point - [in]

     Unicode code point value to use when masking type 16 errors.

     If 0 == (error_mask & 16), then this parameter is ignored.

     0xFFFD is a popular choice for the error_code_point value.


   sNextWideChar - [out]

     If sNextWideChar is not null, then *sNextWideChar points to the first

     element in the input sWideChar[] buffer that was not converted.


     If an error occurs and is not masked, then *sNextWideChar points to

     the element of sWideChar[] where the conversion failed.  If no errors

     occur or all errors are masked, then *sNextWideChar points to

     sWideChar + sWideChar_count.


 Returns:

   If sUTF8_count > 0, the return value is the number of ON__UINT8

   elements written to sUTF8[].  When the return value < sUTF8_count,

   a null terminator is written to sUTF8[return value].


   If sUTF8_count == 0, the return value is the minimum number of

   ON__UINT8 elements that are needed to hold the converted string.

   The return value does not include room for a null terminator.

   Increment the return value by one if you want to have an element

   to use for a null terminator.

 */

 /* Declaration only: wchar_t (UTF-16 or UTF-32, by sizeof(wchar_t)) to UTF-8 conversion. */
 ON_DECL

 int ON_ConvertWideCharToUTF8(

     int bTestByteOrder,           /* [in] if true, a leading 0xFEFF or 0xFFFE element is consumed as a byte order mark */

     const wchar_t* sWideChar,     /* [in] wchar_t string to convert */

     int sWideChar_count,          /* [in] number of elements to convert, or -1 for a null terminated string */

     char* sUTF8,                  /* [out] receives the UTF-8 string; ignored when sUTF8_count == 0 */

     int sUTF8_count,              /* [in] available elements in sUTF8[]; 0 makes the return value the required length */

     unsigned int* error_status,   /* [out] if not null, receives error bits 1, 2, 16 (0 means success) */

     unsigned int error_mask,      /* [in] bit 16 selects masking of type 16 errors */

     ON__UINT32 error_code_point,  /* [in] code point used when masking type 16 errors; ignored if bit 16 of error_mask is clear */

     const wchar_t** sNextWideChar /* [out] if not null, *sNextWideChar is set to the first sWideChar[] element not converted */

     );


 /*

 Description:

   Convert a UTF-8 encoded char string to wchar_t string using

   the native platform's most common encoding.


   If 2 = sizeof(wchar_t), then UTF-16 encoding is used for the

   output string. This is the case with current versions of

   Microsoft Windows.


   If 4 = sizeof(wchar_t), then UTF-32 encoding is used for the

   output string. This is the case with current versions of

   Apple OSX.


 Parameters:

   sUTF8 - [in]

     UTF-8 string to convert.


   sUTF8_count - [in]

     If sUTF8_count >= 0, then it specifies the number of

     ON__UINT8 elements in sUTF8[] to convert.


     If sUTF8_count == -1, then sUTF8 must be a null terminated

     string and all the elements up to the first null element are

     converted.


   sWideChar - [out]

     If sWideChar is not null and sWideChar_count > 0, then the

     output string is returned in this buffer. If there is room

     for the null terminator, the converted string will be null

     terminated. The null terminator is never included in the count

     returned by this function. The converted string is in the

     CPU's native byte order. No byte order mark is prepended.


   sWideChar_count - [in]

     If sWideChar_count > 0, then it specifies the number of available

     wchar_t elements in the sWideChar[] buffer.


     If sWideChar_count == 0, then the sWideChar parameter is ignored.


   error_status - [out]

     If error_status is not null, then bits of *error_status are

     set to indicate the success or failure of the conversion.

     When the error_mask parameter is used to mask some

     conversion errors, multiple bits may be set.

        0: Successful conversion with no errors.

        1: Invalid input parameters. This error cannot be masked.

        2: The sWideChar output buffer was not large enough to hold

           the converted string. This error cannot be masked.

        4: The values of two UTF-8 encoding sequences formed a valid

           UTF-16 surrogate pair. This error can be masked.  If the

           error is masked, then the surrogate pair is added

           to the UTF-16 output string and parsing continues.

        8: An overlong UTF-8 encoding sequence was encountered.

           The value of the overlong sequence was a valid unicode

           code point. This error can be masked. If the error is masked,

           then the unicode code point is encoded and added to the

           output string and parsing continues.

       16: An illegal UTF-8 encoding sequence occurred or an invalid

           unicode code point value resulted from decoding a

           UTF-8 sequence. This error can be masked. If the error is

           masked and error_code_point is a valid unicode code point,

           then its encoding is added to the output string and parsing

           continues.


   error_mask - [in]

     If 0 != (error_mask & 4), then type 4 errors are masked.

     If 0 != (error_mask & 8), then type 8 errors are masked.

     If 0 != (error_mask & 16) and error_code_point is a valid unicode

     code point value, then type 16 errors are masked.


   error_code_point - [in]

     Unicode code point value to use when masking type 16 errors.

     If 0 == (error_mask & 16), then this parameter is ignored.

     0xFFFD is a popular choice for the error_code_point value.


   sNextUTF8 - [out]

     If sNextUTF8 is not null, then *sNextUTF8 points to the first

     element in the input sUTF8[] buffer that was not converted.


     If an error occurs and is not masked, then *sNextUTF8 points to

     the element of sUTF8[] where the conversion failed.  If no errors

     occur or all errors are masked, then *sNextUTF8 points to

     sUTF8 + sUTF8_count.


 Returns:

   If sWideChar_count > 0, the return value is the number of wchar_t

   elements written to sWideChar[].  When the return value < sWideChar_count,

   a null terminator is written to sWideChar[return value].


   If sWideChar_count == 0, the return value is the minimum number of

   wchar_t elements that are needed to hold the converted string.

   The return value does not include room for a null terminator.

   Increment the return value by one if you want to have an element

   to use for a null terminator.

 */

 /* Declaration only: UTF-8 to wchar_t (UTF-16 or UTF-32, by sizeof(wchar_t)) conversion. */
 ON_DECL

 int ON_ConvertUTF8ToWideChar(

     const char* sUTF8,            /* [in] UTF-8 string to convert */

     int sUTF8_count,              /* [in] number of elements to convert, or -1 for a null terminated string */

     wchar_t* sWideChar,           /* [out] receives the converted string; ignored when sWideChar_count == 0 */

     int sWideChar_count,          /* [in] available elements in sWideChar[]; 0 makes the return value the required length */

     unsigned int* error_status,   /* [out] if not null, receives error bits 1, 2, 4, 8, 16 (0 means success) */

     unsigned int error_mask,      /* [in] bits 4, 8 and 16 select which error types are masked */

     ON__UINT32 error_code_point,  /* [in] code point used when masking type 16 errors; ignored if bit 16 of error_mask is clear */

     const char** sNextUTF8        /* [out] if not null, *sNextUTF8 is set to the first sUTF8[] element not converted */

     );


 ON_END_EXTERNC


 #endif

ON_UnicodeErrorParameters
Definition: opennurbs_unicode.h:23

ON_UnicodeErrorParameters::m_error_status
unsigned int m_error_status
Definition: opennurbs_unicode.h:62

ON_UnicodeErrorParameters::m_error_mask
unsigned int m_error_mask
Definition: opennurbs_unicode.h:70

ON_UnicodeErrorParameters::m_error_code_point
ON__UINT32 m_error_code_point
Definition: opennurbs_unicode.h:77