Point Cloud Library (PCL)  1.14.1-dev
opennurbs_unicode.h
1 /* $NoKeywords: $ */
2 /*
3 //
4 // Copyright (c) 1993-2012 Robert McNeel & Associates. All rights reserved.
5 // OpenNURBS, Rhinoceros, and Rhino3D are registered trademarks of Robert
6 // McNeel & Associates.
7 //
8 // THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
9 // ALL IMPLIED WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND OF
10 // MERCHANTABILITY ARE HEREBY DISCLAIMED.
11 //
12 // For complete openNURBS copyright information see <http://www.opennurbs.org>.
13 //
14 ////////////////////////////////////////////////////////////////
15 */
16 
17 #if !defined(OPENNURBS_UNICODE_INC_)
18 #define OPENNURBS_UNICODE_INC_
19 
20 ON_BEGIN_EXTERNC
21 
22 struct ON_UnicodeErrorParameters /* passed as the "e" argument of the ON_Decode...() functions */
23 {
24  /*
25  If an error occurs, then bits of m_error_status are
26  set to indicate what type of error occurred.
27 
28  Error types:
29  1: The input parameters were invalid.
30  This error cannot be masked.
31 
32  2: The output buffer was not large enough to hold the converted
33  string. As much conversion as possible is performed in this
34  case and the error cannot be masked.
35 
36  4: When parsing a UTF-8 or UTF-32 string, the values of two
37  consecutive encoding sequences formed a valid UTF-16
38  surrogate pair.
39 
40  This error is masked if 0 != (4 & m_error_mask).
41  If the error is masked, then the surrogate pair is
42  decoded, the value of the resulting unicode code point
43  is used, and parsing continues.
44 
45  8: An overlong UTF-8 encoding sequence was encountered and
46  the value of the overlong UTF-8 sequence was a valid unicode
47  code point.
48 
49  This error is masked if 0 != (8 & m_error_mask).
50  If the error is masked, then the unicode code point is
51  used and parsing continues.
52 
53  16: An illegal UTF-8 encoding sequence occurred or an invalid
54  unicode code point value resulted from decoding a
55  UTF-8 sequence.
56 
57  This error is masked if 0 != (16 & m_error_mask).
58  If the error is masked and the value of m_error_code_point is
59  a valid unicode code point, then m_error_code_point is used
60  and parsing continues.
61  */
62  unsigned int m_error_status;
63 
64  /*
65  If 0 != (m_error_mask & 4), then type 4 errors are masked.
66  If 0 != (m_error_mask & 8), then type 8 errors are masked.
67  If 0 != (m_error_mask & 16) and m_error_code_point is a valid unicode
68  code point value, then type 16 errors are masked.
69  */
70  unsigned int m_error_mask;
71 
72  /*
73  Unicode code point value to use when masking type 16 errors.
74  If 0 == (m_error_mask & 16), then this member is ignored.
75  0xFFFD is a popular choice for the m_error_code_point value.
76  */
77  ON__UINT32 m_error_code_point;
78 };
79 
80 
81 /*
82 Description:
83  Test a value to determine if it is a valid unicode code point value.
84 Parameters:
85  u - [in] value to test
86 Returns:
87  true: u is a valid unicode code point
88  false: u is not a valid unicode code point
89 Remarks:
90  Valid unicode code points are the values that satisfy the test below.
91  (0 <= u && u <= 0xD7FF) || (0xE000 <= u && u <= 0x10FFFF)
92  The excluded interval [0xD800,0xDFFF] holds the UTF-16 surrogate values. */
93 ON_DECL
94 int ON_IsValidUnicodeCodePoint( ON__UINT32 u );
95 
96 /*
97 Description:
98  Convert an integer to its UTF-8 form.
99 Parameters:
100  u - [in]
101  Integer in the CPU's native byte order that can be
102  converted to UTF-8 form.
103  Valid values are in the interval [0,2147483647].
104  sUTF8 - [out]
105  sUTF8 is a buffer of 6 ON__UINT8 elements and the UTF-8 form
106  is returned in sUTF8[]. The returned value specifies how
107  many elements of sUTF8[] are set.
108 Returns:
109  0: u is too large (>=2^31) to be encoded as a UTF-8 string.
110  No changes are made to the sUTF8[] values.
111  1: the UTF-8 form of u is 1 byte returned in sUTF8[0].
112  2: the UTF-8 form of u is 2 bytes returned in sUTF8[0],sUTF8[1].
113  3: the UTF-8 form of u is 3 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2].
114  4: the UTF-8 form of u is 4 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3].
115  5: the UTF-8 form of u is 5 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4].
116  6: the UTF-8 form of u is 6 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4],sUTF8[5].
117  For return values requiring fewer than 6 bytes, no changes
118  are made to the unused bytes in sUTF8[].
119 Remarks:
120  Any integer in the range 0 to 2^31 - 1 can be encoded as a UTF-8 string.
121  When a unicode string is being encoded, take steps to ensure that
122  u is a valid unicode code point value. The function ON_IsValidUnicodeCodePoint()
123  can be used to determine if u is a valid unicode code point value.
124 */
125 ON_DECL
126 int ON_EncodeUTF8( ON__UINT32 u, ON__UINT8 sUTF8[6] );
127 
128 /*
129 Description:
130  Decode a UTF-8 encoded string to get a single unicode code point.
131 Parameters:
132  sUTF8 - [in]
133  UTF-8 string to convert.
134 
135  sUTF8_count - [in]
136  number of ON__UINT8 elements in sUTF8[].
137 
138  e - [in/out]
139  If e is null, errors are not masked and parsing is performed
140  to the point where the first error occurs.
141  If e is not null, all errors are reported by setting the appropriate
142  e->m_error_status bits and errors are handled as described in the
143  definition of the ON_UnicodeErrorParameters struct.
144 
145  unicode_code_point - [out]
146  The unicode_code_point pointer must not be null.
147  If a nonzero value is returned, then *unicode_code_point is
148  a valid unicode code point value.
149 Returns:
150  Number of elements of sUTF8[] that were parsed.
151  0 indicates failure.
152 */
153 ON_DECL
154 int ON_DecodeUTF8(
155  const ON__UINT8* sUTF8,
156  int sUTF8_count,
157  struct ON_UnicodeErrorParameters* e,
158  ON__UINT32* unicode_code_point
159  );
160 
161 /*
162 Description:
163  Convert a 4 byte unicode code point value to its UTF-16 form.
164 Parameters:
165  unicode_code_point - [in]
166  4 byte unicode code point value in the CPU's native byte order.
167  Valid values are in the interval [0,0xD7FF] or the
168  interval [0xE000,0x10FFFF].
169  sUTF16 - [out]
170  sUTF16 is a buffer of 2 ON__UINT16 elements. If the UTF-16 form
171  is a single value, it is returned in sUTF16[0]. If the UTF-16
172  form is a surrogate pair, the first code unit (high surrogate)
173  is returned in sUTF16[0] and the second unit (low surrogate) is
174  returned in sUTF16[1]. The returned values are in
175  the CPU's native byte order.
176 Returns:
177  0: unicode_code_point is not a valid Unicode code point. No changes
178  are made to the sUTF16[] values.
179  1: unicode_code_point is a valid Unicode code point with a UTF-16
180  form consisting of the single value returned in sUTF16[0].
181  2: unicode_code_point is a valid Unicode code point with a UTF-16
182  form consisting of a surrogate pair returned in sUTF16[0] and sUTF16[1].
183 */
184 ON_DECL
185 int ON_EncodeUTF16( ON__UINT32 unicode_code_point, ON__UINT16 sUTF16[2] );
186 
187 /*
188 Description:
189  Decode a UTF-16 string to get a single unicode code point.
190 Parameters:
191  sUTF16 - [in]
192  UTF-16 string to convert. Use ON_DecodeSwapByteUTF16() when the
193  elements have byte order opposite the CPU's native byte order.
194  sUTF16_count - [in]
195  number of ON__UINT16 elements in sUTF16[].
196 
197  e - [in/out]
198  If e is null, errors are not masked and parsing is performed
199  to the point where the first error occurs.
200  If e is not null, all errors are reported by setting the appropriate
201  e->m_error_status bits and errors are handled as described in the
202  definition of the ON_UnicodeErrorParameters struct.
203 
204  unicode_code_point - [out]
205  The unicode_code_point pointer must not be null.
206  If a nonzero value is returned, then *unicode_code_point is
207  a valid unicode code point value in the CPU's native byte order.
208 Returns:
209  Number of elements of sUTF16[] that were parsed.
210  0 indicates failure.
211 */
212 ON_DECL
213 int ON_DecodeUTF16(
214  const ON__UINT16* sUTF16,
215  int sUTF16_count,
216  struct ON_UnicodeErrorParameters* e,
217  ON__UINT32* unicode_code_point
218  );
219 
220 /*
221 Description:
222  Decode a UTF-16 encoded string whose elements have byte order
223  opposite the CPU's native byte order to get a single unicode code point.
224 Parameters:
225  sUTF16 - [in]
226  UTF-16 string to convert with byte order opposite the
227  CPU's native byte order.
228 
229  sUTF16_count - [in]
230  number of ON__UINT16 elements in sUTF16[].
231 
232  e - [in/out]
233  If e is null, errors are not masked and parsing is performed
234  to the point where the first error occurs.
235  If e is not null, all errors are reported by setting the appropriate
236  e->m_error_status bits and errors are handled as described in the
237  definition of the ON_UnicodeErrorParameters struct.
238 
239  unicode_code_point - [out]
240  The unicode_code_point pointer must not be null.
241  If a nonzero value is returned, then *unicode_code_point is
242  a valid unicode code point value in the CPU's native byte order.
243 Returns:
244  Number of elements of sUTF16[] that were parsed.
245  0 indicates failure.
246 */
247 ON_DECL
248 int ON_DecodeSwapByteUTF16(
249  const ON__UINT16* sUTF16,
250  int sUTF16_count,
251  struct ON_UnicodeErrorParameters* e,
252  ON__UINT32* unicode_code_point
253  );
254 
255 /*
256 Description:
257  Convert a unicode string from a UTF-8 encoded ON__UINT8 array
258  into a UTF-16 encoded ON__UINT16 array.
259 
260 Parameters:
261  sUTF8 - [in]
262  UTF-8 string to convert.
263 
264  sUTF8_count - [in]
265  If sUTF8_count >= 0, then it specifies the number of
266  ON__UINT8 elements in sUTF8[] to convert.
267 
268  If sUTF8_count == -1, then sUTF8 must be a null terminated
269  string and all the elements up to the first null element are
270  converted.
271 
272  sUTF16 - [out]
273  If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
274  encoded string is returned in this buffer. If there is room
275  for the null terminator, the converted string will be null
276  terminated. The null terminator is never included in the count
277  returned by this function. The converted string is in the
278  CPU's native byte order. No byte order mark is prepended.
279 
280  sUTF16_count - [in]
281  If sUTF16_count > 0, then it specifies the number of available
282  ON__UINT16 elements in the sUTF16[] buffer.
283 
284  If sUTF16_count == 0, then the sUTF16 parameter is ignored.
285 
286  error_status - [out]
287  If error_status is not null, then bits of *error_status are
288  set to indicate the success or failure of the conversion.
289  When the error_mask parameter is used to mask some
290  conversion errors, multiple bits may be set.
291  0: Successful conversion with no errors.
292  1: Invalid input parameters. This error cannot be masked.
293  2: The sUTF16 output buffer was not large enough to hold
294  the converted string. This error cannot be masked.
295  4: The values of two UTF-8 encoding sequences formed a valid
296  UTF-16 surrogate pair. This error can be masked. If the
297  error is masked, then the surrogate pair is added
298  to the UTF-16 output string and parsing continues.
299  8: An overlong UTF-8 encoding sequence was encountered.
300  The value of the overlong sequence was a valid unicode
301  code point. This error can be masked. If the error is masked,
302  then the unicode code point is encoded and added to the
303  UTF-16 output string and parsing continues.
304  16: An illegal UTF-8 encoding sequence occurred or an invalid
305  unicode code point value resulted from decoding a
306  UTF-8 sequence. This error can be masked. If the error is
307  masked and error_code_point is a valid unicode code point,
308  then its UTF-16 encoding is added to the UTF-16 output
309  string and parsing continues.
310 
311  error_mask - [in]
312  If 0 != (error_mask & 4), then type 4 errors are masked.
313  If 0 != (error_mask & 8), then type 8 errors are masked.
314  If 0 != (error_mask & 16) and error_code_point is a valid unicode
315  code point value, then type 16 errors are masked.
316 
317  error_code_point - [in]
318  Unicode code point value to use when masking type 16 errors.
319  If 0 == (error_mask & 16), then this parameter is ignored.
320  0xFFFD is a popular choice for the error_code_point value.
321 
322  sNextUTF8 - [out]
323  If sNextUTF8 is not null, then *sNextUTF8 points to the first
324  element in the input sUTF8[] buffer that was not converted.
325 
326  If an error occurs and is not masked, then *sNextUTF8 points to
327  the element of sUTF8[] where the conversion failed. If no errors
328  occur or all errors are masked, then *sNextUTF8 points to
329  sUTF8 + sUTF8_count.
330 
331 Returns:
332  If sUTF16_count > 0, the return value is the number of ON__UINT16
333  elements written to sUTF16[]. When the return value < sUTF16_count,
334  a null terminator is written to sUTF16[return value].
335 
336  If sUTF16_count == 0, the return value is the minimum number of
337  ON__UINT16 elements that are needed to hold the converted string.
338  The return value does not include room for a null terminator.
339  Increment the return value by one if you want to have an element
340  to use for a null terminator.
341 */
342 ON_DECL
343 int ON_ConvertUTF8ToUTF16(
344  const ON__UINT8* sUTF8,
345  int sUTF8_count,
346  ON__UINT16* sUTF16,
347  int sUTF16_count,
348  unsigned int* error_status,
349  unsigned int error_mask,
350  ON__UINT32 error_code_point,
351  const ON__UINT8** sNextUTF8
352  );
353 
354 /*
355 Description:
356  Convert a unicode string from a UTF-8 encoded ON__UINT8 array
357  into a UTF-32 encoded ON__UINT32 array.
358 
359 Parameters:
360  sUTF8 - [in]
361  UTF-8 string to convert.
362 
363  sUTF8_count - [in]
364  If sUTF8_count >= 0, then it specifies the number of
365  ON__UINT8 elements in sUTF8[] to convert.
366 
367  If sUTF8_count == -1, then sUTF8 must be a null terminated
368  string and all the elements up to the first null element are
369  converted.
370 
371  sUTF32 - [out]
372  If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
373  encoded string is returned in this buffer. If there is room
374  for the null terminator, the converted string will be null
375  terminated. The null terminator is never included in the count
376  returned by this function. The converted string is in the
377  CPU's native byte order. No byte order mark is prepended.
378 
379  sUTF32_count - [in]
380  If sUTF32_count > 0, then it specifies the number of available
381  ON__UINT32 elements in the sUTF32[] buffer.
382 
383  If sUTF32_count == 0, then the sUTF32 parameter is ignored.
384 
385  error_status - [out]
386  If error_status is not null, then bits of *error_status are
387  set to indicate the success or failure of the conversion.
388  When the error_mask parameter is used to mask some
389  conversion errors, multiple bits may be set.
390  0: Successful conversion with no errors.
391  1: Invalid input parameters. This error cannot be masked.
392  2: The sUTF32 output buffer was not large enough to hold
393  the converted string. This error cannot be masked.
394  4: The values of two UTF-8 encoding sequences formed a valid
395  UTF-16 surrogate pair. This error can be masked. If the
396  error is masked, then the surrogate pair is decoded,
397  the code point value is added to the UTF-32 output
398  string and parsing continues.
399  8: An overlong UTF-8 encoding sequence was encountered.
400  The value of the overlong sequence was a valid unicode
401  code point. This error can be masked. If the error is masked,
402  then the unicode code point is added to the UTF-32
403  output string and parsing continues.
404  16: An illegal UTF-8 encoding sequence occurred or an invalid
405  unicode code point value resulted from decoding a
406  UTF-8 sequence. This error can be masked. If the error is
407  masked and error_code_point is a valid unicode code point,
408  then its value is added to the UTF-32 output string and
409  parsing continues.
410 
411  error_mask - [in]
412  If 0 != (error_mask & 4), then type 4 errors are masked.
413  If 0 != (error_mask & 8), then type 8 errors are masked.
414  If 0 != (error_mask & 16) and error_code_point is a valid unicode
415  code point value, then type 16 errors are masked.
416 
417  error_code_point - [in]
418  Unicode code point value to use when masking type 16 errors.
419  If 0 == (error_mask & 16), then this parameter is ignored.
420  0xFFFD is a popular choice for the error_code_point value.
421 
422  sNextUTF8 - [out]
423  If sNextUTF8 is not null, then *sNextUTF8 points to the first
424  element in the input sUTF8[] buffer that was not converted.
425 
426  If an error occurs and is not masked, then *sNextUTF8 points to
427  the element of sUTF8[] where the conversion failed. If no errors
428  occur or all errors are masked, then *sNextUTF8 points to
429  sUTF8 + sUTF8_count.
430 
431 Returns:
432  If sUTF32_count > 0, the return value is the number of ON__UINT32
433  elements written to sUTF32[]. When the return value < sUTF32_count,
434  a null terminator is written to sUTF32[return value].
435 
436  If sUTF32_count == 0, the return value is the minimum number of
437  ON__UINT32 elements that are needed to hold the converted string.
438  The return value does not include room for a null terminator.
439  Increment the return value by one if you want to have an element
440  to use for a null terminator.
441 */
442 ON_DECL
443 int ON_ConvertUTF8ToUTF32(
444  const ON__UINT8* sUTF8,
445  int sUTF8_count,
446  ON__UINT32* sUTF32,
447  int sUTF32_count,
448  unsigned int* error_status,
449  unsigned int error_mask,
450  ON__UINT32 error_code_point,
451  const ON__UINT8** sNextUTF8
452  );
453 
454 /*
455 Description:
456  Convert a unicode string from a UTF-16 encoded ON__UINT16 array
457  into a UTF-8 encoded ON__UINT8 array.
458 
459 Parameters:
460  bTestByteOrder - [in]
461  If bTestByteOrder is true and the first element of sUTF16[]
462  is 0xFEFF, then this element is ignored.
463 
464  If bTestByteOrder is true and the first element of sUTF16[]
465  is 0xFFFE, then this element is ignored and the subsequent
466  elements of sUTF16[] have their bytes swapped before the
467  conversion is calculated.
468 
469  In all other cases the first element of sUTF16[] is
470  converted and no byte swapping is performed.
471 
472  sUTF16 - [in]
473  UTF-16 string to convert.
474 
475  If bTestByteOrder is true and the first element of sUTF16[]
476  is 0xFEFF, then this element is skipped and it is assumed
477  that sUTF16[] is in the CPU's native byte order.
478 
479  If bTestByteOrder is true and the first element of sUTF16[]
480  is 0xFFFE, then this element is skipped and it is assumed
481  that sUTF16[] is not in the CPU's native byte order and bytes
482  are swapped before characters are converted.
483 
484  If bTestByteOrder is false or the first character of sUTF16[]
485  is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match
486  the CPU's byte order.
487 
488  sUTF16_count - [in]
489  If sUTF16_count >= 0, then it specifies the number of
490  ON__UINT16 elements in sUTF16[] to convert.
491 
492  If sUTF16_count == -1, then sUTF16 must be a null terminated
493  string and all the elements up to the first null element are
494  converted.
495 
496  sUTF8 - [out]
497  If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
498  encoded string is returned in this buffer. If there is room
499  for the null terminator, the converted string will be null
500  terminated. The null terminator is never included in the count
501  returned by this function. The converted string is in the
502  CPU's native byte order. No byte order mark is prepended.
503 
504  sUTF8_count - [in]
505  If sUTF8_count > 0, then it specifies the number of available
506  ON__UINT8 elements in the sUTF8[] buffer.
507 
508  If sUTF8_count == 0, then the sUTF8 parameter is ignored.
509 
510  error_status - [out]
511  If error_status is not null, then bits of *error_status are
512  set to indicate the success or failure of the conversion.
513  When the error_mask parameter is used to mask some
514  conversion errors, multiple bits may be set.
515  0: Successful conversion with no errors.
516  1: Invalid input parameters. This error cannot be masked.
517  2: The sUTF8 output buffer was not large enough to hold
518  the converted string. This error cannot be masked.
519  16: An illegal UTF-16 encoding sequence occurred or an invalid
520  unicode code point value resulted from decoding a
521  UTF-16 sequence. This error can be masked. If the error is
522  masked and error_code_point is a valid unicode code point,
523  then its UTF-8 encoding is added to the UTF-8 output
524  string and parsing continues.
525 
526  error_mask - [in]
527  If 0 != (error_mask & 16) and error_code_point is a valid unicode
528  code point value, then type 16 errors are masked.
529 
530  error_code_point - [in]
531  Unicode code point value to use when masking type 16 errors.
532  If 0 == (error_mask & 16), then this parameter is ignored.
533  0xFFFD is a popular choice for the error_code_point value.
534 
535  sNextUTF16 - [out]
536  If sNextUTF16 is not null, then *sNextUTF16 points to the first
537  element in the input sUTF16[] buffer that was not converted.
538 
539  If an error occurs and is not masked, then *sNextUTF16 points to
540  the element of sUTF16[] where the conversion failed. If no errors
541  occur or all errors are masked, then *sNextUTF16 points to
542  sUTF16 + sUTF16_count.
543 Returns:
544  If sUTF8_count > 0, the return value is the number of ON__UINT8
545  elements written to sUTF8[]. When the return value < sUTF8_count,
546  a null terminator is written to sUTF8[return value].
547 
548  If sUTF8_count == 0, the return value is the minimum number of
549  ON__UINT8 elements that are needed to hold the converted string.
550  The return value does not include room for a null terminator.
551  Increment the return value by one if you want to have an element
552  to use for a null terminator.
553 */
554 ON_DECL
555 int ON_ConvertUTF16ToUTF8(
556  int bTestByteOrder,
557  const ON__UINT16* sUTF16,
558  int sUTF16_count,
559  ON__UINT8* sUTF8,
560  int sUTF8_count,
561  unsigned int* error_status,
562  unsigned int error_mask,
563  ON__UINT32 error_code_point,
564  const ON__UINT16** sNextUTF16
565  );
566 
567 /*
568 Description:
569  Convert a unicode string from a UTF-16 encoded ON__UINT16 array
570  into a UTF-32 encoded ON__UINT32 array.
571 
572 Parameters:
573  bTestByteOrder - [in]
574  If bTestByteOrder is true and the first element of sUTF16[]
575  is 0xFEFF, then this element is ignored.
576 
577  If bTestByteOrder is true and the first element of sUTF16[]
578  is 0xFFFE, then this element is ignored and the subsequent
579  elements of sUTF16[] have their bytes swapped before the
580  conversion is calculated.
581 
582  In all other cases the first element of sUTF16[] is
583  converted and no byte swapping is performed.
584 
585  sUTF16 - [in]
586  UTF-16 string to convert.
587 
588  If bTestByteOrder is true and the first element of sUTF16[]
589  is 0xFEFF, then this element is skipped and it is assumed
590  that sUTF16[] is in the CPU's native byte order.
591 
592  If bTestByteOrder is true and the first element of sUTF16[]
593  is 0xFFFE, then this element is skipped and it is assumed
594  that sUTF16[] is not in the CPU's native byte order and bytes
595  are swapped before characters are converted.
596 
597  If bTestByteOrder is false or the first character of sUTF16[]
598  is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match
599  the CPU's byte order.
600 
601  sUTF16_count - [in]
602  If sUTF16_count >= 0, then it specifies the number of
603  ON__UINT16 elements in sUTF16[] to convert.
604 
605  If sUTF16_count == -1, then sUTF16 must be a null terminated
606  string and all the elements up to the first null element are
607  converted.
608 
609  sUTF32 - [out]
610  If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
611  encoded string is returned in this buffer. If there is room
612  for the null terminator, the converted string will be null
613  terminated. The null terminator is never included in the count
614  returned by this function. The converted string is in the
615  CPU's native byte order. No byte order mark is prepended.
616 
617  sUTF32_count - [in]
618  If sUTF32_count > 0, then it specifies the number of available
619  ON__UINT32 elements in the sUTF32[] buffer.
620 
621  If sUTF32_count == 0, then the sUTF32 parameter is ignored.
622 
623  error_status - [out]
624  If error_status is not null, then bits of *error_status are
625  set to indicate the success or failure of the conversion.
626  When the error_mask parameter is used to mask some
627  conversion errors, multiple bits may be set.
628  0: Successful conversion with no errors.
629  1: Invalid input parameters. This error cannot be masked.
630  2: The sUTF32 output buffer was not large enough to hold
631  the converted string. This error cannot be masked.
632  16: An illegal UTF-16 encoding sequence occurred or an invalid
633  unicode code point value resulted from decoding a
634  UTF-16 sequence. This error can be masked. If the error is
635  masked and error_code_point is a valid unicode code point,
636  then its value is added to the UTF-32 output string and
637  parsing continues.
638 
639  error_mask - [in]
640  If 0 != (error_mask & 16) and error_code_point is a valid unicode
641  code point value, then type 16 errors are masked.
642 
643  error_code_point - [in]
644  Unicode code point value to use when masking type 16 errors.
645  If 0 == (error_mask & 16), then this parameter is ignored.
646  0xFFFD is a popular choice for the error_code_point value.
647 
648  sNextUTF16 - [out]
649  If sNextUTF16 is not null, then *sNextUTF16 points to the first
650  element in the input sUTF16[] buffer that was not converted.
651 
652  If an error occurs and is not masked, then *sNextUTF16 points to
653  the element of sUTF16[] where the conversion failed. If no errors
654  occur or all errors are masked, then *sNextUTF16 points to
655  sUTF16 + sUTF16_count.
656 
657 Returns:
658  If sUTF32_count > 0, the return value is the number of ON__UINT32
659  elements written to sUTF32[]. When the return value < sUTF32_count,
660  a null terminator is written to sUTF32[return value].
661 
662  If sUTF32_count == 0, the return value is the minimum number of
663  ON__UINT32 elements that are needed to hold the converted string.
664  The return value does not include room for a null terminator.
665  Increment the return value by one if you want to have an element
666  to use for a null terminator.
667 */
668 ON_DECL
669 int ON_ConvertUTF16ToUTF32(
670  int bTestByteOrder,
671  const ON__UINT16* sUTF16,
672  int sUTF16_count,
673  unsigned int* sUTF32, /* NOTE(review): the other declarations in this header use ON__UINT32* for UTF-32 buffers - confirm unsigned int is the same type */
674  int sUTF32_count,
675  unsigned int* error_status,
676  unsigned int error_mask,
677  ON__UINT32 error_code_point,
678  const ON__UINT16** sNextUTF16
679  );
680 
681 /*
682 Description:
683  Convert a unicode string from a UTF-32 encoded ON__UINT32 array
684  into a UTF-8 encoded ON__UINT8 array.
685 
686 Parameters:
687  bTestByteOrder - [in]
688  If bTestByteOrder is true and the first element of sUTF32[]
689  is 0x0000FEFF, then this element is ignored.
690 
691  If bTestByteOrder is true and the first element of sUTF32[]
692  is 0xFFFE0000, then this element is ignored and the subsequent
693  elements of sUTF32[] have their bytes swapped before the
694  conversion is calculated.
695 
696  In all other cases the first element of sUTF32[] is
697  converted and no byte swapping is performed.
698 
699  sUTF32 - [in]
700  UTF-32 string to convert.
701 
702  If bTestByteOrder is true and the first element of sUTF32[]
703  is 0x0000FEFF, then this element is skipped and it is assumed
704  that sUTF32[] is in the CPU's native byte order.
705 
706  If bTestByteOrder is true and the first element of sUTF32[]
707  is 0xFFFE0000, then this element is skipped and it is assumed
708  that sUTF32[] is not in the CPU's native byte order and bytes
709  are swapped before characters are converted.
710 
711  If bTestByteOrder is false or the first character of sUTF32[]
712  is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string
713  must match the CPU's byte order.
714 
715  sUTF32_count - [in]
716  If sUTF32_count >= 0, then it specifies the number of
717  ON__UINT32 elements in sUTF32[] to convert.
718 
719  If sUTF32_count == -1, then sUTF32 must be a null terminated
720  string and all the elements up to the first null element are
721  converted.
722 
723  sUTF8 - [out]
724  If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
725  encoded string is returned in this buffer. If there is room
726  for the null terminator, the converted string will be null
727  terminated. The null terminator is never included in the count
728  returned by this function. The converted string is in the
729  CPU's native byte order. No byte order mark is prepended.
730 
731  sUTF8_count - [in]
732  If sUTF8_count > 0, then it specifies the number of available
733  ON__UINT8 elements in the sUTF8[] buffer.
734 
735  If sUTF8_count == 0, then the sUTF8 parameter is ignored.
736 
737  error_status - [out]
738  If error_status is not null, then bits of *error_status are
739  set to indicate the success or failure of the conversion.
740  When the error_mask parameter is used to mask some
741  conversion errors, multiple bits may be set.
742  0: Successful conversion with no errors.
743  1: Invalid input parameters. This error cannot be masked.
744  2: The sUTF8 output buffer was not large enough to hold
745  the converted string. This error cannot be masked.
746  4: The values of two UTF-32 elements form a valid
747  UTF-16 surrogate pair. This error can be masked. If the
748  error is masked, then the surrogate pair is converted
749  to a valid unicode code point, its UTF-8 encoding is
750  added to the UTF-8 output string and parsing continues.
751  16: An invalid unicode code point occurred in sUTF32[].
752  This error can be masked. If the error is masked and
753  error_code_point is a valid unicode code point,
754  then its UTF-8 encoding is added to the UTF-8 output
755  string and parsing continues.
756 
757  error_mask - [in]
758  If 0 != (error_mask & 4), then type 4 errors are masked.
759  If 0 != (error_mask & 16) and error_code_point is a valid unicode
760  code point value, then type 16 errors are masked.
761 
762  error_code_point - [in]
763  Unicode code point value to use when masking type 16 errors.
764  If 0 == (error_mask & 16), then this parameter is ignored.
765  0xFFFD is a popular choice for the error_code_point value.
766 
767  sNextUTF32 - [out]
768  If sNextUTF32 is not null, then *sNextUTF32 points to the first
769  element in the input sUTF32[] buffer that was not converted.
770 
771  If an error occurs and is not masked, then *sNextUTF32 points to
772  the element of sUTF32[] where the conversion failed. If no errors
773  occur or all errors are masked, then *sNextUTF32 points to
774  sUTF32 + sUTF32_count.
775 
776 Returns:
777  If sUTF8_count > 0, the return value is the number of ON__UINT8
778  elements written to sUTF8[]. When the return value < sUTF8_count,
779  a null terminator is written to sUTF8[return value].
780 
781  If sUTF8_count == 0, the return value is the minimum number of
782  ON__UINT8 elements that are needed to hold the converted string.
783  The return value does not include room for a null terminator.
784  Increment the return value by one if you want to have an element
785  to use for a null terminator.
786 */
787 ON_DECL
788 int ON_ConvertUTF32ToUTF8(
789  int bTestByteOrder,
790  const ON__UINT32* sUTF32,
791  int sUTF32_count,
792  ON__UINT8* sUTF8,
793  int sUTF8_count,
794  unsigned int* error_status,
795  unsigned int error_mask,
796  ON__UINT32 error_code_point,
797  const ON__UINT32** sNextUTF32
798  );
799 
800 /*
801 Description:
802  Convert a unicode string from a UTF-32 encoded ON__UINT32 array
803  into a UTF-16 encoded ON__UINT16 array.
804 
805 Parameters:
806  bTestByteOrder - [in]
807  If bTestByteOrder is true and the first element of sUTF32[]
808  is 0x0000FEFF, then this element is ignored.
809 
810  If bTestByteOrder is true and the first element of sUTF32[]
811  is 0xFFFE0000, then this element is ignored and the subsequent
812  elements of sUTF32[] have their bytes swapped before the
813  conversion is calculated.
814 
815  In all other cases the first element of sUTF32[] is
816  converted and no byte swapping is performed.
817 
818  sUTF32 - [in]
819  UTF-32 string to convert.
820 
821  If bTestByteOrder is true and the first element of sUTF32[]
822  is 0x0000FEFF, then this element is skipped and it is assumed
823  that sUTF32[] is in the CPU's native byte order.
824 
825  If bTestByteOrder is true and the first element of sUTF32[]
826  is 0xFFFE0000, then this element is skipped and it is assumed
827  that sUTF32[] is not in the CPU's native byte order and bytes
828  are swapped before characters are converted.
829 
830  If bTestByteOrder is false or the first character of sUTF32[]
831  is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string
832  must match the CPU's byte order.
833 
834  sUTF32_count - [in]
835  If sUTF32_count >= 0, then it specifies the number of
836  ON__UINT32 elements in sUTF32[] to convert.
837 
838  If sUTF32_count == -1, then sUTF32 must be a null terminated
839  string and all the elements up to the first null element are
840  converted.
841 
842  sUTF16 - [out]
843  If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
844  encoded string is returned in this buffer. If there is room
845  for the null terminator, the converted string will be null
846  terminated. The null terminator is never included in the count
847  returned by this function. The converted string is in the
848  CPU's native byte order. No byte order mark is prepended.
849 
850  sUTF16_count - [in]
851  If sUTF16_count > 0, then it specifies the number of available
852  ON__UINT16 elements in the sUTF16[] buffer.
853 
854  If sUTF16_count == 0, then the sUTF16 parameter is ignored.
855 
856  error_status - [out]
857  If error_status is not null, then bits of *error_status are
858  set to indicate the success or failure of the conversion.
859  When the error_mask parameter is used to mask some
860  conversion errors, multiple bits may be set.
861  0: Successful conversion with no errors.
862  1: Invalid input parameters. This error cannot be masked.
863  2: The sUTF16 output buffer was not large enough to hold
864  the converted string. This error cannot be masked.
865  4: The values of two UTF-32 elements form a valid
866  UTF-16 surrogate pair. This error can be masked. If the
867  error is masked, then the surrogate pair is added to
868  the UTF-16 output string and parsing continues.
869  16: An invalid unicode code point occurred in sUTF32[].
870  This error can be masked. If the error is masked and
871  error_code_point is a valid unicode code point,
872  then its UTF-16 encoding is added to the UTF-16 output
873  string and parsing continues.
874 
875  error_mask - [in]
876  If 0 != (error_mask & 4), then type 4 errors are masked.
877  If 0 != (error_mask & 16) and error_code_point is a valid unicode
878  code point value, then type 16 errors are masked.
879 
880  error_code_point - [in]
881  Unicode code point value to use when masking type 16 errors.
882  If 0 == (error_mask & 16), then this parameter is ignored.
883  0xFFFD is a popular choice for the error_code_point value.
884 
885  sNextUTF32 - [out]
886  If sNextUTF32 is not null, then *sNextUTF32 points to the first
887  element in the input sUTF32[] buffer that was not converted.
888 
889  If an error occurs and is not masked, then the element that
890  *sNextUTF32 points to is an illegal unicode code point value.
891 
892  If an error does not occur, then (*sNextUTF32 - sUTF32)
893  is the number of values converted.
894 
895 Returns:
896  If sUTF16_count > 0, the return value is the number of ON__UINT16
897  elements written to sUTF16[]. When the return value < sUTF16_count,
898  a null terminator is written to sUTF16[return value].
899 
900  If sUTF16_count == 0, the return value is the minimum number of
901  ON__UINT16 elements that are needed to hold the converted string.
902  The return value does not include room for a null terminator.
903  Increment the return value by one if you want to have an element
904  to use for a null terminator.
905 */
906 ON_DECL
907 int ON_ConvertUTF32ToUTF16( /* returns an ON__UINT16 element count that never includes a null terminator */
908  int bTestByteOrder, /* [in] true: a leading 0x0000FEFF or 0xFFFE0000 element is skipped; the latter also turns on byte swapping */
909  const ON__UINT32* sUTF32, /* [in] UTF-32 string to convert */
910  int sUTF32_count, /* [in] >= 0: number of elements to convert; -1: sUTF32 is null terminated */
911  ON__UINT16* sUTF16, /* [out] used when not null and sUTF16_count > 0; native byte order, no byte order mark */
912  int sUTF16_count, /* [in] > 0: elements available in sUTF16[]; 0: sUTF16 is ignored and the needed count is returned */
913  unsigned int* error_status, /* [out] if not null, receives error bits 1, 2, 4, 16; 0 means no errors */
914  unsigned int error_mask, /* [in] bit 4 masks type 4 errors; bit 16 masks type 16 errors if error_code_point is valid */
915  ON__UINT32 error_code_point, /* [in] substitute for masked type 16 errors; ignored when (error_mask & 16) == 0 */
916  const ON__UINT32** sNextUTF32 /* [out] if not null, set to the first part of the input that was not converted */
917  );
918 
919 /*
920 Description:
921  Convert a wchar_t string using the native platform's most common
922  encoding into a unicode string encoded as a UTF-8 char array.
923 
924  If 2 = sizeof(wchar_t), then the wchar_t array is assumed to be
925  a UTF-16 encoded string. This is the case with current versions
926  of Microsoft Windows.
927 
928  If 4 = sizeof(wchar_t), then the wchar_t array is assumed to be
929  a UTF-32 encoded string. This is the case with current versions
930  of Apple OSX.
931 
932 Parameters:
933  bTestByteOrder - [in]
934  If bTestByteOrder is true and the first element of sWideChar[]
935  is 0xFEFF, then this element is ignored.
936 
937  If bTestByteOrder is true and the first element of sWideChar[]
938  is 0xFFFE, then this element is ignored and the subsequent
939  elements of sWideChar[] have their bytes swapped before the
940  conversion is calculated.
941 
942  In all other cases the first element of sWideChar[] is
943  converted and no byte swapping is performed.
944 
945  sWideChar - [in]
946  wchar_t string to convert.
947 
948  If bTestByteOrder is true and the first element of sWideChar[]
949  is 0xFEFF, then this element is skipped and it is assumed
950  that sWideChar[] is in the CPU's native byte order.
951 
952  If bTestByteOrder is true and the first element of sWideChar[]
953  is 0xFFFE, then this element is skipped and it is assumed
954  that sWideChar[] is not in the CPU's native byte order and bytes
955  are swapped before characters are converted.
956 
957  If bTestByteOrder is false or the first character of sWideChar[]
958  is neither 0xFEFF nor 0xFFFE, then the sWideChar string must match
959  the CPU's byte order.
960 
961  sWideChar_count - [in]
962  If sWideChar_count >= 0, then it specifies the number of
963  wchar_t elements in sWideChar[] to convert.
964 
965  If sWideChar_count == -1, then sWideChar must be a null terminated
966  string and all the elements up to the first null element are
967  converted.
968 
969  sUTF8 - [out]
970  If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
971  encoded string is returned in this buffer. If there is room
972  for the null terminator, the converted string will be null
973  terminated. The null terminator is never included in the count
974  returned by this function. The converted string is in the
975  CPU's native byte order. No byte order mark is prepended.
976 
977  sUTF8_count - [in]
978  If sUTF8_count > 0, then it specifies the number of available
979  ON__UINT8 elements in the sUTF8[] buffer.
980 
981  If sUTF8_count == 0, then the sUTF8 parameter is ignored.
982 
983  error_status - [out]
984  If error_status is not null, then bits of *error_status are
985  set to indicate the success or failure of the conversion.
986  When the error_mask parameter is used to mask some
987  conversion errors, multiple bits may be set.
988  0: Successful conversion with no errors.
989  1: Invalid input parameters. This error cannot be masked.
990  2: The sUTF8 output buffer was not large enough to hold
991  the converted string. This error cannot be masked.
992  16: An illegal wchar_t encoding sequence occurred or an invalid
993  unicode code point value resulted from decoding a
994  wchar_t sequence. This error can be masked. If the error is
995  masked and error_code_point is a valid unicode code point,
996  then its UTF-8 encoding is added to the UTF-8 output
997  string and parsing continues.
998 
999  error_mask - [in]
1000  If 0 != (error_mask & 16) and error_code_point is a valid unicode
1001  code point value, then type 16 errors are masked.
1002 
1003  error_code_point - [in]
1004  Unicode code point value to use when masking type 16 errors.
1005  If 0 == (error_mask & 16), then this parameter is ignored.
1006  0xFFFD is a popular choice for the error_code_point value.
1007 
1008  sNextWideChar - [out]
1009  If sNextWideChar is not null, then *sNextWideChar points to the first
1010  element in the input sWideChar[] buffer that was not converted.
1011 
1012  If an error occurs and is not masked, then *sNextWideChar points to
1013  the element of sWideChar[] where the conversion failed. If no errors
1014  occur or all errors are masked, then *sNextWideChar points to
1015  sWideChar + sWideChar_count.
1016 Returns:
1017  If sUTF8_count > 0, the return value is the number of ON__UINT8
1018  elements written to sUTF8[]. When the return value < sUTF8_count,
1019  a null terminator is written to sUTF8[return value].
1020 
1021  If sUTF8_count == 0, the return value is the minimum number of
1022  ON__UINT8 elements that are needed to hold the converted string.
1023  The return value does not include room for a null terminator.
1024  Increment the return value by one if you want to have an element
1025  to use for a null terminator.
1026 */
1027 ON_DECL
1028 int ON_ConvertWideCharToUTF8( /* returns an ON__UINT8 element count that never includes a null terminator */
1029  int bTestByteOrder, /* [in] true: a leading 0xFEFF or 0xFFFE element is skipped; the latter also turns on byte swapping */
1030  const wchar_t* sWideChar, /* [in] treated as UTF-16 when sizeof(wchar_t) is 2 and as UTF-32 when it is 4 */
1031  int sWideChar_count, /* [in] >= 0: number of wchar_t elements to convert; -1: sWideChar is null terminated */
1032  char* sUTF8, /* [out] used when not null and sUTF8_count > 0; no byte order mark is prepended */
1033  int sUTF8_count, /* [in] > 0: elements available in sUTF8[]; 0: sUTF8 is ignored and the needed count is returned */
1034  unsigned int* error_status, /* [out] if not null, receives error bits 1, 2, 16; 0 means no errors */
1035  unsigned int error_mask, /* [in] bit 16 masks type 16 errors if error_code_point is valid */
1036  ON__UINT32 error_code_point, /* [in] substitute for masked type 16 errors; ignored when (error_mask & 16) == 0 */
1037  const wchar_t** sNextWideChar /* [out] if not null, set to the first element of sWideChar[] that was not converted */
1038  );
1039 
1040 /*
1041 Description:
1042  Convert a UTF-8 encoded char string to wchar_t string using
1043  the native platform's most common encoding.
1044 
1045  If 2 = sizeof(wchar_t), then UTF-16 encoding is used for the
1046  output string. This is the case with current versions of
1047  Microsoft Windows.
1048 
1049  If 4 = sizeof(wchar_t), then UTF-32 encoding is used for the
1050  output string. This is the case with current versions of
1051  Apple OSX.
1052 
1053 Parameters:
1054  sUTF8 - [in]
1055  UTF-8 string to convert.
1056 
1057  sUTF8_count - [in]
1058  If sUTF8_count >= 0, then it specifies the number of
1059  ON__UINT8 elements in sUTF8[] to convert.
1060 
1061  If sUTF8_count == -1, then sUTF8 must be a null terminated
1062  string and all the elements up to the first null element are
1063  converted.
1064 
1065  sWideChar - [out]
1066  If sWideChar is not null and sWideChar_count > 0, then the
1067  output string is returned in this buffer. If there is room
1068  for the null terminator, the converted string will be null
1069  terminated. The null terminator is never included in the count
1070  returned by this function. The converted string is in the
1071  CPU's native byte order. No byte order mark is prepended.
1072 
1073  sWideChar_count - [in]
1074  If sWideChar_count > 0, then it specifies the number of available
1075  wchar_t elements in the sWideChar[] buffer.
1076 
1077  If sWideChar_count == 0, then the sWideChar parameter is ignored.
1078 
1079  error_status - [out]
1080  If error_status is not null, then bits of *error_status are
1081  set to indicate the success or failure of the conversion.
1082  When the error_mask parameter is used to mask some
1083  conversion errors, multiple bits may be set.
1084  0: Successful conversion with no errors.
1085  1: Invalid input parameters. This error cannot be masked.
1086  2: The sWideChar output buffer was not large enough to hold
1087  the converted string. This error cannot be masked.
1088  4: The values of two UTF-8 encoding sequences formed a valid
1089  UTF-16 surrogate pair. This error can be masked. If the
1090  error is masked, then the surrogate pair is added
1091  to the UTF-16 output string and parsing continues.
1092  8: An overlong UTF-8 encoding sequence was encountered.
1093  The value of the overlong sequence was a valid unicode
1094  code point. This error can be masked. If the error is masked,
1095  then the unicode code point is encoded and added to the
1096  UTF-16 output string and parsing continues.
1097  16: An illegal UTF-8 encoding sequence occurred or an invalid
1098  unicode code point value resulted from decoding a
1099  UTF-8 sequence. This error can be masked. If the error is
1100  masked and error_code_point is a valid unicode code point,
1101  then its encoding is added to the output string and parsing
1102  continues.
1103 
1104  error_mask - [in]
1105  If 0 != (error_mask & 4), then type 4 errors are masked.
1106  If 0 != (error_mask & 8), then type 8 errors are masked.
1107  If 0 != (error_mask & 16) and error_code_point is a valid unicode
1108  code point value, then type 16 errors are masked.
1109 
1110  error_code_point - [in]
1111  Unicode code point value to use when masking type 16 errors.
1112  If 0 == (error_mask & 16), then this parameter is ignored.
1113  0xFFFD is a popular choice for the error_code_point value.
1114 
1115  sNextUTF8 - [out]
1116  If sNextUTF8 is not null, then *sNextUTF8 points to the first
1117  element in the input sUTF8[] buffer that was not converted.
1118 
1119  If an error occurs and is not masked, then *sNextUTF8 points to
1120  the element of sUTF8[] where the conversion failed. If no errors
1121  occur or all errors are masked, then *sNextUTF8 points to
1122  sUTF8 + sUTF8_count.
1123 
1124 Returns:
1125  If sWideChar_count > 0, the return value is the number of wchar_t
1126  elements written to sWideChar[]. When the return value < sWideChar_count,
1127  a null terminator is written to sWideChar[return value].
1128 
1129  If sWideChar_count == 0, the return value is the minimum number of
1130  wchar_t elements that are needed to hold the converted string.
1131  The return value does not include room for a null terminator.
1132  Increment the return value by one if you want to have an element
1133  to use for a null terminator.
1134 */
1135 ON_DECL
1136 int ON_ConvertUTF8ToWideChar( /* returns a wchar_t element count that never includes a null terminator */
1137  const char* sUTF8, /* [in] UTF-8 string to convert */
1138  int sUTF8_count, /* [in] >= 0: number of elements to convert; -1: sUTF8 is null terminated */
1139  wchar_t* sWideChar, /* [out] used when not null and sWideChar_count > 0; UTF-16 when sizeof(wchar_t) is 2, UTF-32 when it is 4 */
1140  int sWideChar_count, /* [in] > 0: elements available in sWideChar[]; 0: sWideChar is ignored and the needed count is returned */
1141  unsigned int* error_status, /* [out] if not null, receives error bits 1, 2, 4, 8, 16; 0 means no errors */
1142  unsigned int error_mask, /* [in] bits 4 and 8 mask those error types; bit 16 masks type 16 errors if error_code_point is valid */
1143  ON__UINT32 error_code_point, /* [in] substitute for masked type 16 errors; ignored when (error_mask & 16) == 0 */
1144  const char** sNextUTF8 /* [out] if not null, set to the first element of sUTF8[] that was not converted */
1145  );
1146 
1147 ON_END_EXTERNC
1148 
1149 #endif