+captive_validate_ucs2_fixlen()
[captive.git] / src / libcaptive / rtl / unicode.c
1 /* $Id$
2  * Unicode add-ons to reactos ntoskrnl/rtl/unicode.c for libcaptive
3  * Copyright (C) 2002 Jan Kratochvil <project-captive@jankratochvil.net>
4  * 
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; exactly version 2 of June 1991 is required
8  * 
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  * 
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  */
18
19
20 #include "config.h"
21
22 #include "captive/unicode.h"    /* self */
23 #include "captive/unicode_reactos.h"    /* for captive_ucs2 */
24 #include <glib/gtypes.h>
25 #include <glib/gmessages.h>
26 #include <glib/gunicode.h>
27 #include <glib/gmem.h>
28 #include "reactos/napi/types.h"  /* for PUNICODE_STRING etc. */
29 #include "reactos/unicode.h"
30 #include "captive/macros.h"
31 #include <glib/gstrfuncs.h>
32 #include <wchar.h>      /* for wcslen() */
33 #include <glib/ghash.h>
34 #include <string.h>
35
36
37 /* CONFIG: */
38 /* Use simplified g_malloc() functions as wrappers around g_alloca() ones.
39  */
40 #define FUNCMALLOC_FROM_ALLOCA 1
41
42
43 /* compiler sanity */
44 static gboolean captive_validate_unicode_types(void)
45 {
46         g_return_val_if_fail(4==sizeof(gunichar),FALSE);
47         g_return_val_if_fail(2==sizeof(WCHAR),FALSE);
48         g_return_val_if_fail(1==sizeof(CHAR),FALSE);
49
50         return TRUE;
51 }
52
53
54 /**
55  * captive_validate_ucs4:
56  * @string_ucs4: #const #gunichar * type string to validate.
57  * Invalid string input is forbidden.
58  *
59  * Checks the validity of all 32-bit unicharacters of 0-terminated string.
60  * It is required to have characters complying to g_unichar_validate().
61  *
62  * Returns: %TRUE if the string is valid.
63  */ 
64 gboolean captive_validate_ucs4(const gunichar *string_ucs4)
65 {
66 const gunichar *cs_ucs4;
67
68         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
69         g_return_val_if_fail(string_ucs4!=NULL,FALSE);
70
71         for (cs_ucs4=string_ucs4;*cs_ucs4;cs_ucs4++)
72                 g_return_val_if_fail(g_unichar_validate(*cs_ucs4),FALSE);
73
74         return TRUE;
75 }
76
77
78 /**
79  * captive_validate_ucs2_fixlen:
80  * @string_ucs2: #const #captive_ucs2 * type string to validate.
81  * Invalid string input is forbidden.
82  * UTF-16 encoded strings are forbidden.
83  * @string_ucs2_fixlen: Number of characters from @string_ucs2 to check.
84  * captive_ucs2_strlen(@string_ucs2)>=@string_ucs2_fixlen is required.
85  * Negative value is forbidden.
86  *
87  * Checks the validity of first @string_ucs2_fixlen 16-bit unicharacters of @string_ucs2.
88  * It is required to have characters complying to g_unichar_validate().
89  * String length must be equal or larger than @string_ucs2_fixlen;
90  *
91  * Returns: %TRUE if the string is valid.
92  */ 
93 gboolean captive_validate_ucs2_fixlen(const captive_ucs2 *string_ucs2,glong string_ucs2_fixlen)
94 {
95 const captive_ucs2 *cs_ucs2;
96
97         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
98         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
99         g_return_val_if_fail(string_ucs2_fixlen>=0,FALSE);
100
101         /* g_unichar_validate() will reject surrogates (G_UNICODE_SURROGATE) */
102         for (cs_ucs2=string_ucs2;cs_ucs2<string_ucs2+string_ucs2_fixlen;cs_ucs2++) {
103                 g_return_val_if_fail(*cs_ucs2!=0,FALSE);
104                 g_return_val_if_fail(g_unichar_validate(*cs_ucs2),FALSE);
105                 }
106
107         return TRUE;
108 }
109
110
111 /**
112  * captive_validate_ucs2:
113  * @string_ucs2: #const #captive_ucs2 * type string to validate.
114  * Invalid string input is forbidden.
115  * UTF-16 encoded strings are forbidden.
116  *
117  * Checks the validity of all 16-bit unicharacters of 0-terminated string.
118  * It is required to have characters complying to g_unichar_validate().
119  *
120  * Returns: %TRUE if the string is valid.
121  */ 
122 gboolean captive_validate_ucs2(const captive_ucs2 *string_ucs2)
123 {
124         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
125         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
126
127         return captive_validate_ucs2_fixlen(string_ucs2,captive_ucs2_strlen(string_ucs2));
128 }
129
130
131 /**
132  * captive_validate_utf8:
133  * @string_utf8: #const #gchar * utf8 type string to validate.
134  * Invalid string input is forbidden.
135  *
136  * Checks the validity of all utf8 of 0-terminated string.
137  * It is required to have characters complying to g_utf8_validate().
138  *
139  * Returns: %TRUE if the string is valid.
140  */ 
141 gboolean captive_validate_utf8(const gchar *string_utf8)
142 {
143         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
144         g_return_val_if_fail(string_utf8!=NULL,FALSE);
145
146         g_return_val_if_fail(g_utf8_validate(
147                                         string_utf8,    /* str */
148                                         -1,     /* max_len; -1 means '\0'-terminated */
149                                         NULL),  /* end */
150                         FALSE);
151
152         return TRUE;
153 }
154
155
156 /**
157  * captive_ucs2_strlen:
158  * @string_ucs2: String of type #const #gunichar2 * in pure UCS-2
159  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
160  *
161  * Counts the number of characters (=2bytes) in @strings_ucs2.
162  *
163  * Returns: @string_ucs2 length in UCS-2 characters.
164  */
165 glong captive_ucs2_strlen(const captive_ucs2 *string_ucs2)
166 {
167 glong r;
168
169         /* Do not call captive_validate_ucs2(string_ucs2) as we would be looping! */
170         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
171         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
172
173         for (r=0;*string_ucs2;string_ucs2++)
174                 r++;
175
176         return r;
177 }
178
179
180 /**
181  * captive_validate_UnicodeString:
182  * @string_UnicodeString: #PUNICODE_STRING type string to validate.
183  * Invalid string input is forbidden.
184  *
185  * Checks the internal consistency of the given @string_UnicodeString.
186  * It is required to have characters complying to g_unichar_validate().
187  * @string_UnicodeString MUST be zero-terminated.
188  *
189  * Returns: %TRUE if the string is valid.
190  */
191 gboolean captive_validate_UnicodeString(const UNICODE_STRING *string_UnicodeString)
192 {
193         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
194         g_return_val_if_fail(sizeof(WCHAR)==sizeof(*string_UnicodeString->Buffer),FALSE);
195         g_return_val_if_fail(string_UnicodeString!=NULL,FALSE);
196         g_return_val_if_fail(string_UnicodeString->Length%sizeof(*string_UnicodeString->Buffer)==0,FALSE);
197         g_return_val_if_fail(string_UnicodeString->MaximumLength
198                         >=string_UnicodeString->Length+sizeof(*string_UnicodeString->Buffer),FALSE);
199         g_return_val_if_fail(string_UnicodeString->Length==sizeof(*string_UnicodeString->Buffer)*
200                         captive_ucs2_strlen(string_UnicodeString->Buffer)
201                         ,FALSE);
202
203         g_return_val_if_fail(captive_validate_ucs2(string_UnicodeString->Buffer),FALSE);
204
205         return TRUE;
206 }
207
208
209 /**
210  * captive_validate_UnicodeString_noterm:
211  * @string_UnicodeString_noterm: #PUNICODE_STRING type string to validate.
212  * Invalid string input is forbidden.
213  *
214  * Checks the internal consistency of the given @string_UnicodeString.
215  * It is required to have characters complying to g_unichar_validate().
216  * @string_UnicodeString_noterm does not neet to be zero-terminated.
217  *
218  * Returns: %TRUE if the string is valid.
219  */
220 gboolean captive_validate_UnicodeString_noterm(const UNICODE_STRING *string_UnicodeString_noterm)
221 {
222 const WCHAR *cwp;
223
224         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
225         g_return_val_if_fail(sizeof(WCHAR)==sizeof(*string_UnicodeString_noterm->Buffer),FALSE);
226         g_return_val_if_fail(string_UnicodeString_noterm!=NULL,FALSE);
227         g_return_val_if_fail(string_UnicodeString_noterm->Length%sizeof(*string_UnicodeString_noterm->Buffer)==0,FALSE);
228         g_return_val_if_fail(string_UnicodeString_noterm->MaximumLength>=string_UnicodeString_noterm->Length,FALSE);
229
230         for (
231                         cwp=string_UnicodeString_noterm->Buffer;
232                         cwp<string_UnicodeString_noterm->Buffer
233                                         +(string_UnicodeString_noterm->Length/sizeof(*string_UnicodeString_noterm->Buffer));
234                         cwp++)
235                 g_return_val_if_fail(*cwp!=0,FALSE);
236
237         g_return_val_if_fail(captive_validate_ucs2_fixlen(string_UnicodeString_noterm->Buffer,
238                                         string_UnicodeString_noterm->Length/sizeof(*string_UnicodeString_noterm->Buffer)),
239                         FALSE);
240
241         return TRUE;
242 }
243
244
245 /**
246  * captive_validate_AnsiString:
247  * @string_AnsiString: #PANSI_STRING type string to validate.
248  * Invalid string input is forbidden.
249  *
250  * Checks the internal consistency of the given @string_AnsiString.
251  *
252  * Returns: %TRUE if the string is valid.
253  */
254 gboolean captive_validate_AnsiString(const ANSI_STRING *string_AnsiString)
255 {
256         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
257         g_return_val_if_fail(sizeof(CHAR)==sizeof(*string_AnsiString->Buffer),FALSE);
258         g_return_val_if_fail(string_AnsiString!=NULL,FALSE);
259         g_return_val_if_fail(string_AnsiString->MaximumLength>=string_AnsiString->Length+1,FALSE);
260         g_return_val_if_fail(string_AnsiString->Length==strlen(string_AnsiString->Buffer),FALSE);
261
262         return TRUE;
263 }
264
265
266 /* detect required memory size for g_alloca() */
267 size_t _captive_UnicodeString_to_utf8_alloca_internal_sizeof(const UNICODE_STRING *string_UnicodeString)
268 {
269 glong length;
270 size_t r;
271 const WCHAR *cwcharp;
272
273         g_return_val_if_fail(captive_validate_UnicodeString(string_UnicodeString),1);
274
275         /* measure 'string_UnicodeString->Buffer' length in UTF-8 to 'r' */
276         cwcharp=string_UnicodeString->Buffer;
277         r=0;
278         for (length=string_UnicodeString->Length/sizeof(*string_UnicodeString->Buffer);length;length--) {
279 gint utf8len;
280
281                 utf8len=g_unichar_to_utf8(
282                                 *cwcharp++,     /* c */
283                                 NULL);  /* outbuf=NULL => just the length will be computed */
284                 g_assert(utf8len>=0);
285                 r+=utf8len;
286                 }
287         g_assert(*cwcharp==0);
288         r++;    /* '\0'-termination */
289
290         /* utf8 byte-size */
291         return r;
292 }
293
294 /* transfer 'string_UnicodeString' to memory in 'mem' as utf8 w/o any further allocations */
295 void _captive_UnicodeString_to_utf8_alloca_internal_fill(gchar *mem,const UNICODE_STRING *string_UnicodeString)
296 {
297 const WCHAR *cwcharp;
298 #ifndef G_DISABLE_ASSERT
299 gchar *mem_orig=mem;
300 #endif /* G_DISABLE_ASSERT */
301
302         g_return_if_fail(mem!=NULL);
303         if (!captive_validate_UnicodeString(string_UnicodeString)) {
304                 *mem='\0';
305                 g_return_if_reached();
306                 }
307
308         /* We can't use any glib string conversions as UNICODE_STRING uses ucs2! */
309         /* We can't use any glib string conversions as we need to write the string
310          * to our supplied memory storage but glib always g_malloc()s it
311          */
312         /* copy 'string_UnicodeString->Buffer' to 'mem' */
313         for (cwcharp=string_UnicodeString->Buffer;*cwcharp;cwcharp++) {
314 gint utf8len;
315
316                 utf8len=g_unichar_to_utf8(
317                                 (gunichar)*cwcharp,     /* c */
318                                 mem);   /* outbuf */
319                 g_assert(utf8len>=0);
320                 mem+=utf8len;
321                 }
322         *mem='\0';
323
324         g_assert((size_t)((mem+1)-mem_orig) == _captive_UnicodeString_to_utf8_alloca_internal_sizeof(string_UnicodeString));
325         g_assert(captive_validate_utf8(mem_orig));
326 }
327
328
329 /**
330  * captive_UnicodeString_to_utf8_malloc:
331  * @string_UnicodeString: #PUNICODE_STRING type of string to convert.
332  *
333  * g_malloc()-based conversion from #PUNICODE_STRING to plain #utf8 string.
334  * You must free the result with g_free() function.
335  *
336  * Returns: #const #gchar * g_malloc()ed converted string @string_UnicodeString.
337  */
338 gchar *captive_UnicodeString_to_utf8_malloc(const UNICODE_STRING *string_UnicodeString)
339 {
340 gchar *r;
341 #ifndef FUNCMALLOC_FROM_ALLOCA
342 glong utf16_read,utf8_written;
343 GError *err;
344 #endif /* !FUNCMALLOC_FROM_ALLOCA */
345
346         g_return_val_if_fail(captive_validate_UnicodeString(string_UnicodeString),g_strdup(""));
347
348 #ifdef FUNCMALLOC_FROM_ALLOCA
349
350         r=g_malloc(_captive_UnicodeString_to_utf8_alloca_internal_sizeof(string_UnicodeString));
351         _captive_UnicodeString_to_utf8_alloca_internal_fill(r,string_UnicodeString);
352
353 #else
354
355         err=NULL;       /* not precleared by g_utf8_to_utf16()! */
356         r=g_utf16_to_utf8(
357                         (const gunichar2 *)string_UnicodeString->Buffer,        /* str */
358                         -1,     /* len=>'\0'-terminated */
359                         &utf16_read,    /* items_read; counted in unichar2 (NOT UTF-16 characters or bytes!) */
360                         &utf8_written,  /* items_written; counted in bytes (NOT UTF-8 characters!) */
361                         &err);
362         if (err) {
363                 g_warning("%s: utf16_read=%ld,utf8_written=%ld: %s",G_STRLOC,
364                                 (long)utf16_read,(long)utf8_written,err->message);
365                 g_error_free(err);
366                 g_assert(r==NULL);
367                 g_return_val_if_reached(g_strdup(""));
368                 }
369         g_assert(r!=NULL);
370
371         g_assert(utf16_read==(glong)(string_UnicodeString->length/sizeof(*string_UnicodeString->Buffer)));
372         g_assert(utf6_written==strlen(r));
373
374 #endif /* !FUNCMALLOC_FROM_ALLOCA */
375
376         g_assert(captive_validate_utf8(r));
377
378         return r;
379 }
380
381
382 /* detect required memory size for g_alloca() */
383 size_t _captive_utf8_to_UnicodeString_alloca_internal_sizeof(const gchar *string_utf8)
384 {
385         g_return_val_if_fail(captive_validate_utf8(string_utf8),1);
386
387         /* find the value for PUNICODE_STRING->MaximumLength */
388         return 0
389                         +sizeof(UNICODE_STRING)
390                         +sizeof(WCHAR)*(g_utf8_strlen(string_utf8,
391                                         -1      /* max; -1 means '\0'-terminated */
392                                         )+1);   /* '\0'-termination */
393 }
394
395 static void terminate_static_UnicodeString(UNICODE_STRING *string_UnicodeString,glong length)
396 {
397         /* 'string_UnicodeString' is not yet valid in this point! */
398         g_return_if_fail(string_UnicodeString!=NULL);
399         g_return_if_fail(length>=0);
400
401         string_UnicodeString->Length=length*sizeof(WCHAR);
402         string_UnicodeString->MaximumLength=(length+1)*sizeof(WCHAR);
403         string_UnicodeString->Buffer[length]=0;
404
405         g_assert(captive_validate_UnicodeString(string_UnicodeString));
406 }
407
408 /* transfer 'string_UnicodeString' to memory in 'mem' w/o any further allocations */
409 void _captive_utf8_to_UnicodeString_alloca_internal_fill(UNICODE_STRING *mem,const gchar *string_utf8)
410 {
411 gunichar2 *utf16;
412 captive_ucs2 *ucs2;
413 glong utf8_read,utf16_written;
414 GError *err;
415
416         g_return_if_fail(mem!=NULL);
417         mem->Buffer=(PWSTR)(((char *)mem)+sizeof(*mem));        /* for terminate_static_UnicodeString() below */
418         if (!captive_validate_utf8(string_utf8)) {
419                 terminate_static_UnicodeString(mem,0);
420                 g_return_if_reached();
421                 }
422
423         err=NULL;       /* not precleared by g_utf8_to_utf16()! */
424         utf16=g_utf8_to_utf16(
425                         string_utf8,    /* str */
426                         -1,     /* len=>'\0'-terminated */
427                         &utf8_read,     /* items_read; counted in bytes (NOT chars!) */
428                         &utf16_written, /* items_written; counted in UTF-16 characters (NOT unichar2 or bytes!) */
429                         &err);
430         if (err) {
431                 g_warning("%s: utf8_read=%ld,utf16_written=%ld: %s",G_STRLOC,
432                                 (long)utf8_read,(long)utf16_written,err->message);
433                 g_error_free(err);
434                 g_assert(utf16==NULL);
435                 terminate_static_UnicodeString(mem,0);
436                 g_return_if_reached();
437                 }
438         g_assert(utf16!=NULL);
439
440         /* Check for UCS-2 compliance (reject if surrogates inside) */
441         g_assert(captive_validate_ucs2((const captive_ucs2 *)utf16));
442         /* valid UCS-2 */
443         ucs2=(captive_ucs2 *)utf16;
444
445         g_assert(utf8_read==(glong)strlen(string_utf8));
446         g_assert(utf16_written==captive_ucs2_strlen(ucs2));
447
448         /* check of validity of _captive_utf8_to_UnicodeString_alloca_internal_sizeof() result */
449         g_assert((gchar *)(mem->Buffer+(utf16_written+1))       /* +1 => '\0'-termination */
450                         == ((gchar *)mem)+_captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8));
451
452         memcpy(mem->Buffer,ucs2,sizeof(WCHAR)*(utf16_written+1));
453         g_free(ucs2);
454         terminate_static_UnicodeString(mem,utf16_written);
455
456         g_assert(captive_validate_UnicodeString(mem));
457 }
458
459
460 /**
461  * captive_utf8_to_UnicodeString_malloc:
462  * @string_utf8: #const #gchar * string in #utf8 to convert.
463  *
464  * g_malloc()-based conversion from plain #utf8 string to #PUNICODE_STRING.
465  * You must free the result with g_free() function.
466  *
467  * Returns: #PUNICODE_STRING g_malloc()ed converted string @string_utf8.
468  */
469 PUNICODE_STRING captive_utf8_to_UnicodeString_malloc(const gchar *string_utf8)
470 {
471 UNICODE_STRING *r;
472 #ifndef FUNCMALLOC_FROM_ALLOCA
473 gunichar *ucs4;
474 glong utf8_read,ucs4_written;
475 GError *err;
476 #endif /* !FUNCMALLOC_FROM_ALLOCA */
477
478         g_return_val_if_fail(captive_validate_utf8(string_utf8),captive_utf8_to_UnicodeString_malloc(""));
479
480 #ifdef FUNCMALLOC_FROM_ALLOCA
481
482         r=g_malloc(_captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8));
483         _captive_utf8_to_UnicodeString_alloca_internal_fill(r,string_utf8);
484
485 #else
486
487 #error "FIXME: NOT IMPLEMENTED"
488
489 #endif /* !FUNCMALLOC_FROM_ALLOCA */
490
491         g_assert(captive_validate_UnicodeString(r));
492
493         return r;
494 }
495
496
497 /* map: (const gunichar *) -> (const gunichar2 *); UCS-4 -> UTF-16 */
498 static GHashTable *captive_ucs4_to_utf16_hash;
499
500 static void captive_ucs4_to_utf16_hash_init(void)
501 {
502         if (captive_ucs4_to_utf16_hash)
503                 return;
504         captive_ucs4_to_utf16_hash=g_hash_table_new_full(
505                         g_direct_hash,  /* hash_func */
506                         g_direct_equal, /* key_equal_func */
507                         (GDestroyNotify)NULL,   /* key_destroy_func; we require persistent strings as input */
508                         (GDestroyNotify)g_free);        /* value_destroy_func; result of g_ucs4_to_utf16() */
509 }
510
511 /**
512  * captive_ucs4_to_utf16_const:
513  * @string_ucs4: #const #gunichar * type of persistent string to convert.
514  * This string MUST remain readable with the same content forever.
515  *
516  * Constant string conversion from 32-bit #wchar_t to 16-bit (possible pairs of) UTF-16.
517  * You may not modify the result in any way.
518  * 
519  * It is guaranteed to get two different string addresses for two different
520  * input addresses even if the input strings content is the same.
521  * Otherwise we would behave as #GCC option %-fmerge-constants which
522  * results in %C non-conforming behaviour.
523  *
524  * FIXME: UTF-16 encoding IS NOT IMPLEMENTED.
525  *
526  * See also captive_ucs4_to_ucs2_const().
527  *
528  * Returns: #const #gunichar2 * converted string @string_ucs4.
529  */
530 const gunichar2 *captive_ucs4_to_utf16_const(const gunichar *string_ucs4)
531 {
532 glong ucs4_read,utf16_written;
533 GError *err;
534 const gunichar2 *r_lookup;
535 gunichar2 *r;
536
537         g_return_val_if_fail(captive_validate_ucs4(string_ucs4),captive_ucs4_to_utf16_const((const gunichar *)L""));
538
539         captive_ucs4_to_utf16_hash_init();
540
541         /* found already existing item in the table */
542         if ((r_lookup=g_hash_table_lookup(captive_ucs4_to_utf16_hash,
543                         string_ucs4)    /* key */
544                         )) {
545                 return r_lookup;
546                 }
547
548         /* Prepare 'r' as UTF-16 */
549         err=NULL;       /* not precleared by g_ucs4_to_utf16()! */
550         r=g_ucs4_to_utf16(
551                         (const gunichar *)string_ucs4,  /* str */
552                         -1,     /* len; -1 means '\0'-termination */
553                         &ucs4_read,     /* items_read; counted in chars (==unichars; NOT bytes!) */
554                         &utf16_written, /* items_written; counted in gunichar2 (NOT chars or bytes!) */
555                         &err);
556         if (err) {
557                 g_warning("%s: ucs4_read=%ld,utf16_written=%ld: %s",G_STRLOC,
558                                 (long)ucs4_read,(long)utf16_written,err->message);
559                 g_error_free(err);
560                 g_assert(r==NULL);
561                 g_return_val_if_reached(captive_ucs4_to_utf16_const((const gunichar *)L""));
562                 }
563         g_assert(r!=NULL);
564         g_assert(ucs4_read==(glong)wcslen((const wchar_t *)string_ucs4));
565         /* FIXME: We don't have captive_utf16_strlen() */
566         g_assert(utf16_written==(glong)captive_ucs2_strlen((const gunichar2 *)r));
567         /* (ucs4_read==utf16_written) check would discard any double-pair UTF-16 encodings
568          * but this function is designed as UTF-16 compliant.
569          */
570
571         /* store new item to the table */
572         g_hash_table_insert(captive_ucs4_to_utf16_hash,
573                         (gpointer)string_ucs4,  /* key; de-const */
574                         r);     /* value */
575
576 #if 0   /* We don't have captive_validate_utf16() */
577         g_assert(captive_validate_utf16(r));
578 #endif
579
580         return r;
581 }