Initial original import from: fuse-2.4.2-2.fc4
[captive.git] / src / libcaptive / rtl / unicode.c
1 /* $Id$
2  * Unicode add-ons to reactos ntoskrnl/rtl/unicode.c for libcaptive
3  * Copyright (C) 2002 Jan Kratochvil <project-captive@jankratochvil.net>
4  * 
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; exactly version 2 of June 1991 is required
8  * 
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  * 
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  */
18
19
20 #include "config.h"
21
22 #include "captive/unicode.h"    /* self */
23 #include "captive/unicode_reactos.h"    /* for captive_ucs2 */
24 #include <glib/gtypes.h>
25 #include <glib/gmessages.h>
26 #include <glib/gunicode.h>
27 #include <glib/gmem.h>
28 #include "reactos/napi/types.h"  /* for PUNICODE_STRING etc. */
29 #include "reactos/unicode.h"
30 #include "captive/macros.h"
31 #include <glib/gstrfuncs.h>
32 #include <wchar.h>      /* for wcslen() */
33 #include <glib/ghash.h>
34 #include <string.h>
35
36
37 /* CONFIG: */
38 /* Use simplified g_malloc() functions as wrappers around g_alloca() ones.
39  */
40 #define FUNCMALLOC_FROM_ALLOCA 1
41
42
43 /* compiler sanity */
44 static gboolean captive_validate_unicode_types(void)
45 {
46         g_return_val_if_fail(4==sizeof(gunichar),FALSE);
47         g_return_val_if_fail(2==sizeof(WCHAR),FALSE);
48         g_return_val_if_fail(1==sizeof(CHAR),FALSE);
49
50         return TRUE;
51 }
52
53
54 /**
55  * captive_validate_ucs4:
56  * @string_ucs4: #const #gunichar * type string to validate.
57  * Invalid string input is forbidden.
58  *
59  * Checks the validity of all 32-bit unicharacters of 0-terminated string.
60  * It is required to have characters complying to g_unichar_validate().
61  *
62  * Returns: %TRUE if the string is valid.
63  */ 
64 gboolean captive_validate_ucs4(const gunichar *string_ucs4)
65 {
66 const gunichar *cs_ucs4;
67
68         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
69         g_return_val_if_fail(string_ucs4!=NULL,FALSE);
70
71         for (cs_ucs4=string_ucs4;*cs_ucs4;cs_ucs4++)
72                 g_return_val_if_fail(g_unichar_validate(*cs_ucs4),FALSE);
73
74         return TRUE;
75 }
76
77
78 /**
79  * captive_validate_ucs2_fixlen:
80  * @string_ucs2: #const #captive_ucs2 * type string to validate.
81  * Invalid string input is forbidden.
82  * UTF-16 encoded strings are forbidden.
83  * @string_ucs2_fixlen: Number of characters from @string_ucs2 to check.
84  * captive_ucs2_strlen(@string_ucs2)>=@string_ucs2_fixlen is required.
85  * Negative value is forbidden.
86  *
87  * Checks the validity of first @string_ucs2_fixlen 16-bit unicharacters of @string_ucs2.
88  * It is required to have characters complying to g_unichar_validate().
89  * String length must be equal to or larger than @string_ucs2_fixlen.
90  *
91  * Returns: %TRUE if the string is valid.
92  */ 
93 gboolean captive_validate_ucs2_fixlen(const captive_ucs2 *string_ucs2,glong string_ucs2_fixlen)
94 {
95 const captive_ucs2 *cs_ucs2;
96
97         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
98         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
99         g_return_val_if_fail(string_ucs2_fixlen>=0,FALSE);
100
101         /* g_unichar_validate() will reject surrogates (G_UNICODE_SURROGATE) */
102         for (cs_ucs2=string_ucs2;cs_ucs2<string_ucs2+string_ucs2_fixlen;cs_ucs2++) {
103                 g_return_val_if_fail(*cs_ucs2!=0,FALSE);
104                 g_return_val_if_fail(g_unichar_validate(*cs_ucs2),FALSE);
105                 }
106
107         return TRUE;
108 }
109
110
111 /**
112  * captive_validate_ucs2:
113  * @string_ucs2: #const #captive_ucs2 * type string to validate.
114  * Invalid string input is forbidden.
115  * UTF-16 encoded strings are forbidden.
116  *
117  * Checks the validity of all 16-bit unicharacters of 0-terminated string.
118  * It is required to have characters complying to g_unichar_validate().
119  *
120  * Returns: %TRUE if the string is valid.
121  */ 
122 gboolean captive_validate_ucs2(const captive_ucs2 *string_ucs2)
123 {
124         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
125         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
126
127         return captive_validate_ucs2_fixlen(string_ucs2,captive_ucs2_strlen(string_ucs2));
128 }
129
130
131 /**
132  * captive_validate_utf8:
133  * @string_utf8: #const #gchar * utf8 type string to validate.
134  * Invalid string input is forbidden.
135  *
136  * Checks the validity of all utf8 of 0-terminated string.
137  * It is required to have characters complying to g_utf8_validate().
138  *
139  * Returns: %TRUE if the string is valid.
140  */ 
141 gboolean captive_validate_utf8(const gchar *string_utf8)
142 {
143         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
144         g_return_val_if_fail(string_utf8!=NULL,FALSE);
145
146         g_return_val_if_fail(g_utf8_validate(
147                                         string_utf8,    /* str */
148                                         -1,     /* max_len; -1 means '\0'-terminated */
149                                         NULL),  /* end */
150                         FALSE);
151
152         return TRUE;
153 }
154
155
156 /**
157  * captive_ucs2_strlen:
158  * @string_ucs2: String of type #const #gunichar2 * in pure UCS-2
159  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
160  *
161  * Counts the number of characters (2 bytes each) in @string_ucs2.
162  *
163  * Returns: @string_ucs2 length in UCS-2 characters.
164  */
165 glong captive_ucs2_strlen(const captive_ucs2 *string_ucs2)
166 {
167 glong r;
168
169         /* Do not call captive_validate_ucs2(string_ucs2) as we would be looping! */
170         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
171         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
172
173         for (r=0;*string_ucs2;string_ucs2++)
174                 r++;
175
176         return r;
177 }
178
179
180 /**
181  * captive_validate_UnicodeString:
182  * @string_UnicodeString: #PUNICODE_STRING type string to validate.
183  * Invalid string input is forbidden.
184  *
185  * Checks the internal consistency of the given @string_UnicodeString.
186  * It is required to have characters complying to g_unichar_validate().
187  * @string_UnicodeString MUST be zero-terminated.
188  *
189  * Returns: %TRUE if the string is valid.
190  */
191 gboolean captive_validate_UnicodeString(const UNICODE_STRING *string_UnicodeString)
192 {
193         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
194         g_return_val_if_fail(sizeof(WCHAR)==sizeof(*string_UnicodeString->Buffer),FALSE);
195         g_return_val_if_fail(string_UnicodeString!=NULL,FALSE);
196         g_return_val_if_fail(string_UnicodeString->Length%sizeof(*string_UnicodeString->Buffer)==0,FALSE);
197         g_return_val_if_fail(string_UnicodeString->MaximumLength
198                         >=string_UnicodeString->Length+sizeof(*string_UnicodeString->Buffer),FALSE);
199         g_return_val_if_fail(string_UnicodeString->Length==sizeof(*string_UnicodeString->Buffer)*
200                         captive_ucs2_strlen(string_UnicodeString->Buffer)
201                         ,FALSE);
202
203         g_return_val_if_fail(captive_validate_ucs2(string_UnicodeString->Buffer),FALSE);
204
205         return TRUE;
206 }
207
208
209 /**
210  * captive_validate_UnicodeString_noterm:
211  * @string_UnicodeString_noterm: #PUNICODE_STRING type string to validate.
212  * Invalid string input is forbidden.
213  *
214  * Checks the internal consistency of the given @string_UnicodeString_noterm.
215  * It is required to have characters complying to g_unichar_validate().
216  * @string_UnicodeString_noterm does not need to be zero-terminated.
217  *
218  * Returns: %TRUE if the string is valid.
219  */
220 gboolean captive_validate_UnicodeString_noterm(const UNICODE_STRING *string_UnicodeString_noterm)
221 {
222 const WCHAR *cwp;
223
224         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
225         g_return_val_if_fail(sizeof(WCHAR)==sizeof(*string_UnicodeString_noterm->Buffer),FALSE);
226         g_return_val_if_fail(string_UnicodeString_noterm!=NULL,FALSE);
227         g_return_val_if_fail(string_UnicodeString_noterm->Length%sizeof(*string_UnicodeString_noterm->Buffer)==0,FALSE);
228         g_return_val_if_fail(string_UnicodeString_noterm->MaximumLength>=string_UnicodeString_noterm->Length,FALSE);
229
230         for (
231                         cwp=string_UnicodeString_noterm->Buffer;
232                         cwp<string_UnicodeString_noterm->Buffer
233                                         +(string_UnicodeString_noterm->Length/sizeof(*string_UnicodeString_noterm->Buffer));
234                         cwp++)
235                 g_return_val_if_fail(*cwp!=0,FALSE);
236
237         g_return_val_if_fail(captive_validate_ucs2_fixlen(string_UnicodeString_noterm->Buffer,
238                                         string_UnicodeString_noterm->Length/sizeof(*string_UnicodeString_noterm->Buffer)),
239                         FALSE);
240
241         return TRUE;
242 }
243
244
245 /**
246  * captive_validate_AnsiString:
247  * @string_AnsiString: #PANSI_STRING type string to validate.
248  * Invalid string input is forbidden.
249  *
250  * Checks the internal consistency of the given @string_AnsiString.
251  *
252  * Returns: %TRUE if the string is valid.
253  */
254 gboolean captive_validate_AnsiString(const ANSI_STRING *string_AnsiString)
255 {
256         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
257         g_return_val_if_fail(sizeof(CHAR)==sizeof(*string_AnsiString->Buffer),FALSE);
258         g_return_val_if_fail(string_AnsiString!=NULL,FALSE);
259         g_return_val_if_fail(string_AnsiString->MaximumLength>=string_AnsiString->Length+1,FALSE);
260         g_return_val_if_fail(string_AnsiString->Length==strlen(string_AnsiString->Buffer),FALSE);
261
262         return TRUE;
263 }
264
265
266 /**
267  * captive_ucs2_compare:
268  * @string_a_ucs2: First string of type #const #gunichar2 * in pure UCS-2.
269  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
270  * @string_b_ucs2: Second string of type #const #gunichar2 * in pure UCS-2.
271  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
272  *
273  * Compares case-sensitively @string_a_ucs2 and @string_b_ucs2.
274  *
275  * Returns: %TRUE if @string_a_ucs2 and @string_b_ucs2 are the same.
276  */
277 gboolean captive_ucs2_compare(const captive_ucs2 *string_a_ucs2,const captive_ucs2 *string_b_ucs2)
278 {
279 guint ui;
280
281         g_return_val_if_fail(captive_validate_ucs2(string_a_ucs2),FALSE);
282         g_return_val_if_fail(captive_validate_ucs2(string_b_ucs2),FALSE);
283
284         ui=0;
285         do {
286                 if (string_a_ucs2[ui]!=string_b_ucs2[ui])
287                         return FALSE;
288                 } while (string_a_ucs2[ui++]);
289         return TRUE;
290 }
291
292
293 /**
294  * captive_UnicodeString_compare:
295  * @string_a_UnicodeString: First string of type #PUNICODE_STRING.
296  * Invalid string input is forbidden.
297  * @string_b_UnicodeString: Second string of type #PUNICODE_STRING.
298  * Invalid string input is forbidden.
299  *
300  * Compares case-sensitively @string_a_UnicodeString and @string_b_UnicodeString.
301  *
302  * Returns: %TRUE if @string_a_UnicodeString and @string_b_UnicodeString are the same.
303  */
304 gboolean captive_UnicodeString_compare
305                 (const UNICODE_STRING *string_a_UnicodeString,const UNICODE_STRING *string_b_UnicodeString)
306 {
307         g_return_val_if_fail(captive_validate_UnicodeString(string_a_UnicodeString),FALSE);
308         g_return_val_if_fail(captive_validate_UnicodeString(string_b_UnicodeString),FALSE);
309
310         if (string_a_UnicodeString->Length!=string_b_UnicodeString->Length)
311                 return FALSE;
312         return captive_ucs2_compare(string_a_UnicodeString->Buffer,string_b_UnicodeString->Buffer);
313 }
314
315
316 /**
317  * captive_ucs2_compare_insensitive:
318  * @string_a_ucs2: First string of type #const #gunichar2 * in pure UCS-2.
319  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
320  * @string_b_ucs2: Second string of type #const #gunichar2 * in pure UCS-2.
321  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
322  *
323  * Compares case-insensitively @string_a_ucs2 and @string_b_ucs2.
324  *
325  * Returns: %TRUE if @string_a_ucs2 and @string_b_ucs2 are the same.
326  */
327 gboolean captive_ucs2_compare_insensitive(const captive_ucs2 *string_a_ucs2,const captive_ucs2 *string_b_ucs2)
328 {
329 guint ui;
330
331         g_return_val_if_fail(captive_validate_ucs2(string_a_ucs2),FALSE);
332         g_return_val_if_fail(captive_validate_ucs2(string_b_ucs2),FALSE);
333
334         ui=0;
335         do {
336                 if (g_unichar_toupper(string_a_ucs2[ui])!=g_unichar_toupper(string_b_ucs2[ui]))
337                         return FALSE;
338                 } while (string_a_ucs2[ui++]);
339         return TRUE;
340 }
341
342
343 /**
344  * captive_UnicodeString_compare_insensitive:
345  * @string_a_UnicodeString: First string of type #PUNICODE_STRING.
346  * Invalid string input is forbidden.
347  * @string_b_UnicodeString: Second string of type #PUNICODE_STRING.
348  * Invalid string input is forbidden.
349  *
350  * Compares case-insensitively @string_a_UnicodeString and @string_b_UnicodeString.
351  *
352  * Returns: %TRUE if @string_a_UnicodeString and @string_b_UnicodeString are the same.
353  */
354 gboolean captive_UnicodeString_compare_insensitive
355                 (const UNICODE_STRING *string_a_UnicodeString,const UNICODE_STRING *string_b_UnicodeString)
356 {
357         g_return_val_if_fail(captive_validate_UnicodeString(string_a_UnicodeString),FALSE);
358         g_return_val_if_fail(captive_validate_UnicodeString(string_b_UnicodeString),FALSE);
359
360         if (string_a_UnicodeString->Length!=string_b_UnicodeString->Length)
361                 return FALSE;
362         return captive_ucs2_compare_insensitive(string_a_UnicodeString->Buffer,string_b_UnicodeString->Buffer);
363 }
364
365
366 /* detect required memory size for g_alloca() */
367 size_t _captive_UnicodeString_to_utf8_alloca_internal_sizeof(const UNICODE_STRING *string_UnicodeString)
368 {
369 glong length;
370 size_t r;
371 const WCHAR *cwcharp;
372
373         g_return_val_if_fail(captive_validate_UnicodeString(string_UnicodeString),1);
374
375         /* measure 'string_UnicodeString->Buffer' length in UTF-8 to 'r' */
376         cwcharp=string_UnicodeString->Buffer;
377         r=0;
378         for (length=string_UnicodeString->Length/sizeof(*string_UnicodeString->Buffer);length;length--) {
379 gint utf8len;
380
381                 utf8len=g_unichar_to_utf8(
382                                 *cwcharp++,     /* c */
383                                 NULL);  /* outbuf=NULL => just the length will be computed */
384                 g_assert(utf8len>=0);
385                 r+=utf8len;
386                 }
387         g_assert(*cwcharp==0);
388         r++;    /* '\0'-termination */
389
390         /* utf8 byte-size */
391         return r;
392 }
393
394 /* transfer 'string_UnicodeString' to memory in 'mem' as utf8 w/o any further allocations */
395 void _captive_UnicodeString_to_utf8_alloca_internal_fill(gchar *mem,const UNICODE_STRING *string_UnicodeString)
396 {
397 const WCHAR *cwcharp;
398 #ifndef G_DISABLE_ASSERT
399 gchar *mem_orig=mem;
400 #endif /* G_DISABLE_ASSERT */
401
402         g_return_if_fail(mem!=NULL);
403         if (!captive_validate_UnicodeString(string_UnicodeString)) {
404                 *mem='\0';
405                 g_return_if_reached();
406                 }
407
408         /* We can't use any glib string conversions as UNICODE_STRING uses ucs2! */
409         /* We can't use any glib string conversions as we need to write the string
410          * to our supplied memory storage but glib always g_malloc()s it
411          */
412         /* copy 'string_UnicodeString->Buffer' to 'mem' */
413         for (cwcharp=string_UnicodeString->Buffer;*cwcharp;cwcharp++) {
414 gint utf8len;
415
416                 utf8len=g_unichar_to_utf8(
417                                 (gunichar)*cwcharp,     /* c */
418                                 mem);   /* outbuf */
419                 g_assert(utf8len>=0);
420                 mem+=utf8len;
421                 }
422         *mem='\0';
423
424         g_assert((size_t)((mem+1)-mem_orig) == _captive_UnicodeString_to_utf8_alloca_internal_sizeof(string_UnicodeString));
425         g_assert(captive_validate_utf8(mem_orig));
426 }
427
428
429 /**
430  * captive_UnicodeString_to_utf8_malloc:
431  * @string_UnicodeString: #PUNICODE_STRING type of string to convert.
432  *
433  * g_malloc()-based conversion from #PUNICODE_STRING to plain #utf8 string.
434  * You must free the result with g_free() function.
435  *
436  * Returns: #const #gchar * g_malloc()ed converted string @string_UnicodeString.
437  */
438 gchar *captive_UnicodeString_to_utf8_malloc(const UNICODE_STRING *string_UnicodeString)
439 {
440 gchar *r;
441 #ifndef FUNCMALLOC_FROM_ALLOCA
442 glong utf16_read,utf8_written;
443 GError *err;
444 #endif /* !FUNCMALLOC_FROM_ALLOCA */
445
446         g_return_val_if_fail(captive_validate_UnicodeString(string_UnicodeString),g_strdup(""));
447
448 #ifdef FUNCMALLOC_FROM_ALLOCA
449
450         r=g_malloc(_captive_UnicodeString_to_utf8_alloca_internal_sizeof(string_UnicodeString));
451         _captive_UnicodeString_to_utf8_alloca_internal_fill(r,string_UnicodeString);
452
453 #else
454
455         err=NULL;       /* not precleared by g_utf8_to_utf16()! */
456         r=g_utf16_to_utf8(
457                         (const gunichar2 *)string_UnicodeString->Buffer,        /* str */
458                         -1,     /* len=>'\0'-terminated */
459                         &utf16_read,    /* items_read; counted in unichar2 (NOT UTF-16 characters or bytes!) */
460                         &utf8_written,  /* items_written; counted in bytes (NOT UTF-8 characters!) */
461                         &err);
462         if (err) {
463                 g_warning("%s: utf16_read=%ld,utf8_written=%ld: %s",G_STRLOC,
464                                 (long)utf16_read,(long)utf8_written,err->message);
465                 g_error_free(err);
466                 g_assert(r==NULL);
467                 g_return_val_if_reached(g_strdup(""));
468                 }
469         g_assert(r!=NULL);
470
471         g_assert(utf16_read==(glong)(string_UnicodeString->length/sizeof(*string_UnicodeString->Buffer)));
472         g_assert(utf6_written==strlen(r));
473
474 #endif /* !FUNCMALLOC_FROM_ALLOCA */
475
476         g_assert(captive_validate_utf8(r));
477
478         return r;
479 }
480
481
482 /* detect required memory size for g_alloca() */
483 size_t _captive_utf8_to_UnicodeString_alloca_internal_sizeof(const gchar *string_utf8)
484 {
485         g_return_val_if_fail(captive_validate_utf8(string_utf8),1);
486
487         /* find the value for PUNICODE_STRING->MaximumLength */
488         return 0
489                         +sizeof(UNICODE_STRING)
490                         +sizeof(WCHAR)*(g_utf8_strlen(string_utf8,
491                                         -1      /* max; -1 means '\0'-terminated */
492                                         )+1);   /* '\0'-termination */
493 }
494
495 static void terminate_static_UnicodeString(UNICODE_STRING *string_UnicodeString,glong length)
496 {
497         /* 'string_UnicodeString' is not yet valid in this point! */
498         g_return_if_fail(string_UnicodeString!=NULL);
499         g_return_if_fail(length>=0);
500
501         string_UnicodeString->Length=length*sizeof(WCHAR);
502         string_UnicodeString->MaximumLength=(length+1)*sizeof(WCHAR);
503         string_UnicodeString->Buffer[length]=0;
504
505         g_assert(captive_validate_UnicodeString(string_UnicodeString));
506 }
507
508 /* transfer 'string_UnicodeString' to memory in 'mem' w/o any further allocations */
509 void _captive_utf8_to_UnicodeString_alloca_internal_fill(UNICODE_STRING *mem,const gchar *string_utf8)
510 {
511 gunichar2 *utf16;
512 captive_ucs2 *ucs2;
513 glong utf8_read,utf16_written;
514 GError *err;
515
516         g_return_if_fail(mem!=NULL);
517         mem->Buffer=(PWSTR)(((char *)mem)+sizeof(*mem));        /* for terminate_static_UnicodeString() below */
518         if (!captive_validate_utf8(string_utf8)) {
519                 terminate_static_UnicodeString(mem,0);
520                 g_return_if_reached();
521                 }
522
523         err=NULL;       /* not precleared by g_utf8_to_utf16()! */
524         utf16=g_utf8_to_utf16(
525                         string_utf8,    /* str */
526                         -1,     /* len=>'\0'-terminated */
527                         &utf8_read,     /* items_read; counted in bytes (NOT chars!) */
528                         &utf16_written, /* items_written; counted in UTF-16 characters (NOT unichar2 or bytes!) */
529                         &err);
530         if (err) {
531                 g_warning("%s: utf8_read=%ld,utf16_written=%ld: %s",G_STRLOC,
532                                 (long)utf8_read,(long)utf16_written,err->message);
533                 g_error_free(err);
534                 g_assert(utf16==NULL);
535                 terminate_static_UnicodeString(mem,0);
536                 g_return_if_reached();
537                 }
538         g_assert(utf16!=NULL);
539
540         /* Check for UCS-2 compliance (reject if surrogates inside) */
541         g_assert(captive_validate_ucs2((const captive_ucs2 *)utf16));
542         /* valid UCS-2 */
543         ucs2=(captive_ucs2 *)utf16;
544
545         g_assert(utf8_read==(glong)strlen(string_utf8));
546         g_assert(utf16_written==captive_ucs2_strlen(ucs2));
547
548         /* check of validity of _captive_utf8_to_UnicodeString_alloca_internal_sizeof() result */
549         g_assert((gchar *)(mem->Buffer+(utf16_written+1))       /* +1 => '\0'-termination */
550                         == ((gchar *)mem)+_captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8));
551
552         memcpy(mem->Buffer,ucs2,sizeof(WCHAR)*(utf16_written+1));
553         g_free(ucs2);
554         terminate_static_UnicodeString(mem,utf16_written);
555
556         g_assert(captive_validate_UnicodeString(mem));
557 }
558
559
560 /**
561  * captive_utf8_to_UnicodeString_malloc:
562  * @string_utf8: #const #gchar * string in #utf8 to convert.
563  *
564  * g_malloc()-based conversion from plain #utf8 string to #PUNICODE_STRING.
565  * You must free the result with g_free() function.
566  *
567  * Returns: #PUNICODE_STRING g_malloc()ed converted string @string_utf8.
568  */
569 PUNICODE_STRING captive_utf8_to_UnicodeString_malloc(const gchar *string_utf8)
570 {
571 UNICODE_STRING *r;
572 #ifndef FUNCMALLOC_FROM_ALLOCA
573 gunichar *ucs4;
574 glong utf8_read,ucs4_written;
575 GError *err;
576 #endif /* !FUNCMALLOC_FROM_ALLOCA */
577
578         g_return_val_if_fail(captive_validate_utf8(string_utf8),captive_utf8_to_UnicodeString_malloc(""));
579
580 #ifdef FUNCMALLOC_FROM_ALLOCA
581
582         r=g_malloc(_captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8));
583         _captive_utf8_to_UnicodeString_alloca_internal_fill(r,string_utf8);
584
585 #else
586
587 #error "FIXME: NOT IMPLEMENTED"
588
589 #endif /* !FUNCMALLOC_FROM_ALLOCA */
590
591         g_assert(captive_validate_UnicodeString(r));
592
593         return r;
594 }
595
596
597 /* map: (const gunichar *) -> (const gunichar2 *); UCS-4 -> UTF-16 */
598 static GHashTable *captive_ucs4_to_utf16_hash;
599
600 static void captive_ucs4_to_utf16_hash_init(void)
601 {
602         if (captive_ucs4_to_utf16_hash)
603                 return;
604         captive_ucs4_to_utf16_hash=g_hash_table_new_full(
605                         g_direct_hash,  /* hash_func */
606                         g_direct_equal, /* key_equal_func */
607                         (GDestroyNotify)NULL,   /* key_destroy_func; we require persistent strings as input */
608                         (GDestroyNotify)g_free);        /* value_destroy_func; result of g_ucs4_to_utf16() */
609 }
610
611 /**
612  * captive_ucs4_to_utf16_const:
613  * @string_ucs4: #const #gunichar * type of persistent string to convert.
614  * This string MUST remain readable with the same content forever.
615  *
616  * Constant string conversion from 32-bit #wchar_t to 16-bit (possible pairs of) UTF-16.
617  * You may not modify the result in any way.
618  * 
619  * It is guaranteed to get two different string addresses for two different
620  * input addresses even if the input strings content is the same.
621  * Otherwise we would behave as #GCC option %-fmerge-constants which
622  * results in %C non-conforming behaviour.
623  *
624  * FIXME: UTF-16 encoding IS NOT IMPLEMENTED.
625  *
626  * See also captive_ucs4_to_ucs2_const().
627  *
628  * Returns: #const #gunichar2 * converted string @string_ucs4.
629  */
630 const gunichar2 *captive_ucs4_to_utf16_const(const gunichar *string_ucs4)
631 {
632 glong ucs4_read,utf16_written;
633 GError *err;
634 const gunichar2 *r_lookup;
635 gunichar2 *r;
636
637         g_return_val_if_fail(captive_validate_ucs4(string_ucs4),captive_ucs4_to_utf16_const((const gunichar *)L""));
638
639         captive_ucs4_to_utf16_hash_init();
640
641         /* found already existing item in the table */
642         if ((r_lookup=g_hash_table_lookup(captive_ucs4_to_utf16_hash,
643                         string_ucs4)    /* key */
644                         )) {
645                 return r_lookup;
646                 }
647
648         /* Prepare 'r' as UTF-16 */
649         err=NULL;       /* not precleared by g_ucs4_to_utf16()! */
650         r=g_ucs4_to_utf16(
651                         (const gunichar *)string_ucs4,  /* str */
652                         -1,     /* len; -1 means '\0'-termination */
653                         &ucs4_read,     /* items_read; counted in chars (==unichars; NOT bytes!) */
654                         &utf16_written, /* items_written; counted in gunichar2 (NOT chars or bytes!) */
655                         &err);
656         if (err) {
657                 g_warning("%s: ucs4_read=%ld,utf16_written=%ld: %s",G_STRLOC,
658                                 (long)ucs4_read,(long)utf16_written,err->message);
659                 g_error_free(err);
660                 g_assert(r==NULL);
661                 g_return_val_if_reached(captive_ucs4_to_utf16_const((const gunichar *)L""));
662                 }
663         g_assert(r!=NULL);
664         g_assert(ucs4_read==(glong)wcslen((const wchar_t *)string_ucs4));
665         /* FIXME: We don't have captive_utf16_strlen() */
666         g_assert(utf16_written==(glong)captive_ucs2_strlen((const gunichar2 *)r));
667         /* (ucs4_read==utf16_written) check would discard any double-pair UTF-16 encodings
668          * but this function is designed as UTF-16 compliant.
669          */
670
671         /* store new item to the table */
672         g_hash_table_insert(captive_ucs4_to_utf16_hash,
673                         (gpointer)string_ucs4,  /* key; de-const */
674                         r);     /* value */
675
676 #if 0   /* We don't have captive_validate_utf16() */
677         g_assert(captive_validate_utf16(r));
678 #endif
679
680         return r;
681 }