src/libcaptive/rtl/unicode.c

   1 /* $Id$
   2  * Unicode add-ons to reactos ntoskrnl/rtl/unicode.c for libcaptive
   3  * Copyright (C) 2002 Jan Kratochvil <project-captive@jankratochvil.net>
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; exactly version 2 of June 1991 is required
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  17  */
  18
  19
  20 #include "config.h"
  21
  22 #include "captive/unicode.h"    /* self */
  23 #include "captive/unicode_reactos.h"    /* for captive_ucs2 */
  24 #include <glib/gtypes.h>
  25 #include <glib/gmessages.h>
  26 #include <glib/gunicode.h>
  27 #include <glib/gmem.h>
  28 #include "reactos/napi/types.h"  /* for PUNICODE_STRING etc. */
  29 #include "reactos/unicode.h"
  30 #include "captive/macros.h"
  31 #include <glib/gstrfuncs.h>
  32 #include <wchar.h>      /* for wcslen() */
  33 #include <glib/ghash.h>
  34 #include <string.h>
  35
  36
  37 /* CONFIG: */
  38 /* Use simplified g_malloc() functions as wrappers around g_alloca() ones.
  39  */
  40 #define FUNCMALLOC_FROM_ALLOCA 1
  41
  42
  43 /* compiler sanity */
  44 static gboolean captive_validate_unicode_types(void)
  45 {
  46         g_return_val_if_fail(4==sizeof(gunichar),FALSE);
  47         g_return_val_if_fail(2==sizeof(WCHAR),FALSE);
  48         g_return_val_if_fail(1==sizeof(CHAR),FALSE);
  49
  50         return TRUE;
  51 }
  52
  53
  54 /**
  55  * captive_validate_ucs4:
  56  * @string_ucs4: #const #gunichar * type string to validate.
  57  * Invalid string input is forbidden.
  58  *
  59  * Checks the validity of all 32-bit unicharacters of 0-terminated string.
  60  * It is required to have characters complying to g_unichar_validate().
  61  *
  62  * Returns: %TRUE if the string is valid.
  63  */
  64 gboolean captive_validate_ucs4(const gunichar *string_ucs4)
  65 {
  66 const gunichar *cs_ucs4;
  67
  68         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
  69         g_return_val_if_fail(string_ucs4!=NULL,FALSE);
  70
  71         for (cs_ucs4=string_ucs4;*cs_ucs4;cs_ucs4++)
  72                 g_return_val_if_fail(g_unichar_validate(*cs_ucs4),FALSE);
  73
  74         return TRUE;
  75 }
  76
  77
  78 /**
  79  * captive_validate_ucs2_fixlen:
  80  * @string_ucs2: #const #captive_ucs2 * type string to validate.
  81  * Invalid string input is forbidden.
  82  * UTF-16 encoded strings are forbidden.
  83  * @string_ucs2_fixlen: Number of characters from @string_ucs2 to check.
  84  * captive_ucs2_strlen(@string_ucs2)>=@string_ucs2_fixlen is required.
  85  * Negative value is forbidden.
  86  *
  87  * Checks the validity of first @string_ucs2_fixlen 16-bit unicharacters of @string_ucs2.
  88  * It is required to have characters complying to g_unichar_validate().
  89  * String length must be equal or larger than @string_ucs2_fixlen;
  90  *
  91  * Returns: %TRUE if the string is valid.
  92  */
  93 gboolean captive_validate_ucs2_fixlen(const captive_ucs2 *string_ucs2,glong string_ucs2_fixlen)
  94 {
  95 const captive_ucs2 *cs_ucs2;
  96
  97         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
  98         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
  99         g_return_val_if_fail(string_ucs2_fixlen>=0,FALSE);
 100
 101         /* g_unichar_validate() will reject surrogates (G_UNICODE_SURROGATE) */
 102         for (cs_ucs2=string_ucs2;cs_ucs2<string_ucs2+string_ucs2_fixlen;cs_ucs2++) {
 103                 g_return_val_if_fail(*cs_ucs2!=0,FALSE);
 104                 g_return_val_if_fail(g_unichar_validate(*cs_ucs2),FALSE);
 105                 }
 106
 107         return TRUE;
 108 }
 109
 110
 111 /**
 112  * captive_validate_ucs2:
 113  * @string_ucs2: #const #captive_ucs2 * type string to validate.
 114  * Invalid string input is forbidden.
 115  * UTF-16 encoded strings are forbidden.
 116  *
 117  * Checks the validity of all 16-bit unicharacters of 0-terminated string.
 118  * It is required to have characters complying to g_unichar_validate().
 119  *
 120  * Returns: %TRUE if the string is valid.
 121  */
 122 gboolean captive_validate_ucs2(const captive_ucs2 *string_ucs2)
 123 {
 124         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
 125         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
 126
 127         return captive_validate_ucs2_fixlen(string_ucs2,captive_ucs2_strlen(string_ucs2));
 128 }
 129
 130
 131 /**
 132  * captive_validate_utf8:
 133  * @string_utf8: #const #gchar * utf8 type string to validate.
 134  * Invalid string input is forbidden.
 135  *
 136  * Checks the validity of all utf8 of 0-terminated string.
 137  * It is required to have characters complying to g_utf8_validate().
 138  *
 139  * Returns: %TRUE if the string is valid.
 140  */
 141 gboolean captive_validate_utf8(const gchar *string_utf8)
 142 {
 143         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
 144         g_return_val_if_fail(string_utf8!=NULL,FALSE);
 145
 146         g_return_val_if_fail(g_utf8_validate(
 147                                         string_utf8,    /* str */
 148                                         -1,     /* max_len; -1 means '\0'-terminated */
 149                                         NULL),  /* end */
 150                         FALSE);
 151
 152         return TRUE;
 153 }
 154
 155
 156 /**
 157  * captive_ucs2_strlen:
 158  * @string_ucs2: String of type #const #gunichar2 * in pure UCS-2
 159  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 160  *
 161  * Counts the number of characters (=2bytes) in @strings_ucs2.
 162  *
 163  * Returns: @string_ucs2 length in UCS-2 characters.
 164  */
 165 glong captive_ucs2_strlen(const captive_ucs2 *string_ucs2)
 166 {
 167 glong r;
 168
 169         /* Do not call captive_validate_ucs2(string_ucs2) as we would be looping! */
 170         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
 171         g_return_val_if_fail(string_ucs2!=NULL,FALSE);
 172
 173         for (r=0;*string_ucs2;string_ucs2++)
 174                 r++;
 175
 176         return r;
 177 }
 178
 179
 180 /**
 181  * captive_validate_UnicodeString:
 182  * @string_UnicodeString: #PUNICODE_STRING type string to validate.
 183  * Invalid string input is forbidden.
 184  *
 185  * Checks the internal consistency of the given @string_UnicodeString.
 186  * It is required to have characters complying to g_unichar_validate().
 187  * @string_UnicodeString MUST be zero-terminated.
 188  *
 189  * Returns: %TRUE if the string is valid.
 190  */
 191 gboolean captive_validate_UnicodeString(const UNICODE_STRING *string_UnicodeString)
 192 {
 193         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
 194         g_return_val_if_fail(sizeof(WCHAR)==sizeof(*string_UnicodeString->Buffer),FALSE);
 195         g_return_val_if_fail(string_UnicodeString!=NULL,FALSE);
 196         g_return_val_if_fail(string_UnicodeString->Length%sizeof(*string_UnicodeString->Buffer)==0,FALSE);
 197         g_return_val_if_fail(string_UnicodeString->MaximumLength
 198                         >=string_UnicodeString->Length+sizeof(*string_UnicodeString->Buffer),FALSE);
 199         g_return_val_if_fail(string_UnicodeString->Length==sizeof(*string_UnicodeString->Buffer)*
 200                         captive_ucs2_strlen(string_UnicodeString->Buffer)
 201                         ,FALSE);
 202
 203         g_return_val_if_fail(captive_validate_ucs2(string_UnicodeString->Buffer),FALSE);
 204
 205         return TRUE;
 206 }
 207
 208
 209 /**
 210  * captive_validate_UnicodeString_noterm:
 211  * @string_UnicodeString_noterm: #PUNICODE_STRING type string to validate.
 212  * Invalid string input is forbidden.
 213  *
 214  * Checks the internal consistency of the given @string_UnicodeString.
 215  * It is required to have characters complying to g_unichar_validate().
 216  * @string_UnicodeString_noterm does not neet to be zero-terminated.
 217  *
 218  * Returns: %TRUE if the string is valid.
 219  */
 220 gboolean captive_validate_UnicodeString_noterm(const UNICODE_STRING *string_UnicodeString_noterm)
 221 {
 222 const WCHAR *cwp;
 223
 224         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
 225         g_return_val_if_fail(sizeof(WCHAR)==sizeof(*string_UnicodeString_noterm->Buffer),FALSE);
 226         g_return_val_if_fail(string_UnicodeString_noterm!=NULL,FALSE);
 227         g_return_val_if_fail(string_UnicodeString_noterm->Length%sizeof(*string_UnicodeString_noterm->Buffer)==0,FALSE);
 228         g_return_val_if_fail(string_UnicodeString_noterm->MaximumLength>=string_UnicodeString_noterm->Length,FALSE);
 229
 230         for (
 231                         cwp=string_UnicodeString_noterm->Buffer;
 232                         cwp<string_UnicodeString_noterm->Buffer
 233                                         +(string_UnicodeString_noterm->Length/sizeof(*string_UnicodeString_noterm->Buffer));
 234                         cwp++)
 235                 g_return_val_if_fail(*cwp!=0,FALSE);
 236
 237         g_return_val_if_fail(captive_validate_ucs2_fixlen(string_UnicodeString_noterm->Buffer,
 238                                         string_UnicodeString_noterm->Length/sizeof(*string_UnicodeString_noterm->Buffer)),
 239                         FALSE);
 240
 241         return TRUE;
 242 }
 243
 244
 245 /**
 246  * captive_validate_AnsiString:
 247  * @string_AnsiString: #PANSI_STRING type string to validate.
 248  * Invalid string input is forbidden.
 249  *
 250  * Checks the internal consistency of the given @string_AnsiString.
 251  *
 252  * Returns: %TRUE if the string is valid.
 253  */
 254 gboolean captive_validate_AnsiString(const ANSI_STRING *string_AnsiString)
 255 {
 256         g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
 257         g_return_val_if_fail(sizeof(CHAR)==sizeof(*string_AnsiString->Buffer),FALSE);
 258         g_return_val_if_fail(string_AnsiString!=NULL,FALSE);
 259         g_return_val_if_fail(string_AnsiString->MaximumLength>=string_AnsiString->Length+1,FALSE);
 260         g_return_val_if_fail(string_AnsiString->Length==strlen(string_AnsiString->Buffer),FALSE);
 261
 262         return TRUE;
 263 }
 264
 265
 266 /**
 267  * captive_ucs2_compare:
 268  * @string_a_ucs2: First string of type #const #gunichar2 * in pure UCS-2.
 269  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 270  * @string_b_ucs2: Second string of type #const #gunichar2 * in pure UCS-2.
 271  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 272  *
 273  * Compares case-sensitively @string_a_ucs2 and @string_b_ucs2.
 274  *
 275  * Returns: %TRUE if @string_a_ucs2 and @string_b_ucs2 are the same.
 276  */
 277 gboolean captive_ucs2_compare(const captive_ucs2 *string_a_ucs2,const captive_ucs2 *string_b_ucs2)
 278 {
 279 guint ui;
 280
 281         g_return_val_if_fail(captive_validate_ucs2(string_a_ucs2),FALSE);
 282         g_return_val_if_fail(captive_validate_ucs2(string_b_ucs2),FALSE);
 283
 284         ui=0;
 285         do {
 286                 if (string_a_ucs2[ui]!=string_b_ucs2[ui])
 287                         return FALSE;
 288                 } while (string_a_ucs2[ui++]);
 289         return TRUE;
 290 }
 291
 292
 293 /**
 294  * captive_UnicodeString_compare:
 295  * @string_a_UnicodeString: First string of type #PUNICODE_STRING.
 296  * Invalid string input is forbidden.
 297  * @string_b_UnicodeString: Second string of type #PUNICODE_STRING.
 298  * Invalid string input is forbidden.
 299  *
 300  * Compares case-sensitively @string_a_UnicodeString and @string_b_UnicodeString.
 301  *
 302  * Returns: %TRUE if @string_a_UnicodeString and @string_b_UnicodeString are the same.
 303  */
 304 gboolean captive_UnicodeString_compare
 305                 (const UNICODE_STRING *string_a_UnicodeString,const UNICODE_STRING *string_b_UnicodeString)
 306 {
 307         g_return_val_if_fail(captive_validate_UnicodeString(string_a_UnicodeString),FALSE);
 308         g_return_val_if_fail(captive_validate_UnicodeString(string_b_UnicodeString),FALSE);
 309
 310         if (string_a_UnicodeString->Length!=string_b_UnicodeString->Length)
 311                 return FALSE;
 312         return captive_ucs2_compare(string_a_UnicodeString->Buffer,string_b_UnicodeString->Buffer);
 313 }
 314
 315
 316 /**
 317  * captive_ucs2_compare_insensitive:
 318  * @string_a_ucs2: First string of type #const #gunichar2 * in pure UCS-2.
 319  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 320  * @string_b_ucs2: Second string of type #const #gunichar2 * in pure UCS-2.
 321  * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 322  *
 323  * Compares case-insensitively @string_a_ucs2 and @string_b_ucs2.
 324  *
 325  * Returns: %TRUE if @string_a_ucs2 and @string_b_ucs2 are the same.
 326  */
 327 gboolean captive_ucs2_compare_insensitive(const captive_ucs2 *string_a_ucs2,const captive_ucs2 *string_b_ucs2)
 328 {
 329 guint ui;
 330
 331         g_return_val_if_fail(captive_validate_ucs2(string_a_ucs2),FALSE);
 332         g_return_val_if_fail(captive_validate_ucs2(string_b_ucs2),FALSE);
 333
 334         ui=0;
 335         do {
 336                 if (g_unichar_toupper(string_a_ucs2[ui])!=g_unichar_toupper(string_b_ucs2[ui]))
 337                         return FALSE;
 338                 } while (string_a_ucs2[ui++]);
 339         return TRUE;
 340 }
 341
 342
 343 /**
 344  * captive_UnicodeString_compare_insensitive:
 345  * @string_a_UnicodeString: First string of type #PUNICODE_STRING.
 346  * Invalid string input is forbidden.
 347  * @string_b_UnicodeString: Second string of type #PUNICODE_STRING.
 348  * Invalid string input is forbidden.
 349  *
 350  * Compares case-insensitively @string_a_UnicodeString and @string_b_UnicodeString.
 351  *
 352  * Returns: %TRUE if @string_a_UnicodeString and @string_b_UnicodeString are the same.
 353  */
 354 gboolean captive_UnicodeString_compare_insensitive
 355                 (const UNICODE_STRING *string_a_UnicodeString,const UNICODE_STRING *string_b_UnicodeString)
 356 {
 357         g_return_val_if_fail(captive_validate_UnicodeString(string_a_UnicodeString),FALSE);
 358         g_return_val_if_fail(captive_validate_UnicodeString(string_b_UnicodeString),FALSE);
 359
 360         if (string_a_UnicodeString->Length!=string_b_UnicodeString->Length)
 361                 return FALSE;
 362         return captive_ucs2_compare_insensitive(string_a_UnicodeString->Buffer,string_b_UnicodeString->Buffer);
 363 }
 364
 365
 366 /* detect required memory size for g_alloca() */
 367 size_t _captive_UnicodeString_to_utf8_alloca_internal_sizeof(const UNICODE_STRING *string_UnicodeString)
 368 {
 369 glong length;
 370 size_t r;
 371 const WCHAR *cwcharp;
 372
 373         g_return_val_if_fail(captive_validate_UnicodeString(string_UnicodeString),1);
 374
 375         /* measure 'string_UnicodeString->Buffer' length in UTF-8 to 'r' */
 376         cwcharp=string_UnicodeString->Buffer;
 377         r=0;
 378         for (length=string_UnicodeString->Length/sizeof(*string_UnicodeString->Buffer);length;length--) {
 379 gint utf8len;
 380
 381                 utf8len=g_unichar_to_utf8(
 382                                 *cwcharp++,     /* c */
 383                                 NULL);  /* outbuf=NULL => just the length will be computed */
 384                 g_assert(utf8len>=0);
 385                 r+=utf8len;
 386                 }
 387         g_assert(*cwcharp==0);
 388         r++;    /* '\0'-termination */
 389
 390         /* utf8 byte-size */
 391         return r;
 392 }
 393
 394 /* transfer 'string_UnicodeString' to memory in 'mem' as utf8 w/o any further allocations */
 395 void _captive_UnicodeString_to_utf8_alloca_internal_fill(gchar *mem,const UNICODE_STRING *string_UnicodeString)
 396 {
 397 const WCHAR *cwcharp;
 398 #ifndef G_DISABLE_ASSERT
 399 gchar *mem_orig=mem;
 400 #endif /* G_DISABLE_ASSERT */
 401
 402         g_return_if_fail(mem!=NULL);
 403         if (!captive_validate_UnicodeString(string_UnicodeString)) {
 404                 *mem='\0';
 405                 g_return_if_reached();
 406                 }
 407
 408         /* We can't use any glib string conversions as UNICODE_STRING uses ucs2! */
 409         /* We can't use any glib string conversions as we need to write the string
 410          * to our supplied memory storage but glib always g_malloc()s it
 411          */
 412         /* copy 'string_UnicodeString->Buffer' to 'mem' */
 413         for (cwcharp=string_UnicodeString->Buffer;*cwcharp;cwcharp++) {
 414 gint utf8len;
 415
 416                 utf8len=g_unichar_to_utf8(
 417                                 (gunichar)*cwcharp,     /* c */
 418                                 mem);   /* outbuf */
 419                 g_assert(utf8len>=0);
 420                 mem+=utf8len;
 421                 }
 422         *mem='\0';
 423
 424         g_assert((size_t)((mem+1)-mem_orig) == _captive_UnicodeString_to_utf8_alloca_internal_sizeof(string_UnicodeString));
 425         g_assert(captive_validate_utf8(mem_orig));
 426 }
 427
 428
 429 /**
 430  * captive_UnicodeString_to_utf8_malloc:
 431  * @string_UnicodeString: #PUNICODE_STRING type of string to convert.
 432  *
 433  * g_malloc()-based conversion from #PUNICODE_STRING to plain #utf8 string.
 434  * You must free the result with g_free() function.
 435  *
 436  * Returns: #const #gchar * g_malloc()ed converted string @string_UnicodeString.
 437  */
 438 gchar *captive_UnicodeString_to_utf8_malloc(const UNICODE_STRING *string_UnicodeString)
 439 {
 440 gchar *r;
 441 #ifndef FUNCMALLOC_FROM_ALLOCA
 442 glong utf16_read,utf8_written;
 443 GError *err;
 444 #endif /* !FUNCMALLOC_FROM_ALLOCA */
 445
 446         g_return_val_if_fail(captive_validate_UnicodeString(string_UnicodeString),g_strdup(""));
 447
 448 #ifdef FUNCMALLOC_FROM_ALLOCA
 449
 450         r=g_malloc(_captive_UnicodeString_to_utf8_alloca_internal_sizeof(string_UnicodeString));
 451         _captive_UnicodeString_to_utf8_alloca_internal_fill(r,string_UnicodeString);
 452
 453 #else
 454
 455         err=NULL;       /* not precleared by g_utf8_to_utf16()! */
 456         r=g_utf16_to_utf8(
 457                         (const gunichar2 *)string_UnicodeString->Buffer,        /* str */
 458                         -1,     /* len=>'\0'-terminated */
 459                         &utf16_read,    /* items_read; counted in unichar2 (NOT UTF-16 characters or bytes!) */
 460                         &utf8_written,  /* items_written; counted in bytes (NOT UTF-8 characters!) */
 461                         &err);
 462         if (err) {
 463                 g_warning("%s: utf16_read=%ld,utf8_written=%ld: %s",G_STRLOC,
 464                                 (long)utf16_read,(long)utf8_written,err->message);
 465                 g_error_free(err);
 466                 g_assert(r==NULL);
 467                 g_return_val_if_reached(g_strdup(""));
 468                 }
 469         g_assert(r!=NULL);
 470
 471         g_assert(utf16_read==(glong)(string_UnicodeString->length/sizeof(*string_UnicodeString->Buffer)));
 472         g_assert(utf6_written==strlen(r));
 473
 474 #endif /* !FUNCMALLOC_FROM_ALLOCA */
 475
 476         g_assert(captive_validate_utf8(r));
 477
 478         return r;
 479 }
 480
 481
 482 /* detect required memory size for g_alloca() */
 483 size_t _captive_utf8_to_UnicodeString_alloca_internal_sizeof(const gchar *string_utf8)
 484 {
 485         g_return_val_if_fail(captive_validate_utf8(string_utf8),1);
 486
 487         /* find the value for PUNICODE_STRING->MaximumLength */
 488         return 0
 489                         +sizeof(UNICODE_STRING)
 490                         +sizeof(WCHAR)*(g_utf8_strlen(string_utf8,
 491                                         -1      /* max; -1 means '\0'-terminated */
 492                                         )+1);   /* '\0'-termination */
 493 }
 494
 495 static void terminate_static_UnicodeString(UNICODE_STRING *string_UnicodeString,glong length)
 496 {
 497         /* 'string_UnicodeString' is not yet valid in this point! */
 498         g_return_if_fail(string_UnicodeString!=NULL);
 499         g_return_if_fail(length>=0);
 500
 501         string_UnicodeString->Length=length*sizeof(WCHAR);
 502         string_UnicodeString->MaximumLength=(length+1)*sizeof(WCHAR);
 503         string_UnicodeString->Buffer[length]=0;
 504
 505         g_assert(captive_validate_UnicodeString(string_UnicodeString));
 506 }
 507
 508 /* transfer 'string_UnicodeString' to memory in 'mem' w/o any further allocations */
 509 void _captive_utf8_to_UnicodeString_alloca_internal_fill(UNICODE_STRING *mem,const gchar *string_utf8)
 510 {
 511 gunichar2 *utf16;
 512 captive_ucs2 *ucs2;
 513 glong utf8_read,utf16_written;
 514 GError *err;
 515
 516         g_return_if_fail(mem!=NULL);
 517         mem->Buffer=(PWSTR)(((char *)mem)+sizeof(*mem));        /* for terminate_static_UnicodeString() below */
 518         if (!captive_validate_utf8(string_utf8)) {
 519                 terminate_static_UnicodeString(mem,0);
 520                 g_return_if_reached();
 521                 }
 522
 523         err=NULL;       /* not precleared by g_utf8_to_utf16()! */
 524         utf16=g_utf8_to_utf16(
 525                         string_utf8,    /* str */
 526                         -1,     /* len=>'\0'-terminated */
 527                         &utf8_read,     /* items_read; counted in bytes (NOT chars!) */
 528                         &utf16_written, /* items_written; counted in UTF-16 characters (NOT unichar2 or bytes!) */
 529                         &err);
 530         if (err) {
 531                 g_warning("%s: utf8_read=%ld,utf16_written=%ld: %s",G_STRLOC,
 532                                 (long)utf8_read,(long)utf16_written,err->message);
 533                 g_error_free(err);
 534                 g_assert(utf16==NULL);
 535                 terminate_static_UnicodeString(mem,0);
 536                 g_return_if_reached();
 537                 }
 538         g_assert(utf16!=NULL);
 539
 540         /* Check for UCS-2 compliance (reject if surrogates inside) */
 541         g_assert(captive_validate_ucs2((const captive_ucs2 *)utf16));
 542         /* valid UCS-2 */
 543         ucs2=(captive_ucs2 *)utf16;
 544
 545         g_assert(utf8_read==(glong)strlen(string_utf8));
 546         g_assert(utf16_written==captive_ucs2_strlen(ucs2));
 547
 548         /* check of validity of _captive_utf8_to_UnicodeString_alloca_internal_sizeof() result */
 549         g_assert((gchar *)(mem->Buffer+(utf16_written+1))       /* +1 => '\0'-termination */
 550                         == ((gchar *)mem)+_captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8));
 551
 552         memcpy(mem->Buffer,ucs2,sizeof(WCHAR)*(utf16_written+1));
 553         g_free(ucs2);
 554         terminate_static_UnicodeString(mem,utf16_written);
 555
 556         g_assert(captive_validate_UnicodeString(mem));
 557 }
 558
 559
 560 /**
 561  * captive_utf8_to_UnicodeString_malloc:
 562  * @string_utf8: #const #gchar * string in #utf8 to convert.
 563  *
 564  * g_malloc()-based conversion from plain #utf8 string to #PUNICODE_STRING.
 565  * You must free the result with g_free() function.
 566  *
 567  * Returns: #PUNICODE_STRING g_malloc()ed converted string @string_utf8.
 568  */
 569 PUNICODE_STRING captive_utf8_to_UnicodeString_malloc(const gchar *string_utf8)
 570 {
 571 UNICODE_STRING *r;
 572 #ifndef FUNCMALLOC_FROM_ALLOCA
 573 gunichar *ucs4;
 574 glong utf8_read,ucs4_written;
 575 GError *err;
 576 #endif /* !FUNCMALLOC_FROM_ALLOCA */
 577
 578         g_return_val_if_fail(captive_validate_utf8(string_utf8),captive_utf8_to_UnicodeString_malloc(""));
 579
 580 #ifdef FUNCMALLOC_FROM_ALLOCA
 581
 582         r=g_malloc(_captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8));
 583         _captive_utf8_to_UnicodeString_alloca_internal_fill(r,string_utf8);
 584
 585 #else
 586
 587 #error "FIXME: NOT IMPLEMENTED"
 588
 589 #endif /* !FUNCMALLOC_FROM_ALLOCA */
 590
 591         g_assert(captive_validate_UnicodeString(r));
 592
 593         return r;
 594 }
 595
 596
 597 /* map: (const gunichar *) -> (const gunichar2 *); UCS-4 -> UTF-16 */
 598 static GHashTable *captive_ucs4_to_utf16_hash;
 599
 600 static void captive_ucs4_to_utf16_hash_init(void)
 601 {
 602         if (captive_ucs4_to_utf16_hash)
 603                 return;
 604         captive_ucs4_to_utf16_hash=g_hash_table_new_full(
 605                         g_direct_hash,  /* hash_func */
 606                         g_direct_equal, /* key_equal_func */
 607                         (GDestroyNotify)NULL,   /* key_destroy_func; we require persistent strings as input */
 608                         (GDestroyNotify)g_free);        /* value_destroy_func; result of g_ucs4_to_utf16() */
 609 }
 610
 611 /**
 612  * captive_ucs4_to_utf16_const:
 613  * @string_ucs4: #const #gunichar * type of persistent string to convert.
 614  * This string MUST remain readable with the same content forever.
 615  *
 616  * Constant string conversion from 32-bit #wchar_t to 16-bit (possible pairs of) UTF-16.
 617  * You may not modify the result in any way.
 618  *
 619  * It is guaranteed to get two different string addresses for two different
 620  * input addresses even if the input strings content is the same.
 621  * Otherwise we would behave as #GCC option %-fmerge-constants which
 622  * results in %C non-conforming behaviour.
 623  *
 624  * FIXME: UTF-16 encoding IS NOT IMPLEMENTED.
 625  *
 626  * See also captive_ucs4_to_ucs2_const().
 627  *
 628  * Returns: #const #gunichar2 * converted string @string_ucs4.
 629  */
 630 const gunichar2 *captive_ucs4_to_utf16_const(const gunichar *string_ucs4)
 631 {
 632 glong ucs4_read,utf16_written;
 633 GError *err;
 634 const gunichar2 *r_lookup;
 635 gunichar2 *r;
 636
 637         g_return_val_if_fail(captive_validate_ucs4(string_ucs4),captive_ucs4_to_utf16_const((const gunichar *)L""));
 638
 639         captive_ucs4_to_utf16_hash_init();
 640
 641         /* found already existing item in the table */
 642         if ((r_lookup=g_hash_table_lookup(captive_ucs4_to_utf16_hash,
 643                         string_ucs4)    /* key */
 644                         )) {
 645                 return r_lookup;
 646                 }
 647
 648         /* Prepare 'r' as UTF-16 */
 649         err=NULL;       /* not precleared by g_ucs4_to_utf16()! */
 650         r=g_ucs4_to_utf16(
 651                         (const gunichar *)string_ucs4,  /* str */
 652                         -1,     /* len; -1 means '\0'-termination */
 653                         &ucs4_read,     /* items_read; counted in chars (==unichars; NOT bytes!) */
 654                         &utf16_written, /* items_written; counted in gunichar2 (NOT chars or bytes!) */
 655                         &err);
 656         if (err) {
 657                 g_warning("%s: ucs4_read=%ld,utf16_written=%ld: %s",G_STRLOC,
 658                                 (long)ucs4_read,(long)utf16_written,err->message);
 659                 g_error_free(err);
 660                 g_assert(r==NULL);
 661                 g_return_val_if_reached(captive_ucs4_to_utf16_const((const gunichar *)L""));
 662                 }
 663         g_assert(r!=NULL);
 664         g_assert(ucs4_read==(glong)wcslen((const wchar_t *)string_ucs4));
 665         /* FIXME: We don't have captive_utf16_strlen() */
 666         g_assert(utf16_written==(glong)captive_ucs2_strlen((const gunichar2 *)r));
 667         /* (ucs4_read==utf16_written) check would discard any double-pair UTF-16 encodings
 668          * but this function is designed as UTF-16 compliant.
 669          */
 670
 671         /* store new item to the table */
 672         g_hash_table_insert(captive_ucs4_to_utf16_hash,
 673                         (gpointer)string_ucs4,  /* key; de-const */
 674                         r);     /* value */
 675
 676 #if 0   /* We don't have captive_validate_utf16() */
 677         g_assert(captive_validate_utf16(r));
 678 #endif
 679
 680         return r;
 681 }