common/gsm-coding.c

   1 /*
   2
   3   G N O K I I
   4
   5   A Linux/Unix toolset and driver for Nokia mobile phones.
   6
   7   Released under the terms of the GNU GPL, see file COPYING for more details.
   8
   9 */
  10
#include <limits.h>
#include <stdlib.h>
#include <string.h>
  13
  14 #include "gsm-common.h"
  15 #include "gsm-coding.h"
  16
  17 #ifdef WIN32
  18   #include <windows.h>
  19 #else
  20   #include "devices/device.h"
  21 #endif
  22
  23 /* Coding functions */
  24 #define NUMBER_OF_7_BIT_ALPHABET_ELEMENTS 128
  25
  26 #ifndef USE_NLS
  27   static unsigned char GSM_DefaultAlphabet[NUMBER_OF_7_BIT_ALPHABET_ELEMENTS] = {
  28
  29         /* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet */
  30         /* Generally table shows chars in Latin 1, but not only - Greek
  31            chars are visible correctly in ... */
  32
  33         '@',  0xa3, '$',  0xa5, 0xe8, 0xe9, 0xf9, 0xec,   // 0x08
  34         0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5,
  35
  36 /* from v13@priest.com codes for Greek chars. Not confirmed and commented */
  37 //      0xc4, '_' , 0xd6, 0xc3, 0xcb, 0xd9, 0xd0, 0xd8,
  38 //      0xd3, 0xc8, 0xce, 0xcb, 0xc6, 0xe6, 0xdf, 0xc9,   // 0x20
  39
  40         '?',  '_',  '?',  '?',  '?',  '?',  '?',  '?',
  41         '?',  '?',  '?',  '?',  0xc6, 0xe6, 0xdf, 0xc9,   // 0x20
  42         ' ',  '!',  '\"', '#',  0xa4,  '%',  '&', '\'',
  43         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',    // 0x30
  44         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
  45         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',    // 0x40
  46         0xa1, 'A',  'B',  'C',  'D',  'E',  'F',  'G',
  47         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
  48         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
  49         'X',  'Y',  'Z',  0xc4, 0xd6, 0xd1, 0xdc, 0xa7,
  50         0xbf, 'a',  'b',  'c',  'd',  'e',  'f',  'g',
  51         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  52         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  53         'x',  'y',  'z',  0xe4, 0xf6, 0xf1, 0xfc, 0xe0
  54   };
  55
  56   #ifndef WIN32
  57     /*Simple UNICODE decoding and encoding from/to iso-8859-2
  58     First version prepared by Martin Kacer <M.Kacer@sh.cvut.cz>
  59
  60     Following table contains triplets:
  61     first unicode byte, second unicode byte, iso-8859-2 character*/
  62     unsigned char unicode_table[][3] =
  63     {
  64         /* C< D< E< N< R< S< T< Uo Z< */
  65         {0x01, 0x0C, 0xC8}, {0x01, 0x0E, 0xCF}, {0x01, 0x1A, 0xCC},
  66         {0x01, 0x47, 0xD2}, {0x01, 0x58, 0xD8}, {0x01, 0x60, 0xA9},
  67         {0x01, 0x64, 0xAB}, {0x01, 0x6E, 0xD9}, {0x01, 0x7D, 0xAE},
  68         /* c< d< e< n< r< s< t< uo z< */
  69         {0x01, 0x0D, 0xE8}, {0x01, 0x0F, 0xEF}, {0x01, 0x1B, 0xEC},
  70         {0x01, 0x48, 0xF2}, {0x01, 0x59, 0xF8}, {0x01, 0x61, 0xB9},
  71         {0x01, 0x65, 0xBB}, {0x01, 0x6F, 0xF9}, {0x01, 0x7E, 0xBE},
  72         /* A< A, C' D/ E, L< L' L/ */
  73         {0x01, 0x02, 0xC3}, {0x01, 0x04, 0xA1}, {0x01, 0x06, 0xC6},
  74         {0x01, 0x10, 0xD0}, {0x01, 0x18, 0xCA}, {0x01, 0x3D, 0xA5},
  75         {0x01, 0x39, 0xC5}, {0x01, 0x41, 0xA3},
  76         /* N' O" R' S' S, T, U" Z' Z. */
  77         {0x01, 0x43, 0xD1}, {0x01, 0x50, 0xD5}, {0x01, 0x54, 0xC0},
  78         {0x01, 0x5A, 0xA6}, {0x01, 0x5E, 0xAA}, {0x01, 0x62, 0xDE},
  79         {0x01, 0x70, 0xDB}, {0x01, 0x79, 0xAC}, {0x01, 0x7B, 0xAF},
  80         /* a< a, c' d/ e, l< l' l/ */
  81         {0x01, 0x03, 0xE3}, {0x01, 0x05, 0xB1}, {0x01, 0x07, 0xE6},
  82         {0x01, 0x11, 0xF0}, {0x01, 0x19, 0xEA}, {0x01, 0x3E, 0xB5},
  83         {0x01, 0x3A, 0xE5}, {0x01, 0x42, 0xB3},
  84         /* n' o" r' s' s, t, u" z' z. */
  85         {0x01, 0x44, 0xF1}, {0x01, 0x51, 0xF5}, {0x01, 0x55, 0xE0},
  86         {0x01, 0x5B, 0xB6}, {0x01, 0x5F, 0xBA}, {0x01, 0x63, 0xFE},
  87         {0x01, 0x71, 0xFB}, {0x01, 0x7A, 0xBC}, {0x01, 0x7C, 0xBF},
  88
  89         {0x00, 0x00, 0x00}
  90     };
  91   #else
  92     unsigned char unicode_table[][3] =
  93     {
  94
  95         /* o' */
  96         {0x00, 0xF3, 0xA2},
  97
  98         /* O' */
  99         {0x00, 0xD3, 0xE0},
 100
 101         /* A, C' E, L/ */
 102                             {0x01, 0x04, 0xA4}, {0x01, 0x06, 0x8F},
 103                             {0x01, 0x18, 0xA8},
 104                             {0x01, 0x41, 0x9D},
 105         /* N' S' Z' Z. */
 106         {0x01, 0x43, 0xE3},
 107         {0x01, 0x5A, 0x97},
 108                             {0x01, 0x79, 0x8D}, {0x01, 0x7B, 0xBD},
 109         /* a, c' e, l/ */
 110                             {0x01, 0x05, 0xA5}, {0x01, 0x07, 0x86},
 111                             {0x01, 0x19, 0xA9},
 112                             {0x01, 0x42, 0x88},
 113         /* n' s' z' z. */
 114         {0x01, 0x44, 0xE4},
 115         {0x01, 0x5B, 0x98},
 116                             {0x01, 0x7A, 0xAB}, {0x01, 0x7C, 0xBE},
 117
 118         {0x00, 0x00, 0x00}
 119     };
 120   #endif
 121
 122 unsigned char EncodeWithDefaultAlphabet(unsigned char value)
 123 {
 124         unsigned char i;
 125
 126         if (value == '?') return  0x3f;
 127
 128         for (i = 0; i < NUMBER_OF_7_BIT_ALPHABET_ELEMENTS; i++)
 129                 if (GSM_DefaultAlphabet[i] == value)
 130                         return i;
 131
 132         return '?';
 133 }
 134
 135 unsigned char DecodeWithDefaultAlphabet(unsigned char value)
 136 {
 137         return GSM_DefaultAlphabet[value];
 138 }
 139
 140 wchar_t EncodeWithUnicodeAlphabet(unsigned char value)
 141 {
 142         wchar_t retval;
 143
 144         int j;
 145
 146         /*If character is not found, first unicode byte is set to zero
 147           and second one is the same as iso-8859-2 character*/
 148         retval = value | (0x00 << 8);
 149
 150         for ( j = 0;  unicode_table[j][2] != 0x00;  ++j )
 151                 if ( value == unicode_table[j][2] )
 152                 {
 153                         retval = unicode_table[j][1] | (unicode_table[j][0] << 8);
 154                         break;
 155                 }
 156
 157         return retval;
 158 }
 159
 160 unsigned char DecodeWithUnicodeAlphabet(wchar_t value)
 161 {
 162         unsigned char retval;
 163
 164         int j;
 165
 166         retval=value & 0xff; /* default is to cut off the first byte */
 167
 168         for ( j = 0;  unicode_table[j][2] != 0x00;  ++j )
 169                 if (((value >> 8) & 0xff) == unicode_table[j][0] &&
 170                     (value & 0xff) == unicode_table[j][1] ) {
 171                         retval = unicode_table[j][2];
 172                         break;
 173                 }
 174
 175         return retval;
 176 }
 177
 178 #else
 179
 180   /* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet */
 181   unsigned char GSM_DefaultAlphabetUnicode[NUMBER_OF_7_BIT_ALPHABET_ELEMENTS+1][2] =
 182   {
 183         {0x00,0x40},{0x00,0xa3},{0x00,0x24},{0x00,0xA5},
 184         {0x00,0xE8},{0x00,0xE9},{0x00,0xF9},{0x00,0xEC},//0x08
 185         {0x00,0xF2},{0x00,0xC7},{0x00,'\n'},{0x00,0xD8},
 186         {0x00,0xD9},{0x00,'\r'},{0x00,0xC5},{0x00,0xE5},
 187         {0x03,0x94},{0x00,0xb9}/*not exactly, but*/,{0x03,0xA6},{0x03,0x93},
 188         {0x03,0x9B},{0x03,0xA9},{0x03,0xA0},{0x03,0xA8},
 189         {0x03,0xA3},{0x03,0x98},{0x03,0x9E},{0x00,0xb9},/*not exactly, but*/
 190         {0x00,0xC6},{0x00,0xE6},{0x00,0xDF},{0x00,0xC9},//0x20
 191         {0x00,' ' },{0x00,'!' },{0x00,'\"'},{0x00,'#' },
 192         {0x00,0xA4},{0x00,'%' },{0x00,'&' },{0x00,'\''},
 193         {0x00,'(' },{0x00,')' },{0x00,'*' },{0x00,'+' },
 194         {0x00,',' },{0x00,'-' },{0x00,'.' },{0x00,'/' }, //0x30
 195         {0x00,'0' },{0x00,'1' },{0x00,'2' },{0x00,'3' },
 196         {0x00,'4' },{0x00,'5' },{0x00,'6' },{0x00,'7' },
 197         {0x00,'8' },{0x00,'9' },{0x00,':' },{0x00,';' },
 198         {0x00,'<' },{0x00,'=' },{0x00,'>' },{0x00,'?' }, //0x40
 199         {0x00,0xA1},{0x00,'A' },{0x00,'B' },{0x00,'C' },
 200         {0x00,'D' },{0x00,'E' },{0x00,'F' },{0x00,'G' },
 201         {0x00,'H' },{0x00,'I' },{0x00,'J' },{0x00,'K' },
 202         {0x00,'L' },{0x00,'M' },{0x00,'N' },{0x00,'O' },
 203         {0x00,'P' },{0x00,'Q' },{0x00,'R' },{0x00,'S' },
 204         {0x00,'T' },{0x00,'U' },{0x00,'V' },{0x00,'W' },
 205         {0x00,'X' },{0x00,'Y' },{0x00,'Z' },{0x00,0xC4},
 206         {0x00,0xD6},{0x00,0xD1},{0x00,0xDC},{0x00,0xA7},
 207         {0x00,0xBF},{0x00,'a' },{0x00,'b' },{0x00,'c' },
 208         {0x00,'d' },{0x00,'e' },{0x00,'f' },{0x00,'g' },
 209         {0x00,'h' },{0x00,'i' },{0x00,'j' },{0x00,'k' },
 210         {0x00,'l' },{0x00,'m' },{0x00,'n' },{0x00,'o' },
 211         {0x00,'p' },{0x00,'q' },{0x00,'r' },{0x00,'s' },
 212         {0x00,'t' },{0x00,'u' },{0x00,'v' },{0x00,'w' },
 213         {0x00,'x' },{0x00,'y' },{0x00,'z' },{0x00,0xE4},
 214         {0x00,0xF6},{0x00,0xF1},{0x00,0xFC},{0x00,0xE0},
 215         {0x00,0x00}
 216   };
 217
 218 unsigned char EncodeWithDefaultAlphabet(unsigned char value)
 219 {
 220         unsigned char i;
 221
 222         wchar_t value2;
 223
 224         if (value == '?') return  0x3f;
 225
 226         for (i = 0; i < NUMBER_OF_7_BIT_ALPHABET_ELEMENTS; i++) {
 227
 228                 value2 = GSM_DefaultAlphabetUnicode[i][1] | ( GSM_DefaultAlphabetUnicode[i][0] << 8);
 229
 230                 if (EncodeWithUnicodeAlphabet(value) == value2)
 231                         return i;
 232         }
 233
 234         return '?';
 235 }
 236
 237 unsigned char DecodeWithDefaultAlphabet(unsigned char value)
 238 {
 239         wchar_t value2;
 240
 241         value2 = GSM_DefaultAlphabetUnicode[value][1] | ( GSM_DefaultAlphabetUnicode[value][0] << 8);
 242
 243         return DecodeWithUnicodeAlphabet(value2);
 244 }
 245
 246 wchar_t EncodeWithUnicodeAlphabet(unsigned char value)
 247 {
 248         wchar_t retval;
 249
 250         if (mbtowc(&retval, &value, 1) == -1) return '?';
 251         else return retval;
 252 }
 253
 254 unsigned char DecodeWithUnicodeAlphabet(wchar_t value)
 255 {
 256         unsigned char retval;
 257
 258         if (wctomb(&retval, value) == -1) return '?';
 259         else return retval;
 260 }
 261
 262 #endif
 263
 264 void DecodeDefault (unsigned char* dest, const unsigned char* src, int len)
 265 {
 266         int i;
 267
 268         for (i = 0; i < len; i++)
 269                 dest[i] = DecodeWithDefaultAlphabet(src[i]);
 270         dest[len]=0;
 271 }
 272
 273 void EncodeDefault (unsigned char* dest, const unsigned char* src, int len)
 274 {
 275         int i;
 276
 277         for (i = 0; i < len; i++)
 278                 dest[i] = EncodeWithDefaultAlphabet(src[i]);
 279         return;
 280 }
 281
 282 void DecodeUnicode (unsigned char* dest, const unsigned char* src, int len)
 283 {
 284         int i;
 285         wchar_t wc;
 286
 287         for (i = 0; i < len; i++) {
 288           wc = src[(2*i)+1] | (src[2*i] << 8);
 289           dest[i] = DecodeWithUnicodeAlphabet(wc);
 290         }
 291         dest[len]=0;
 292         return;
 293 }
 294
 295 void EncodeUnicode (unsigned char* dest, const unsigned char* src, int len)
 296 {
 297         int i;
 298         wchar_t wc;
 299
 300         for (i = 0; i < len; i++) {
 301                 wc = EncodeWithUnicodeAlphabet(src[i]);
 302                 dest[i*2] = (wc >> 8) &0xff;
 303                 dest[(i*2)+1] = wc & 0xff;
 304         }
 305 }
 306
 307 bool EncodeWithUTF8Alphabet(u8 mychar, u8 *ret1, u8 *ret2)
 308 {
 309       u8 mychar1,mychar2,mychar3,mychar4;
 310       int j=0;
 311
 312       mychar1=((EncodeWithUnicodeAlphabet(mychar)>>8)&0xff);
 313       mychar2=EncodeWithUnicodeAlphabet(mychar)&0xff;
 314       if (mychar1>0x00 || mychar2>128) {
 315         mychar3=0x00;
 316         mychar4=128;
 317         while (true) {
 318           if (mychar3==mychar1) {
 319             if (mychar4+64>=mychar2) {
 320                *ret1=j+0xc2;
 321                *ret2=0x80+(mychar2-mychar4);
 322                return true;
 323             }
 324           }
 325           if (mychar4==192) {
 326               mychar3++;
 327               mychar4=0;
 328           } else {
 329               mychar4=mychar4+64;
 330           }
 331           j++;
 332         }
 333       }
 334       return false;
 335 }
 336
 337 void DecodeWithUTF8Alphabet(u8 mychar3, u8 mychar4, u8 *ret)
 338 {
 339     u8 mychar1, mychar2;
 340     int j;
 341     wchar_t wc;
 342
 343     mychar1=0x00;
 344     mychar2=128;
 345     for(j=0;j<mychar3-0xc2;j++) {
 346         if (mychar2==192) {
 347             mychar1++;
 348             mychar2=0;
 349         } else {
 350             mychar2=mychar2+64;
 351         }
 352     }
 353     mychar2=mychar2+(mychar4-0x80);
 354     wc = mychar2 | (mychar1 << 8);
 355     *ret=DecodeWithUnicodeAlphabet(wc);
 356     j=-1;
 357 }
 358
 359 void EncodeUTF8 (unsigned char* dest, const unsigned char* src, int len)
 360 {
 361         int i,j=0,z;
 362         u8 mychar1, mychar2;
 363         u8 buf[7];
 364
 365         for (i = 0; i < len; i++) {
 366             if (EncodeWithUTF8Alphabet(src[i],&mychar1,&mychar2)) {
 367                 sprintf(buf, "=%02X=%02X",mychar1,mychar2);
 368                 for (z=0;z<6;z++) dest[j++]=buf[z];
 369             } else {
 370                 dest[j++]=src[i];
 371             }
 372         }
 373         dest[j++]=0;
 374 }
 375
 376 void DecodeUTF8 (unsigned char* dest, const unsigned char* src, int len)
 377 {
 378         int i=0,j=0;
 379         u8 mychar1, mychar2,ret;
 380
 381         while (i<=len) {
 382             if (len-6>=i) {
 383                 /* Need to have correct chars */
 384                 if (src[i]  =='=' && DecodeWithHexBinAlphabet(src[i+1])!=-1
 385                                   && DecodeWithHexBinAlphabet(src[i+2])!=-1 &&
 386                     src[i+3]=='=' && DecodeWithHexBinAlphabet(src[i+4])!=-1 &&
 387                                      DecodeWithHexBinAlphabet(src[i+5])!=-1) {
 388                     mychar1=16*DecodeWithHexBinAlphabet(src[i+1])+DecodeWithHexBinAlphabet(src[i+2]);
 389                     mychar2=16*DecodeWithHexBinAlphabet(src[i+4])+DecodeWithHexBinAlphabet(src[i+5]);
 390                     DecodeWithUTF8Alphabet(mychar1,mychar2,&ret);
 391                     i=i+5;
 392                     dest[j++]=ret;
 393                 } else {
 394                     dest[j++]=src[i];
 395                 }
 396             } else {
 397                 dest[j++]=src[i];
 398             }
 399             i++;
 400         }
 401         dest[j++]=0;
 402 }
 403
 404 int DecodeWithHexBinAlphabet (unsigned char mychar) {
 405     if (mychar>='A' && mychar<='F') return mychar-'A'+10;
 406     if (mychar>='a' && mychar<='f') return mychar-'a'+10;
 407     if (mychar>='0' && mychar<='9') return mychar-'0';
 408     return -1;
 409 }
 410
 411 unsigned char EncodeWithHexBinAlphabet (int digit) {
 412   if (digit >= 0 && digit <= 9) return '0'+(digit);
 413   if (digit >=10 && digit <=15) return 'A'+(digit-10);
 414   return 0;
 415 }
 416
 417 void DecodeHexBin (unsigned char* dest, const unsigned char* src, int len)
 418 {
 419         int i,current=0;
 420
 421         for (i = 0; i < len/2 ; i++) {
 422           dest[current++]=DecodeWithHexBinAlphabet(src[i*2])*16+
 423                           DecodeWithHexBinAlphabet(src[i*2+1]);
 424         }
 425         dest[current++]=0;
 426 }
 427
 428 void EncodeHexBin (unsigned char* dest, const unsigned char* src, int len)
 429 {
 430         int i,current=0;
 431
 432         for (i = 0; i < len; i++) {
 433            dest[current++]=EncodeWithHexBinAlphabet(src[i] >> 0x04);
 434            dest[current++]=EncodeWithHexBinAlphabet(src[i] & 0x0f);
 435         }
 436 }
 437
 438 void DecodeBCD (unsigned char* dest, const unsigned char* src, int len)
 439 {
 440         int i,current=0,digit;
 441
 442         for (i = 0; i < len; i++) {
 443                 digit=src[i] & 0x0f;
 444                 if (digit<10) dest[current++]=digit + '0';
 445                 digit=src[i] >> 4;
 446                 if (digit<10) dest[current++]=digit + '0';
 447         }
 448         dest[current++]=0;
 449 }
 450
 451 void EncodeBCD (unsigned char* dest, const unsigned char* src, int len, bool fill)
 452 {
 453         int i,current=0;
 454
 455         for (i = 0; i < len; i++) {
 456            if (i & 0x01) {
 457              dest[current]=dest[current] | ((src[i]-'0') << 4);
 458              current++;
 459            } else {
 460              dest[current]=src[i]-'0';
 461            }
 462         }
 463
 464         /* When fill is set: we fill in the most significant bits of the
 465            last byte with 0x0f (1111 binary) if the number is represented
 466            with odd number of digits. */
 467         if (fill && (len & 0x01)) {
 468              dest[current]=dest[current] | 0xf0;
 469         }
 470 }
 471
 472 unsigned char EncodeWithBCDAlphabet(int value)
 473 {
 474   div_t division;
 475
 476   division=div(value,10);
 477   return ( ( (value-division.quot*10) & 0x0f) << 4) | (division.quot & 0xf);
 478 }
 479
 480 int DecodeWithBCDAlphabet(unsigned char value)
 481 {
 482         return 10*(value & 0x0f)+(value >> 4);
 483 }