1 /***************************************************************************/
5 /* TrueType new character mapping table (cmap) support (body). */
7 /* Copyright 2002 by */
8 /* David Turner, Robert Wilhelm, and Werner Lemberg. */
10 /* This file is part of the FreeType project, and may only be used, */
11 /* modified, and distributed under the terms of the FreeType project */
12 /* license, LICENSE.TXT. By continuing to use, modify, or distribute */
13 /* this file you indicate that you have read the license and */
14 /* understand and accept it fully. */
16 /***************************************************************************/
20 #include FT_INTERNAL_DEBUG_H
21 #include FT_INTERNAL_OBJECTS_H
22 #include FT_INTERNAL_STREAM_H
28 /*************************************************************************/
30 /* The macro FT_COMPONENT is used in trace mode. It is an implicit */
31 /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log */
32 /* messages during execution. */
35 #define FT_COMPONENT trace_ttcmap
38 #define TT_PEEK_SHORT FT_PEEK_SHORT
39 #define TT_PEEK_USHORT FT_PEEK_USHORT
40 #define TT_PEEK_LONG FT_PEEK_LONG
41 #define TT_PEEK_ULONG FT_PEEK_ULONG
43 #define TT_NEXT_SHORT FT_NEXT_SHORT
44 #define TT_NEXT_USHORT FT_NEXT_USHORT
45 #define TT_NEXT_LONG FT_NEXT_LONG
46 #define TT_NEXT_ULONG FT_NEXT_ULONG
49 FT_CALLBACK_DEF( FT_Error )
50 tt_cmap_init( TT_CMap cmap,
58 /*************************************************************************/
59 /*************************************************************************/
61 /***** FORMAT 0 *****/
63 /*************************************************************************/
64 /*************************************************************************/
66 /*************************************************************************/
71 /* NAME OFFSET TYPE DESCRIPTION */
73 /* format 0 USHORT must be 0 */
74 /* length 2 USHORT table length in bytes */
75 /* language 4 USHORT Mac language code */
76 /* glyph_ids 6 BYTE[256] array of glyph indices */
80 #ifdef TT_CONFIG_CMAP_FORMAT_0
82 FT_CALLBACK_DEF( void )
83 tt_cmap0_validate( FT_Byte* table,
86 FT_Byte* p = table + 2;
87 FT_UInt length = TT_NEXT_USHORT( p );
90 if ( table + length > valid->limit || length < 262 )
93 /* check glyph indices whenever necessary */
94 if ( valid->level >= FT_VALIDATE_TIGHT )
100 for ( n = 0; n < 256; n++ )
103 if ( idx >= TT_VALID_GLYPH_COUNT( valid ) )
110 FT_CALLBACK_DEF( FT_UInt )
111 tt_cmap0_char_index( TT_CMap cmap,
112 FT_UInt32 char_code )
114 FT_Byte* table = cmap->data;
117 return char_code < 256 ? table[6 + char_code] : 0;
121 FT_CALLBACK_DEF( FT_UInt )
122 tt_cmap0_char_next( TT_CMap cmap,
123 FT_UInt32 *pchar_code )
125 FT_Byte* table = cmap->data;
126 FT_UInt32 charcode = *pchar_code;
127 FT_UInt32 result = 0;
131 table += 6; /* go to glyph ids */
132 while ( ++charcode < 256 )
134 gindex = table[charcode];
142 *pchar_code = result;
147 FT_CALLBACK_TABLE_DEF
148 const TT_CMap_ClassRec tt_cmap0_class_rec =
151 sizeof( TT_CMapRec ),
153 (FT_CMap_InitFunc) tt_cmap_init,
154 (FT_CMap_DoneFunc) NULL,
155 (FT_CMap_CharIndexFunc)tt_cmap0_char_index,
156 (FT_CMap_CharNextFunc) tt_cmap0_char_next
159 (TT_CMap_ValidateFunc) tt_cmap0_validate
162 #endif /* TT_CONFIG_CMAP_FORMAT_0 */
165 /*************************************************************************/
166 /*************************************************************************/
168 /***** FORMAT 2 *****/
170 /***** This is used for certain CJK encodings that encode text in a *****/
171 /***** mixed 8/16 bits encoding along the following lines: *****/
173 /***** * Certain byte values correspond to an 8-bit character code *****/
174 /***** (typically in the range 0..127 for ASCII compatibility). *****/
176 /***** * Certain byte values signal the first byte of a 2-byte *****/
177 /***** character code (but these values are also valid as the *****/
178 /***** second byte of a 2-byte character). *****/
180 /***** The following charmap lookup and iteration functions all *****/
181 /***** assume that the value "charcode" corresponds to the following: *****/
183 /***** - For one byte characters, "charcode" is simply the *****/
184 /***** character code. *****/
186 /***** - For two byte characters, "charcode" is the 2-byte *****/
187 /***** character code in big endian format. More exactly: *****/
189 /***** (charcode >> 8) is the first byte value *****/
190 /***** (charcode & 0xFF) is the second byte value *****/
192 /***** Note that not all values of "charcode" are valid according *****/
193 /***** to these rules, and the functions moderately check the *****/
194 /***** arguments. *****/
196 /*************************************************************************/
197 /*************************************************************************/
199 /*************************************************************************/
204 /* NAME OFFSET TYPE DESCRIPTION */
206 /* format 0 USHORT must be 2 */
207 /* length 2 USHORT table length in bytes */
208 /* language 4 USHORT Mac language code */
209 /* keys 6 USHORT[256] sub-header keys */
210 /* subs 518 SUBHEAD[NSUBS] sub-headers array */
211 /* glyph_ids 518+NSUB*8 USHORT[] glyph id array */
213 /* The `keys' table is used to map charcode high-bytes to sub-headers. */
214 /* The value of `NSUBS' is the number of sub-headers defined in the */
215 /* table and is computed by finding the maximum of the `keys' table. */
217 /* Note that for any n, `keys[n]' is a byte offset within the `subs' */
218 /* table, i.e., it is the corresponding sub-header index multiplied */
221 /* Each sub-header has the following format: */
223 /* NAME OFFSET TYPE DESCRIPTION */
225 /* first 0 USHORT first valid low-byte */
226 /* count 2 USHORT number of valid low-bytes */
227 /* delta 4 SHORT see below */
228 /* offset 6 USHORT see below */
230 /* A sub-header defines, for each high-byte, the range of valid */
231 /* low-bytes within the charmap. Note that the range defined by `first' */
232 /* and `count' must be completely included in the interval [0..255] */
233 /* according to the specification. */
235 /* If a character code is contained within a given sub-header, then */
236 /* mapping it to a glyph index is done as follows: */
238 /* * The value of `offset' is read. This is a _byte_ distance from the */
239 /* location of the `offset' field itself into a slice of the */
240 /* `glyph_ids' table. Let's call it `slice' (it's a USHORT[] too). */
242 /* * The value `slice[char.lo - first]' is read. If it is 0, there is */
243 /* no glyph for the charcode. Otherwise, the value of `delta' is */
244 /* added to it (modulo 65536) to form a new glyph index. */
246 /* It is up to the validation routine to check that all offsets fall */
247 /* within the glyph ids table (and not within the `subs' table itself or */
248 /* outside of the CMap). */
251 #ifdef TT_CONFIG_CMAP_FORMAT_2
253 FT_CALLBACK_DEF( void )
254 tt_cmap2_validate( FT_Byte* table,
257 FT_Byte* p = table + 2; /* skip format */
258 FT_UInt length = TT_PEEK_USHORT( p );
260 FT_Byte* keys; /* keys table */
261 FT_Byte* subs; /* sub-headers */
262 FT_Byte* glyph_ids; /* glyph id array */
265 if ( table + length > valid->limit || length < 6 + 512 )
266 FT_INVALID_TOO_SHORT;
270 /* parse keys to compute sub-headers count */
273 for ( n = 0; n < 256; n++ )
275 FT_UInt idx = TT_NEXT_USHORT( p );
278 /* value must be multiple of 8 */
279 if ( valid->level >= FT_VALIDATE_PARANOID && ( idx & 7 ) != 0 )
284 if ( idx > max_subs )
288 FT_ASSERT( p == table + 518 );
291 glyph_ids = subs + (max_subs + 1) * 8;
292 if ( glyph_ids > valid->limit )
293 FT_INVALID_TOO_SHORT;
295 /* parse sub-headers */
296 for ( n = 0; n <= max_subs; n++ )
298 FT_UInt first_code, code_count, offset;
303 first_code = TT_NEXT_USHORT( p );
304 code_count = TT_NEXT_USHORT( p );
305 delta = TT_NEXT_SHORT( p );
306 offset = TT_NEXT_USHORT( p );
308 /* check range within 0..255 */
309 if ( valid->level >= FT_VALIDATE_PARANOID )
311 if ( first_code >= 256 || first_code + code_count > 256 )
318 ids = p - 2 + offset;
319 if ( ids < glyph_ids || ids + code_count*2 > table + length )
322 /* check glyph ids */
323 if ( valid->level >= FT_VALIDATE_TIGHT )
325 FT_Byte* limit = p + code_count * 2;
331 idx = TT_NEXT_USHORT( p );
334 idx = ( idx + delta ) & 0xFFFFU;
335 if ( idx >= TT_VALID_GLYPH_COUNT( valid ) )
345 /* return sub header corresponding to a given character code */
346 /* NULL on invalid charcode */
348 tt_cmap2_get_subheader( FT_Byte* table,
349 FT_UInt32 char_code )
351 FT_Byte* result = NULL;
354 if ( char_code < 0x10000UL )
356 FT_UInt char_lo = (FT_UInt)( char_code & 0xFF );
357 FT_UInt char_hi = (FT_UInt)( char_code >> 8 );
358 FT_Byte* p = table + 6; /* keys table */
359 FT_Byte* subs = table + 518; /* subheaders table */
365 /* an 8-bit character code -- we use subHeader 0 in this case */
366 /* to test whether the character code is in the charmap */
368 sub = subs; /* jump to first sub-header */
370 /* check that the sub-header for this byte is 0, which */
371 /* indicates that it's really a valid one-byte value */
372 /* Otherwise, return 0 */
375 if ( TT_PEEK_USHORT( p ) != 0 )
380 /* a 16-bit character code */
381 p += char_hi * 2; /* jump to key entry */
382 sub = subs + ( TT_PEEK_USHORT( p ) & -8 ); /* jump to sub-header */
384 /* check that the hi byte isn't a valid one-byte value */
395 FT_CALLBACK_DEF( FT_UInt )
396 tt_cmap2_char_index( TT_CMap cmap,
397 FT_UInt32 char_code )
399 FT_Byte* table = cmap->data;
404 subheader = tt_cmap2_get_subheader( table, char_code );
407 FT_Byte* p = subheader;
408 FT_UInt idx = (FT_UInt)(char_code & 0xFF);
409 FT_UInt start, count;
414 start = TT_NEXT_USHORT( p );
415 count = TT_NEXT_USHORT( p );
416 delta = TT_NEXT_SHORT ( p );
417 offset = TT_PEEK_USHORT( p );
420 if ( idx < count && offset != 0 )
422 p += offset + 2 * idx;
423 idx = TT_PEEK_USHORT( p );
426 result = (FT_UInt)( idx + delta ) & 0xFFFFU;
433 FT_CALLBACK_DEF( FT_UInt )
434 tt_cmap2_char_next( TT_CMap cmap,
435 FT_UInt32 *pcharcode )
437 FT_Byte* table = cmap->data;
439 FT_UInt32 result = 0;
440 FT_UInt32 charcode = *pcharcode + 1;
444 while ( charcode < 0x10000UL )
446 subheader = tt_cmap2_get_subheader( table, charcode );
449 FT_Byte* p = subheader;
450 FT_UInt start = TT_NEXT_USHORT( p );
451 FT_UInt count = TT_NEXT_USHORT( p );
452 FT_Int delta = TT_NEXT_SHORT ( p );
453 FT_UInt offset = TT_PEEK_USHORT( p );
454 FT_UInt char_lo = (FT_UInt)( charcode & 0xFF );
461 if ( char_lo < start )
467 pos = (FT_UInt)( char_lo - start );
469 p += offset + pos * 2;
470 charcode = ( charcode & -256 ) + char_lo;
472 for ( ; pos < count; pos++, charcode++ )
474 idx = TT_NEXT_USHORT( p );
478 gindex = ( idx + delta ) & 0xFFFFU;
488 /* jump to next sub-header, i.e. higher byte value */
490 charcode = ( charcode & -256 ) + 256;
500 FT_CALLBACK_TABLE_DEF
501 const TT_CMap_ClassRec tt_cmap2_class_rec =
504 sizeof( TT_CMapRec ),
506 (FT_CMap_InitFunc) tt_cmap_init,
507 (FT_CMap_DoneFunc) NULL,
508 (FT_CMap_CharIndexFunc)tt_cmap2_char_index,
509 (FT_CMap_CharNextFunc) tt_cmap2_char_next
512 (TT_CMap_ValidateFunc) tt_cmap2_validate
515 #endif /* TT_CONFIG_CMAP_FORMAT_2 */
518 /*************************************************************************/
519 /*************************************************************************/
521 /***** FORMAT 4 *****/
523 /*************************************************************************/
524 /*************************************************************************/
526 /*************************************************************************/
531 /* NAME OFFSET TYPE DESCRIPTION */
533 /* format 0 USHORT must be 4 */
534 /* length 2 USHORT table length */
536 /* language 4 USHORT Mac language code */
538 /* segCountX2 6 USHORT 2*NUM_SEGS */
539 /* searchRange 8 USHORT 2*(1 << LOG_SEGS) */
540 /* entrySelector 10 USHORT LOG_SEGS */
541 /* rangeShift 12 USHORT segCountX2 - */
544 /* endCount 14 USHORT[NUM_SEGS] end charcode for */
545 /* each segment; last */
548 /* pad 14+NUM_SEGS*2 USHORT padding */
550 /* startCount 16+NUM_SEGS*2 USHORT[NUM_SEGS] first charcode for */
553 /* idDelta 16+NUM_SEGS*4 SHORT[NUM_SEGS] delta for each */
555 /* idOffset 16+NUM_SEGS*6 SHORT[NUM_SEGS] range offset for */
556 /* each segment; can be */
559 /* glyphIds 16+NUM_SEGS*8 USHORT[] array of glyph id */
562 /* Character codes are modelled by a series of ordered (increasing) */
563 /* intervals called segments. Each segment has start and end codes, */
564 /* provided by the `startCount' and `endCount' arrays. Segments must */
565 /* not be overlapping and the last segment should always contain the */
566 /* `0xFFFF' endCount. */
568 /* The fields `searchRange', `entrySelector' and `rangeShift' are better */
569 /* ignored (they are traces of over-engineering in the TrueType */
570 /* specification). */
572 /* Each segment also has a signed `delta', as well as an optional offset */
573 /* within the `glyphIds' table. */
575 /* If a segment's idOffset is 0, the glyph index corresponding to any */
576 /* charcode within the segment is obtained by adding the value of */
577 /* `idDelta' directly to the charcode, modulo 65536. */
579 /* Otherwise, a glyph index is taken from the glyph ids sub-array for */
580 /* the segment, and the value of `idDelta' is added to it. */
583 /* Finally, note that certain fonts contain invalid charmaps that */
584 /* contain end=0xFFFF, start=0xFFFF, delta=0x0001, offset=0xFFFF at the end */
585 /* of their charmaps (e.g. opens___.ttf which comes with OpenOffice.org) */
586 /* we need special code to deal with them correctly... */
589 #ifdef TT_CONFIG_CMAP_FORMAT_4
591 FT_CALLBACK_DEF( void )
592 tt_cmap4_validate( FT_Byte* table,
595 FT_Byte* p = table + 2; /* skip format */
596 FT_UInt length = TT_NEXT_USHORT( p );
597 FT_Byte *ends, *starts, *offsets, *deltas, *glyph_ids;
601 /* in certain fonts, the `length' field is invalid and goes */
602 /* out of bound. We try to correct this here... */
604 FT_INVALID_TOO_SHORT;
606 if ( table + length > valid->limit )
608 if ( valid->level >= FT_VALIDATE_TIGHT )
609 FT_INVALID_TOO_SHORT;
611 length = (FT_UInt)( valid->limit - table );
615 num_segs = TT_NEXT_USHORT( p ); /* read segCountX2 */
617 if ( valid->level >= FT_VALIDATE_PARANOID )
619 /* check that we have an even value here */
626 /* check the search parameters - even though we never use them */
628 if ( valid->level >= FT_VALIDATE_PARANOID )
630 /* check the values of 'searchRange', 'entrySelector', 'rangeShift' */
631 FT_UInt search_range = TT_NEXT_USHORT( p );
632 FT_UInt entry_selector = TT_NEXT_USHORT( p );
633 FT_UInt range_shift = TT_NEXT_USHORT( p );
636 if ( ( search_range | range_shift ) & 1 ) /* must be even values */
642 /* `search range' is the greatest power of 2 that is <= num_segs */
644 if ( search_range > num_segs ||
645 search_range * 2 < num_segs ||
646 search_range + range_shift != num_segs ||
647 search_range != ( 1U << entry_selector ) )
652 starts = table + 16 + num_segs * 2;
653 deltas = starts + num_segs * 2;
654 offsets = deltas + num_segs * 2;
655 glyph_ids = offsets + num_segs * 2;
657 if ( glyph_ids > table + length )
658 FT_INVALID_TOO_SHORT;
660 /* check last segment, its end count must be FFFF */
661 if ( valid->level >= FT_VALIDATE_PARANOID )
663 p = ends + ( num_segs - 1 ) * 2;
664 if ( TT_PEEK_USHORT( p ) != 0xFFFFU )
668 /* check that segments are sorted in increasing order and do not */
669 /* overlap; check also the offsets */
671 FT_UInt start, end, last = 0, offset, n;
675 for ( n = 0; n < num_segs; n++ )
678 start = TT_PEEK_USHORT( p );
680 end = TT_PEEK_USHORT( p );
682 delta = TT_PEEK_SHORT( p );
684 offset = TT_PEEK_USHORT( p );
689 /* this test should be performed at default validation level; */
690 /* unfortunately, some popular Asian fonts present overlapping */
691 /* ranges in their charmaps */
693 if ( valid->level >= FT_VALIDATE_TIGHT )
695 if ( n > 0 && start <= last )
699 if ( offset && offset != 0xFFFFU )
701 p += offset; /* start of glyph id array */
703 /* check that we point within the glyph ids table only */
704 if ( p < glyph_ids ||
705 p + ( end - start + 1 ) * 2 > table + length )
708 /* check glyph indices within the segment range */
709 if ( valid->level >= FT_VALIDATE_TIGHT )
714 for ( i = start; i < end; i++ )
716 idx = FT_NEXT_USHORT( p );
719 idx = (FT_UInt)( idx + delta ) & 0xFFFFU;
721 if ( idx >= TT_VALID_GLYPH_COUNT( valid ) )
727 else if ( offset == 0xFFFFU )
729 /* Some fonts (erroneously?) use a range offset of 0xFFFF */
730 /* to mean missing glyph in cmap table */
732 if ( valid->level >= FT_VALIDATE_PARANOID ||
734 !( start == 0xFFFFU && end == 0xFFFFU && delta == 0x1U ) )
744 FT_CALLBACK_DEF( FT_UInt )
745 tt_cmap4_char_index( TT_CMap cmap,
746 FT_UInt32 char_code )
748 FT_Byte* table = cmap->data;
752 if ( char_code < 0x10000UL )
754 FT_UInt idx, num_segs2;
756 FT_UInt code = (FT_UInt)char_code;
761 num_segs2 = TT_PEEK_USHORT( p ) & -2; /* be paranoid! */
764 /* Some fonts have more than 170 segments in their charmaps! */
765 /* We changed this function to use a more efficient binary */
766 /* search for improving performance */
769 FT_UInt max = num_segs2 >> 1;
770 FT_UInt mid, start, end, offset;
775 mid = ( min + max ) >> 1;
776 p = table + 14 + mid * 2;
777 end = TT_NEXT_USHORT( p );
779 start = TT_PEEK_USHORT( p);
784 else if ( code > end )
789 /* we found the segment */
793 delta = TT_PEEK_SHORT( p );
796 offset = TT_PEEK_USHORT( p );
798 if ( offset == 0xFFFFU )
803 p += offset + 2 * ( idx - start );
804 idx = TT_PEEK_USHORT( p );
808 result = (FT_UInt)( idx + delta ) & 0xFFFFU;
815 #else /* 0 - old code */
822 p = table + 14; /* ends table */
823 q = table + 16 + num_segs2; /* starts table */
826 for ( n = 0; n < num_segs2; n += 2 )
828 FT_UInt end = TT_NEXT_USHORT( p );
829 FT_UInt start = TT_NEXT_USHORT( q );
840 p = q + num_segs2 - 2;
841 delta = TT_PEEK_SHORT( p );
843 offset = TT_PEEK_USHORT( p );
845 if ( offset == 0xFFFFU )
850 p += offset + 2 * ( idx - start );
851 idx = TT_PEEK_USHORT( p );
855 result = (FT_UInt)( idx + delta ) & 0xFFFFU;
869 FT_CALLBACK_DEF( FT_UInt )
870 tt_cmap4_char_next( TT_CMap cmap,
871 FT_UInt32 *pchar_code )
873 FT_Byte* table = cmap->data;
874 FT_UInt32 result = 0;
875 FT_UInt32 char_code = *pchar_code + 1;
879 FT_UInt code, num_segs2;
882 if ( char_code >= 0x10000UL )
885 code = (FT_UInt)char_code;
887 num_segs2 = TT_PEEK_USHORT(p) & -2; /* ensure even-ness */
895 p = table + 14; /* ends table */
896 q = table + 16 + num_segs2; /* starts table */
898 for ( n = 0; n < num_segs2; n += 2 )
900 FT_UInt end = TT_NEXT_USHORT( p );
901 FT_UInt start = TT_NEXT_USHORT( q );
909 p = q + num_segs2 - 2;
910 delta = TT_PEEK_SHORT( p );
912 offset = TT_PEEK_USHORT( p );
914 if ( offset != 0 && offset != 0xFFFFU )
916 /* parse the glyph ids array for non-0 index */
917 p += offset + ( code - start ) * 2;
918 while ( code <= end )
920 gindex = TT_NEXT_USHORT( p );
923 gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU;
930 else if ( offset == 0xFFFFU )
932 /* an offset of 0xFFFF means an empty glyph in certain fonts! */
937 gindex = (FT_UInt)( code + delta ) & 0xFFFFU;
947 /* loop to next trial charcode */
948 if ( code >= 0xFFFFU )
953 return (FT_UInt)result;
956 *pchar_code = result;
961 FT_CALLBACK_TABLE_DEF
962 const TT_CMap_ClassRec tt_cmap4_class_rec =
965 sizeof ( TT_CMapRec ),
967 (FT_CMap_InitFunc) tt_cmap_init,
968 (FT_CMap_DoneFunc) NULL,
969 (FT_CMap_CharIndexFunc)tt_cmap4_char_index,
970 (FT_CMap_CharNextFunc) tt_cmap4_char_next
973 (TT_CMap_ValidateFunc) tt_cmap4_validate
976 #endif /* TT_CONFIG_CMAP_FORMAT_4 */
979 /*************************************************************************/
980 /*************************************************************************/
982 /***** FORMAT 6 *****/
984 /*************************************************************************/
985 /*************************************************************************/
987 /*************************************************************************/
992 /* NAME OFFSET TYPE DESCRIPTION */
994 /* format 0 USHORT must be 6 */
995 /* length 2 USHORT table length in bytes */
996 /* language 4 USHORT Mac language code */
998 /* first 6 USHORT first segment code */
999 /* count 8 USHORT segment size in chars */
1000 /* glyphIds 10 USHORT[count] glyph ids */
1002 /* A very simplified segment mapping. */
1005 #ifdef TT_CONFIG_CMAP_FORMAT_6
1007 FT_CALLBACK_DEF( void )
1008 tt_cmap6_validate( FT_Byte* table,
1009 FT_Validator valid )
1012 FT_UInt length, start, count;
1015 if ( table + 10 > valid->limit )
1016 FT_INVALID_TOO_SHORT;
1019 length = TT_NEXT_USHORT( p );
1021 p = table + 6; /* skip language */
1022 start = TT_NEXT_USHORT( p );
1023 count = TT_NEXT_USHORT( p );
1025 if ( table + length > valid->limit || length < 10 + count * 2 )
1026 FT_INVALID_TOO_SHORT;
1028 /* check glyph indices */
1029 if ( valid->level >= FT_VALIDATE_TIGHT )
1034 for ( ; count > 0; count-- )
1036 gindex = TT_NEXT_USHORT( p );
1037 if ( gindex >= TT_VALID_GLYPH_COUNT( valid ) )
1038 FT_INVALID_GLYPH_ID;
1044 FT_CALLBACK_DEF( FT_UInt )
1045 tt_cmap6_char_index( TT_CMap cmap,
1046 FT_UInt32 char_code )
1048 FT_Byte* table = cmap->data;
1050 FT_Byte* p = table + 6;
1051 FT_UInt start = TT_NEXT_USHORT( p );
1052 FT_UInt count = TT_NEXT_USHORT( p );
1053 FT_UInt idx = (FT_UInt)( char_code - start );
1059 result = TT_PEEK_USHORT( p );
1065 FT_CALLBACK_DEF( FT_UInt )
1066 tt_cmap6_char_next( TT_CMap cmap,
1067 FT_UInt32 *pchar_code )
1069 FT_Byte* table = cmap->data;
1070 FT_UInt32 result = 0;
1071 FT_UInt32 char_code = *pchar_code + 1;
1074 FT_Byte* p = table + 6;
1075 FT_UInt start = TT_NEXT_USHORT( p );
1076 FT_UInt count = TT_NEXT_USHORT( p );
1080 if ( char_code >= 0x10000UL )
1083 if ( char_code < start )
1086 idx = (FT_UInt)( char_code - start );
1089 for ( ; idx < count; idx++ )
1091 gindex = TT_NEXT_USHORT( p );
1101 *pchar_code = result;
1106 FT_CALLBACK_TABLE_DEF
1107 const TT_CMap_ClassRec tt_cmap6_class_rec =
1110 sizeof ( TT_CMapRec ),
1112 (FT_CMap_InitFunc) tt_cmap_init,
1113 (FT_CMap_DoneFunc) NULL,
1114 (FT_CMap_CharIndexFunc)tt_cmap6_char_index,
1115 (FT_CMap_CharNextFunc) tt_cmap6_char_next
1118 (TT_CMap_ValidateFunc) tt_cmap6_validate
1121 #endif /* TT_CONFIG_CMAP_FORMAT_6 */
1124 /*************************************************************************/
1125 /*************************************************************************/
1127 /***** FORMAT 8 *****/
1129 /***** It's hard to completely understand what the OpenType spec *****/
1130 /***** says about this format, but here is my conclusion. *****/
1132 /***** The purpose of this format is to easily map UTF-16 text to *****/
1133 /***** glyph indices. Basically, the `char_code' must be in one of *****/
1134 /***** the following formats: *****/
1136 /***** - A 16-bit value that isn't part of the Unicode Surrogates *****/
1137 /***** Area (i.e. U+D800-U+DFFF). *****/
1139 /***** - A 32-bit value, made of two surrogate values, i.e., if *****/
1140 /***** `char_code = (char_hi << 16) | char_lo', then both *****/
1141 /***** `char_hi' and `char_lo' must be in the Surrogates Area. *****/
1144 /***** The 'is32' table embedded in the charmap indicates whether a *****/
1145 /***** given 16-bit value is in the surrogates area or not. *****/
1147 /***** So, for any given `char_code', we can assert the following: *****/
1149 /***** If `char_hi == 0' then we must have `is32[char_lo] == 0'. *****/
1151 /***** If `char_hi != 0' then we must have both *****/
1152 /***** `is32[char_hi] != 0' and `is32[char_lo] != 0'. *****/
1154 /*************************************************************************/
1155 /*************************************************************************/
1157 /*************************************************************************/
1159 /* TABLE OVERVIEW */
1160 /* -------------- */
1162 /* NAME OFFSET TYPE DESCRIPTION */
1164 /* format 0 USHORT must be 8 */
1165 /* reserved 2 USHORT reserved */
1166 /* length 4 ULONG length in bytes */
1167 /* language 8 ULONG Mac language code */
1168 /* is32 12 BYTE[8192] 32-bitness bitmap */
1169 /* count 8204 ULONG number of groups */
1171 /* This header is followed by 'count' groups of the following format: */
1173 /* start 0 ULONG first charcode */
1174 /* end 4 ULONG last charcode */
1175 /* startId 8 ULONG start glyph id for the group */
1178 #ifdef TT_CONFIG_CMAP_FORMAT_8
1180 FT_CALLBACK_DEF( void )
1181 tt_cmap8_validate( FT_Byte* table,
1182 FT_Validator valid )
1184 FT_Byte* p = table + 4;
1187 FT_UInt32 num_groups;
1190 if ( table + 16 + 8192 > valid->limit )
1191 FT_INVALID_TOO_SHORT;
1193 length = TT_NEXT_ULONG( p );
1194 if ( table + length > valid->limit || length < 8208 )
1195 FT_INVALID_TOO_SHORT;
1198 p = is32 + 8192; /* skip `is32' array */
1199 num_groups = TT_NEXT_ULONG( p );
1201 if ( p + num_groups * 12 > valid->limit )
1202 FT_INVALID_TOO_SHORT;
1204 /* check groups, they must be in increasing order */
1206 FT_UInt32 n, start, end, start_id, count, last = 0;
1209 for ( n = 0; n < num_groups; n++ )
1214 start = TT_NEXT_ULONG( p );
1215 end = TT_NEXT_ULONG( p );
1216 start_id = TT_NEXT_ULONG( p );
1221 if ( n > 0 && start <= last )
1224 if ( valid->level >= FT_VALIDATE_TIGHT )
1226 if ( start_id + end - start >= TT_VALID_GLYPH_COUNT( valid ) )
1227 FT_INVALID_GLYPH_ID;
1229 count = (FT_UInt32)( end - start + 1 );
1231 if ( start & ~0xFFFFU )
1233 /* start_hi != 0; check that is32[i] is 1 for each i in */
1234 /* the `hi' and `lo' of the range [start..end] */
1235 for ( ; count > 0; count--, start++ )
1237 hi = (FT_UInt)( start >> 16 );
1238 lo = (FT_UInt)( start & 0xFFFFU );
1240 if ( (is32[hi >> 3] & ( 0x80 >> ( hi & 7 ) ) ) == 0 )
1243 if ( (is32[lo >> 3] & ( 0x80 >> ( lo & 7 ) ) ) == 0 )
1249 /* start_hi == 0; check that is32[i] is 0 for each i in */
1250 /* the range [start..end] */
1252 /* end_hi cannot be != 0! */
1253 if ( end & ~0xFFFFU )
1256 for ( ; count > 0; count--, start++ )
1258 lo = (FT_UInt)( start & 0xFFFFU );
1260 if ( (is32[lo >> 3] & ( 0x80 >> ( lo & 7 ) ) ) != 0 )
1272 FT_CALLBACK_DEF( FT_UInt )
1273 tt_cmap8_char_index( TT_CMap cmap,
1274 FT_UInt32 char_code )
1276 FT_Byte* table = cmap->data;
1278 FT_Byte* p = table + 8204;
1279 FT_UInt32 num_groups = TT_NEXT_ULONG( p );
1280 FT_UInt32 start, end, start_id;
1283 for ( ; num_groups > 0; num_groups-- )
1285 start = TT_NEXT_ULONG( p );
1286 end = TT_NEXT_ULONG( p );
1287 start_id = TT_NEXT_ULONG( p );
1289 if ( char_code < start )
1292 if ( char_code <= end )
1294 result = (FT_UInt)( start_id + char_code - start );
1302 FT_CALLBACK_DEF( FT_UInt )
1303 tt_cmap8_char_next( TT_CMap cmap,
1304 FT_UInt32 *pchar_code )
1306 FT_UInt32 result = 0;
1307 FT_UInt32 char_code = *pchar_code + 1;
1309 FT_Byte* table = cmap->data;
1310 FT_Byte* p = table + 8204;
1311 FT_UInt32 num_groups = TT_NEXT_ULONG( p );
1312 FT_UInt32 start, end, start_id;
1317 for ( ; num_groups > 0; num_groups-- )
1319 start = TT_NEXT_ULONG( p );
1320 end = TT_NEXT_ULONG( p );
1321 start_id = TT_NEXT_ULONG( p );
1323 if ( char_code < start )
1326 if ( char_code <= end )
1328 gindex = (FT_UInt)( char_code - start + start_id );
1338 *pchar_code = result;
1343 FT_CALLBACK_TABLE_DEF
1344 const TT_CMap_ClassRec tt_cmap8_class_rec =
1347 sizeof ( TT_CMapRec ),
1349 (FT_CMap_InitFunc) tt_cmap_init,
1350 (FT_CMap_DoneFunc) NULL,
1351 (FT_CMap_CharIndexFunc)tt_cmap8_char_index,
1352 (FT_CMap_CharNextFunc) tt_cmap8_char_next
1355 (TT_CMap_ValidateFunc) tt_cmap8_validate
1358 #endif /* TT_CONFIG_CMAP_FORMAT_8 */
1361 /*************************************************************************/
1362 /*************************************************************************/
1364 /***** FORMAT 10 *****/
1366 /*************************************************************************/
1367 /*************************************************************************/
1369 /*************************************************************************/
1371 /* TABLE OVERVIEW */
1372 /* -------------- */
1374 /* NAME OFFSET TYPE DESCRIPTION */
1376 /* format 0 USHORT must be 10 */
1377 /* reserved 2 USHORT reserved */
1378 /* length 4 ULONG length in bytes */
1379 /* language 8 ULONG Mac language code */
1381 /* start 12 ULONG first char in range */
1382 /* count 16 ULONG number of chars in range */
1383 /* glyphIds 20 USHORT[count] glyph indices covered */
1386 #ifdef TT_CONFIG_CMAP_FORMAT_10
1388 FT_CALLBACK_DEF( void )
1389 tt_cmap10_validate( FT_Byte* table,
1390 FT_Validator valid )
1392 FT_Byte* p = table + 4;
1393 FT_ULong length, start, count;
1396 if ( table + 20 > valid->limit )
1397 FT_INVALID_TOO_SHORT;
1399 length = TT_NEXT_ULONG( p );
1401 start = TT_NEXT_ULONG( p );
1402 count = TT_NEXT_ULONG( p );
1404 if ( table + length > valid->limit || length < 20 + count * 2 )
1405 FT_INVALID_TOO_SHORT;
1407 /* check glyph indices */
1408 if ( valid->level >= FT_VALIDATE_TIGHT )
1413 for ( ; count > 0; count-- )
1415 gindex = TT_NEXT_USHORT( p );
1416 if ( gindex >= TT_VALID_GLYPH_COUNT( valid ) )
1417 FT_INVALID_GLYPH_ID;
1423 FT_CALLBACK_DEF( FT_UInt )
1424 tt_cmap10_char_index( TT_CMap cmap,
1425 FT_UInt32 char_code )
1427 FT_Byte* table = cmap->data;
1429 FT_Byte* p = table + 12;
1430 FT_UInt32 start = TT_NEXT_ULONG( p );
1431 FT_UInt32 count = TT_NEXT_ULONG( p );
1432 FT_UInt32 idx = (FT_ULong)( char_code - start );
1438 result = TT_PEEK_USHORT( p );
1444 FT_CALLBACK_DEF( FT_UInt )
1445 tt_cmap10_char_next( TT_CMap cmap,
1446 FT_UInt32 *pchar_code )
1448 FT_Byte* table = cmap->data;
1449 FT_UInt32 result = 0;
1450 FT_UInt32 char_code = *pchar_code + 1;
1452 FT_Byte* p = table + 12;
1453 FT_UInt32 start = TT_NEXT_ULONG( p );
1454 FT_UInt32 count = TT_NEXT_ULONG( p );
1458 if ( char_code < start )
1461 idx = (FT_UInt32)( char_code - start );
1464 for ( ; idx < count; idx++ )
1466 gindex = TT_NEXT_USHORT( p );
1475 *pchar_code = char_code;
1480 FT_CALLBACK_TABLE_DEF
1481 const TT_CMap_ClassRec tt_cmap10_class_rec =
1484 sizeof ( TT_CMapRec ),
1486 (FT_CMap_InitFunc) tt_cmap_init,
1487 (FT_CMap_DoneFunc) NULL,
1488 (FT_CMap_CharIndexFunc)tt_cmap10_char_index,
1489 (FT_CMap_CharNextFunc) tt_cmap10_char_next
1492 (TT_CMap_ValidateFunc) tt_cmap10_validate
1495 #endif /* TT_CONFIG_CMAP_FORMAT_10 */
1498 /*************************************************************************/
1499 /*************************************************************************/
1501 /***** FORMAT 12 *****/
1503 /*************************************************************************/
1504 /*************************************************************************/
1506 /*************************************************************************/
1508 /* TABLE OVERVIEW */
1509 /* -------------- */
1511 /* NAME OFFSET TYPE DESCRIPTION */
1513 /* format 0 USHORT must be 12 */
1514 /* reserved 2 USHORT reserved */
1515 /* length 4 ULONG length in bytes */
1516 /* language 8 ULONG Mac language code */
1517 /* count 12 ULONG number of groups */
1520 /* This header is followed by `count' groups of the following format: */
1522 /* start 0 ULONG first charcode */
1523 /* end 4 ULONG last charcode */
1524 /* startId 8 ULONG start glyph id for the group */
1527 #ifdef TT_CONFIG_CMAP_FORMAT_12
1529 FT_CALLBACK_DEF( void )
1530 tt_cmap12_validate( FT_Byte* table,
1531 FT_Validator valid )
1535 FT_ULong num_groups;
1538 if ( table + 16 > valid->limit )
1539 FT_INVALID_TOO_SHORT;
1542 length = TT_NEXT_ULONG( p );
1545 num_groups = TT_NEXT_ULONG( p );
1547 if ( table + length > valid->limit || length < 16 + 12 * num_groups )
1548 FT_INVALID_TOO_SHORT;
1550 /* check groups, they must be in increasing order */
1552 FT_ULong n, start, end, start_id, last = 0;
1555 for ( n = 0; n < num_groups; n++ )
1557 start = TT_NEXT_ULONG( p );
1558 end = TT_NEXT_ULONG( p );
1559 start_id = TT_NEXT_ULONG( p );
1564 if ( n > 0 && start <= last )
1567 if ( valid->level >= FT_VALIDATE_TIGHT )
1569 if ( start_id + end - start >= TT_VALID_GLYPH_COUNT( valid ) )
1570 FT_INVALID_GLYPH_ID;
1579 FT_CALLBACK_DEF( FT_UInt )
1580 tt_cmap12_char_index( TT_CMap cmap,
1581 FT_UInt32 char_code )
1584 FT_Byte* table = cmap->data;
1585 FT_Byte* p = table + 12;
1586 FT_UInt32 num_groups = TT_NEXT_ULONG( p );
1587 FT_UInt32 start, end, start_id;
1590 for ( ; num_groups > 0; num_groups-- )
1592 start = TT_NEXT_ULONG( p );
1593 end = TT_NEXT_ULONG( p );
1594 start_id = TT_NEXT_ULONG( p );
1596 if ( char_code < start )
1599 if ( char_code <= end )
1601 result = (FT_UInt)( start_id + char_code - start );
1609 FT_CALLBACK_DEF( FT_UInt )
1610 tt_cmap12_char_next( TT_CMap cmap,
1611 FT_UInt32 *pchar_code )
1613 FT_Byte* table = cmap->data;
1614 FT_UInt32 result = 0;
1615 FT_UInt32 char_code = *pchar_code + 1;
1617 FT_Byte* p = table + 12;
1618 FT_UInt32 num_groups = TT_NEXT_ULONG( p );
1619 FT_UInt32 start, end, start_id;
1624 for ( ; num_groups > 0; num_groups-- )
1626 start = TT_NEXT_ULONG( p );
1627 end = TT_NEXT_ULONG( p );
1628 start_id = TT_NEXT_ULONG( p );
1630 if ( char_code < start )
1633 if ( char_code <= end )
1635 gindex = (FT_UInt)(char_code - start + start_id);
1645 *pchar_code = result;
1650 FT_CALLBACK_TABLE_DEF
1651 const TT_CMap_ClassRec tt_cmap12_class_rec =
1654 sizeof ( TT_CMapRec ),
1656 (FT_CMap_InitFunc) tt_cmap_init,
1657 (FT_CMap_DoneFunc) NULL,
1658 (FT_CMap_CharIndexFunc)tt_cmap12_char_index,
1659 (FT_CMap_CharNextFunc) tt_cmap12_char_next
1662 (TT_CMap_ValidateFunc) tt_cmap12_validate
1666 #endif /* TT_CONFIG_CMAP_FORMAT_12 */
1669 static const TT_CMap_Class tt_cmap_classes[] =
1671 #ifdef TT_CONFIG_CMAP_FORMAT_0
1672 &tt_cmap0_class_rec,
1675 #ifdef TT_CONFIG_CMAP_FORMAT_2
1676 &tt_cmap2_class_rec,
1679 #ifdef TT_CONFIG_CMAP_FORMAT_4
1680 &tt_cmap4_class_rec,
1683 #ifdef TT_CONFIG_CMAP_FORMAT_6
1684 &tt_cmap6_class_rec,
1687 #ifdef TT_CONFIG_CMAP_FORMAT_8
1688 &tt_cmap8_class_rec,
1691 #ifdef TT_CONFIG_CMAP_FORMAT_10
1692 &tt_cmap10_class_rec,
1695 #ifdef TT_CONFIG_CMAP_FORMAT_12
1696 &tt_cmap12_class_rec,
1703 /* parse the `cmap' table and build the corresponding TT_CMap objects */
1704 /* in the current face */
1706 FT_LOCAL_DEF( FT_Error )
1707 tt_face_build_cmaps( TT_Face face )
1709 FT_Byte* table = face->cmap_table;
1710 FT_Byte* limit = table + face->cmap_size;
1711 FT_UInt volatile num_cmaps;
1712 FT_Byte* volatile p = table;
1715 if ( p + 4 > limit )
1716 return FT_Err_Invalid_Table;
1718 /* only recognize format 0 */
1719 if ( TT_NEXT_USHORT( p ) != 0 )
1722 FT_ERROR(( "tt_face_build_cmaps: unsupported `cmap' table format = %d\n",
1723 TT_PEEK_USHORT( p ) ));
1724 return FT_Err_Invalid_Table;
1727 num_cmaps = TT_NEXT_USHORT( p );
1729 for ( ; num_cmaps > 0 && p + 8 <= limit; num_cmaps-- )
1731 FT_CharMapRec charmap;
1735 charmap.platform_id = TT_NEXT_USHORT( p );
1736 charmap.encoding_id = TT_NEXT_USHORT( p );
1737 charmap.face = FT_FACE( face );
1738 charmap.encoding = FT_ENCODING_NONE; /* will be filled later */
1739 offset = TT_NEXT_ULONG( p );
1741 if ( offset && table + offset + 2 < limit )
1743 FT_Byte* cmap = table + offset;
1744 FT_UInt format = TT_PEEK_USHORT( cmap );
1745 const TT_CMap_Class* volatile pclazz = tt_cmap_classes;
1746 TT_CMap_Class clazz;
1749 for ( ; *pclazz; pclazz++ )
1752 if ( clazz->format == format )
1754 volatile TT_ValidatorRec valid;
1757 ft_validator_init( FT_VALIDATOR( &valid ), cmap, limit,
1758 FT_VALIDATE_DEFAULT );
1760 valid.num_glyphs = (FT_UInt)face->root.num_glyphs;
1762 if ( ft_setjmp( FT_VALIDATOR( &valid )->jump_buffer ) == 0 )
1764 /* validate this cmap sub-table */
1765 clazz->validate( cmap, FT_VALIDATOR( &valid ) );
1768 if ( valid.validator.error == 0 )
1769 (void)FT_CMap_New( (FT_CMap_Class)clazz, cmap, &charmap, NULL );
1772 FT_ERROR(( "tt_face_build_cmaps:" ));
1773 FT_ERROR(( " broken cmap sub-table ignored!\n" ));