1 /***************************************************************************/
5 /* TrueType character mapping table (cmap) support (body). */
7 /* Copyright 1996-2001, 2002 by */
8 /* David Turner, Robert Wilhelm, and Werner Lemberg. */
10 /* This file is part of the FreeType project, and may only be used, */
11 /* modified, and distributed under the terms of the FreeType project */
12 /* license, LICENSE.TXT. By continuing to use, modify, or distribute */
13 /* this file you indicate that you have read the license and */
14 /* understand and accept it fully. */
16 /***************************************************************************/
20 #include FT_INTERNAL_DEBUG_H
27 /*************************************************************************/
29 /* The macro FT_COMPONENT is used in trace mode. It is an implicit */
30 /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log */
31 /* messages during execution. */
34 #define FT_COMPONENT trace_ttcmap
/* Forward declarations of the per-format lookup callbacks. */
/* tt_face_load_charmap() stores one `code_to_indexN' / `code_to_nextN' */
/* pair in `get_index' / `get_next_char' of the charmap it loads. */
37 FT_CALLBACK_DEF( FT_UInt )
38 code_to_index0( TT_CMapTable charmap,
41 FT_CALLBACK_DEF( FT_ULong )
42 code_to_next0( TT_CMapTable charmap,
45 FT_CALLBACK_DEF( FT_UInt )
46 code_to_index2( TT_CMapTable charmap,
49 FT_CALLBACK_DEF( FT_ULong )
50 code_to_next2( TT_CMapTable charmap,
53 FT_CALLBACK_DEF( FT_UInt )
54 code_to_index4( TT_CMapTable charmap,
57 FT_CALLBACK_DEF( FT_ULong )
58 code_to_next4( TT_CMapTable charmap,
61 FT_CALLBACK_DEF( FT_UInt )
62 code_to_index6( TT_CMapTable charmap,
65 FT_CALLBACK_DEF( FT_ULong )
66 code_to_next6( TT_CMapTable charmap,
/* formats 8 and 12 share one pair of callbacks */
69 FT_CALLBACK_DEF( FT_UInt )
70 code_to_index8_12( TT_CMapTable charmap,
73 FT_CALLBACK_DEF( FT_ULong )
74 code_to_next8_12( TT_CMapTable charmap,
77 FT_CALLBACK_DEF( FT_UInt )
78 code_to_index10( TT_CMapTable charmap,
81 FT_CALLBACK_DEF( FT_ULong )
82 code_to_next10( TT_CMapTable charmap,
86 /*************************************************************************/
89 /* tt_face_load_charmap */
92 /* Loads a given TrueType character map into memory. */
95 /* face :: A handle to the parent face object. */
97 /* stream :: A handle to the current stream object. */
100 /* table :: A pointer to a cmap object. */
103 /* FreeType error code. 0 means success. */
106 /* The function assumes that the stream is already in use (i.e., */
107 /* opened). In case of error, all partially allocated tables are */
/* Seeks `stream' to `cmap->offset', then dispatches on `cmap->format' */
/* to read the format-specific body into one of c.cmap0, c.cmap2, */
/* c.cmap4, c.cmap6, c.cmap8_12 or c.cmap10 and to install the matching */
/* `get_index' / `get_next_char' callbacks. Any other format value */
/* yields SFNT_Err_Invalid_CharMap_Format. */
110 FT_LOCAL_DEF( FT_Error )
111 tt_face_load_charmap( TT_Face face,
117 FT_UShort num_SH, num_Seg, i;
126 TT_CMap8_12 cmap8_12;
129 TT_CMap2SubHeader cmap2sub;
130 TT_CMap4Segment segments;
/* `memory' is not referenced explicitly in the visible code; */
/* presumably the FT_ALLOC / FT_NEW_ARRAY / FT_FREE macros pick it up. */
137 memory = stream->memory;
139 if ( FT_STREAM_SEEK( cmap->offset ) )
142 switch ( cmap->format )
/* format 0 body: 16-bit language field, then the glyph index array, */
/* allocated with size 256L and filled by a single stream read */
145 cmap0 = &cmap->c.cmap0;
147 if ( FT_READ_USHORT( cmap0->language ) ||
148 FT_ALLOC( cmap0->glyphIdArray, 256L ) ||
149 FT_STREAM_READ( cmap0->glyphIdArray, 256L ) )
152 cmap->get_index = code_to_index0;
153 cmap->get_next_char = code_to_next0;
/* format 2 body: language, 256 subheader keys, the subheaders and a */
/* trailing array of 16-bit glyph ids */
158 cmap2 = &cmap->c.cmap2;
160 /* allocate subheader keys */
/* the 2L + 512L byte frame covers the 16-bit language field and the */
/* 256 16-bit keys read below */
162 if ( FT_NEW_ARRAY( cmap2->subHeaderKeys, 256 ) ||
163 FT_FRAME_ENTER( 2L + 512L ) )
166 cmap2->language = FT_GET_USHORT();
168 for ( i = 0; i < 256; i++ )
/* keys are stored divided by 8, i.e. as subheader indices rather */
/* than byte offsets (each subheader is read as 8 bytes below) */
170 u = (FT_UShort)( FT_GET_USHORT() / 8 );
171 cmap2->subHeaderKeys[i] = u;
179 /* load subheaders */
/* glyph id count = bytes left in the table after 2 * ( 256 + 3 ) */
/* header/key bytes and num_SH * 8 subheader bytes, masked to 16 bits */
/* and halved. */
/* NOTE(review): num_SH + 1 subheaders are read below, yet only */
/* num_SH * 8 bytes are subtracted here -- confirm this is intended. */
/* NOTE(review): no check that `cmap->length' is at least as large as */
/* the subtracted amount is visible in this excerpt; a shorter length */
/* would yield a large masked count -- confirm. */
181 cmap2->numGlyphId = l = (FT_UShort)(
182 ( ( cmap->length - 2L * ( 256 + 3 ) - num_SH * 8L ) & 0xFFFFU ) / 2 );
184 if ( FT_NEW_ARRAY( cmap2->subHeaders, num_SH + 1 ) ||
185 FT_FRAME_ENTER( ( num_SH + 1 ) * 8L ) )
/* release the keys allocated above when the subheaders cannot be */
/* set up */
187 FT_FREE( cmap2->subHeaderKeys );
191 cmap2sub = cmap2->subHeaders;
/* note the inclusive bound: num_SH + 1 entries, four 16-bit fields each */
193 for ( i = 0; i <= num_SH; i++ )
195 cmap2sub->firstCode = FT_GET_USHORT();
196 cmap2sub->entryCount = FT_GET_USHORT();
197 cmap2sub->idDelta = FT_GET_SHORT();
198 /* we apply the location offset immediately */
/* i.e. the stored value is reduced by the ( num_SH - i ) * 8 bytes of */
/* subheaders that follow entry i, and by 2 more */
199 cmap2sub->idRangeOffset = (FT_UShort)(
200 FT_GET_USHORT() - ( num_SH - i ) * 8 - 2 );
/* glyph id array: `l' 16-bit values */
209 if ( FT_NEW_ARRAY( cmap2->glyphIdArray, l ) ||
210 FT_FRAME_ENTER( l * 2L ) )
/* undo both earlier format 2 allocations */
212 FT_FREE( cmap2->subHeaders );
213 FT_FREE( cmap2->subHeaderKeys );
217 for ( i = 0; i < l; i++ )
218 cmap2->glyphIdArray[i] = FT_GET_USHORT();
222 cmap->get_index = code_to_index2;
223 cmap->get_next_char = code_to_next2;
/* format 4 body: five 16-bit header fields (10 bytes), four parallel */
/* per-segment arrays, then a trailing array of 16-bit glyph ids */
227 cmap4 = &cmap->c.cmap4;
231 if ( FT_FRAME_ENTER( 10L ) )
234 cmap4->language = FT_GET_USHORT();
235 cmap4->segCountX2 = FT_GET_USHORT();
236 cmap4->searchRange = FT_GET_USHORT();
237 cmap4->entrySelector = FT_GET_USHORT();
238 cmap4->rangeShift = FT_GET_USHORT();
240 num_Seg = (FT_UShort)( cmap4->segCountX2 / 2 );
/* frame size: 4 arrays of num_Seg 16-bit values plus one extra 16-bit */
/* word (the value discarded between endCount and startCount below) */
246 if ( FT_NEW_ARRAY( cmap4->segments, num_Seg ) ||
247 FT_FRAME_ENTER( ( num_Seg * 4 + 1 ) * 2L ) )
250 segments = cmap4->segments;
252 for ( i = 0; i < num_Seg; i++ )
253 segments[i].endCount = FT_GET_USHORT();
/* one 16-bit word is read and ignored here */
255 (void)FT_GET_USHORT();
257 for ( i = 0; i < num_Seg; i++ )
258 segments[i].startCount = FT_GET_USHORT();
260 for ( i = 0; i < num_Seg; i++ )
261 segments[i].idDelta = FT_GET_SHORT();
263 for ( i = 0; i < num_Seg; i++ )
264 segments[i].idRangeOffset = FT_GET_USHORT();
/* glyph id count = bytes left after 16 + 8 * num_Seg, masked to 16 */
/* bits and halved. */
/* NOTE(review): as for format 2, no lower bound check on */
/* `cmap->length' is visible in this excerpt -- confirm. */
268 cmap4->numGlyphId = l = (FT_UShort)(
269 ( ( cmap->length - ( 16L + 8L * num_Seg ) ) & 0xFFFFU ) / 2 );
273 if ( FT_NEW_ARRAY( cmap4->glyphIdArray, l ) ||
274 FT_FRAME_ENTER( l * 2L ) )
276 FT_FREE( cmap4->segments );
280 for ( i = 0; i < l; i++ )
281 cmap4->glyphIdArray[i] = FT_GET_USHORT();
/* seed the one-entry segment cache consulted first by code_to_index4. */
/* NOTE(review): when segCountX2 < 2, num_Seg is 0; confirm what */
/* `segments' holds then, since code_to_index4 appears to dereference */
/* last_segment before looking at the segment count. */
285 cmap4->last_segment = cmap4->segments;
287 cmap->get_index = code_to_index4;
288 cmap->get_next_char = code_to_next4;
/* format 6 body: language, firstCode, entryCount (6 bytes), then */
/* entryCount 16-bit glyph ids */
292 cmap6 = &cmap->c.cmap6;
294 if ( FT_FRAME_ENTER( 6L ) )
297 cmap6->language = FT_GET_USHORT();
298 cmap6->firstCode = FT_GET_USHORT();
299 cmap6->entryCount = FT_GET_USHORT();
303 l = cmap6->entryCount;
305 if ( FT_NEW_ARRAY( cmap6->glyphIdArray, l ) ||
306 FT_FRAME_ENTER( l * 2L ) )
309 for ( i = 0; i < l; i++ )
310 cmap6->glyphIdArray[i] = FT_GET_USHORT();
313 cmap->get_index = code_to_index6;
314 cmap->get_next_char = code_to_next6;
/* formats 8 and 12 share this branch: 32-bit length and language, */
/* then nGroups groups of three 32-bit values */
319 cmap8_12 = &cmap->c.cmap8_12;
321 if ( FT_FRAME_ENTER( 8L ) )
/* the 32-bit length read here replaces the previous `cmap->length' */
324 cmap->length = FT_GET_ULONG();
325 cmap8_12->language = FT_GET_ULONG();
/* format 8 only: 8192 bytes are skipped before the group count */
/* (presumably the `is32' bitmap of that format -- confirm against the */
/* cmap specification) */
329 if ( cmap->format == 8 )
330 if ( FT_STREAM_SKIP( 8192L ) )
333 if ( FT_READ_ULONG( cmap8_12->nGroups ) )
336 n = cmap8_12->nGroups;
/* NOTE(review): `n' comes straight from the file; no upper bound or */
/* overflow check on n * 3 * 4L is visible in this excerpt -- confirm */
/* that the allocation/frame macros reject oversized requests. */
338 if ( FT_NEW_ARRAY( cmap8_12->groups, n ) ||
339 FT_FRAME_ENTER( n * 3 * 4L ) )
342 groups = cmap8_12->groups;
344 for ( j = 0; j < n; j++ )
346 groups[j].startCharCode = FT_GET_ULONG();
347 groups[j].endCharCode = FT_GET_ULONG();
348 groups[j].startGlyphID = FT_GET_ULONG();
/* seed the one-entry group cache consulted first by code_to_index8_12 */
353 cmap8_12->last_group = cmap8_12->groups;
355 cmap->get_index = code_to_index8_12;
356 cmap->get_next_char = code_to_next8_12;
/* format 10 body: four 32-bit fields (16 bytes), then numChars 16-bit */
/* glyph ids */
360 cmap10 = &cmap->c.cmap10;
362 if ( FT_FRAME_ENTER( 16L ) )
365 cmap->length = FT_GET_ULONG();
366 cmap10->language = FT_GET_ULONG();
367 cmap10->startCharCode = FT_GET_ULONG();
368 cmap10->numChars = FT_GET_ULONG();
372 n = cmap10->numChars;
/* NOTE(review): same remark as for formats 8/12 -- `n' is taken from */
/* the file without a visible bound check. */
374 if ( FT_NEW_ARRAY( cmap10->glyphs, n ) ||
375 FT_FRAME_ENTER( n * 2L ) )
378 for ( j = 0; j < n; j++ )
379 cmap10->glyphs[j] = FT_GET_USHORT();
382 cmap->get_index = code_to_index10;
383 cmap->get_next_char = code_to_next10;
386 default: /* corrupt character mapping table */
387 return SFNT_Err_Invalid_CharMap_Format;
/* presumably the shared failure path: release whatever the selected */
/* format branch allocated before the error */
394 tt_face_free_charmap( face, cmap );
399 /*************************************************************************/
402 /* tt_face_free_charmap */
405 /* Destroys a character mapping table. */
408 /* face :: A handle to the parent face object. */
410 /* cmap :: A handle to a cmap object. */
413 /* FreeType error code. 0 means success. */
/* Releases the arrays owned by the format-specific part of `cmap', */
/* selected by `cmap->format', and marks the charmap as not loaded. */
415 FT_LOCAL_DEF( FT_Error )
416 tt_face_free_charmap( TT_Face face,
/* `memory' is not referenced explicitly below; presumably FT_FREE */
/* picks it up. */
425 memory = face->root.driver->root.memory;
427 switch ( cmap->format )
/* format 0 owns a single array */
430 FT_FREE( cmap->c.cmap0.glyphIdArray );
/* format 2 owns three arrays */
434 FT_FREE( cmap->c.cmap2.subHeaderKeys );
435 FT_FREE( cmap->c.cmap2.subHeaders );
436 FT_FREE( cmap->c.cmap2.glyphIdArray );
/* formats 4, 6, 8/12 and 10 also reset their element count to 0. */
/* NOTE(review): `last_segment' is not visibly cleared here although */
/* code_to_index4 reads through it first -- confirm it cannot be used */
/* after the segments are freed. */
440 FT_FREE( cmap->c.cmap4.segments );
441 FT_FREE( cmap->c.cmap4.glyphIdArray );
442 cmap->c.cmap4.segCountX2 = 0;
446 FT_FREE( cmap->c.cmap6.glyphIdArray );
447 cmap->c.cmap6.entryCount = 0;
/* NOTE(review): same remark for `last_group' and code_to_index8_12 */
452 FT_FREE( cmap->c.cmap8_12.groups );
453 cmap->c.cmap8_12.nGroups = 0;
457 FT_FREE( cmap->c.cmap10.glyphs );
458 cmap->c.cmap10.numChars = 0;
462 /* invalid table format, do nothing */
/* done for every format, including unknown ones */
466 cmap->loaded = FALSE;
471 /*************************************************************************/
477 /* Converts the character code into a glyph index. Uses format 0. */
478 /* `charCode' must be in the range 0x00-0xFF (otherwise 0 is */
482 /* charCode :: The wanted character code. */
484 /* cmap :: A pointer to a cmap table in format 0. */
487 /* Glyph index into the glyphs array. 0 if the glyph does not exist. */
/* Format 0 lookup: the glyph id is read directly from the table for */
/* codes up to 0xFF; any larger code maps to glyph 0. */
489 FT_CALLBACK_DEF( FT_UInt )
490 code_to_index0( TT_CMapTable cmap,
493 TT_CMap0 cmap0 = &cmap->c.cmap0;
/* the range test guards the array access */
496 return ( charCode <= 0xFF ? cmap0->glyphIdArray[charCode] : 0 );
500 /*************************************************************************/
506 /* Finds the next encoded character after the given one. Uses */
507 /* format 0. `charCode' must be in the range 0x00-0xFF (otherwise 0 */
511 /* charCode :: The wanted character code. */
513 /* cmap :: A pointer to a cmap table in format 0. */
516 /* Next char code. 0 if no higher one is encoded. */
/* Format 0 scan: walks the codes above `charCode', up to 0xFF, looking */
/* for an entry with a non-zero glyph id. */
518 FT_CALLBACK_DEF( FT_ULong )
519 code_to_next0( TT_CMapTable cmap,
522 TT_CMap0 cmap0 = &cmap->c.cmap0;
/* pre-increment: the search starts at charCode + 1 */
525 while ( ++charCode <= 0xFF )
526 if ( cmap0->glyphIdArray[charCode] )
532 /*************************************************************************/
538 /* Converts the character code into a glyph index. Uses format 2. */
541 /* charCode :: The wanted character code. */
543 /* cmap :: A pointer to a cmap table in format 2. */
546 /* Glyph index into the glyphs array. 0 if the glyph does not exist. */
/* Format 2 lookup: a subheader is selected through `subHeaderKeys' */
/* (by the low byte for 8-bit codes, by the high byte for 16-bit codes), */
/* then the low byte is resolved through that subheader's slice of */
/* `glyphIdArray'. */
548 FT_CALLBACK_DEF( FT_UInt )
549 code_to_index2( TT_CMapTable cmap,
552 FT_UInt result, index1, offset;
555 TT_CMap2SubHeader sh2;
559 cmap2 = &cmap->c.cmap2;
/* split the code into its low byte and the remaining high part */
561 char_lo = (FT_UInt)( charCode & 0xFF );
562 char_hi = charCode >> 8;
566 /* an 8-bit character code -- we use the subHeader 0 in this case */
567 /* to test whether the character code is in the charmap */
568 index1 = cmap2->subHeaderKeys[char_lo];
574 /* a 16-bit character code */
/* only the low 8 bits of char_hi are used as key index */
575 index1 = cmap2->subHeaderKeys[char_hi & 0xFF];
580 sh2 = cmap2->subHeaders + index1;
/* rebase to the subheader's first code; assuming char_lo is unsigned, */
/* a code below firstCode wraps and fails the entryCount test */
581 char_lo -= sh2->firstCode;
583 if ( char_lo < (FT_UInt)sh2->entryCount )
/* idRangeOffset is a byte offset, hence the division by 2; the */
/* resulting index is bounds-checked against numGlyphId */
585 offset = sh2->idRangeOffset / 2 + char_lo;
586 if ( offset < (FT_UInt)cmap2->numGlyphId )
588 result = cmap2->glyphIdArray[offset];
/* idDelta is applied modulo 0x10000 */
590 result = ( result + sh2->idDelta ) & 0xFFFFU;
598 /*************************************************************************/
604 /* Find the next encoded character. Uses format 2. */
607 /* charCode :: The wanted character code. */
609 /* cmap :: A pointer to a cmap table in format 2. */
612 /* Next encoded character. 0 if none exists. */
/* Format 2 scan: iterates over codes below 0x10000, selecting the */
/* subheader as code_to_index2 does and probing `glyphIdArray' for a */
/* non-zero entry. */
614 FT_CALLBACK_DEF( FT_ULong )
615 code_to_next2( TT_CMapTable cmap,
618 FT_UInt index1, offset;
621 TT_CMap2SubHeader sh2;
625 cmap2 = &cmap->c.cmap2;
629 * This is relatively simplistic -- look for a subHeader containing
630 * glyphs and then walk to the first glyph in that subHeader.
632 while ( charCode < 0x10000L )
634 char_lo = (FT_UInt)( charCode & 0xFF );
635 char_hi = charCode >> 8;
639 /* an 8-bit character code -- we use the subHeader 0 in this case */
640 /* to test whether the character code is in the charmap */
641 index1 = cmap2->subHeaderKeys[char_lo];
650 /* a 16-bit character code */
651 index1 = cmap2->subHeaderKeys[char_hi & 0xFF];
/* jump to the first code of the next high byte */
654 charCode = ( char_hi + 1 ) << 8;
659 sh2 = cmap2->subHeaders + index1;
660 char_lo -= sh2->firstCode;
/* NOTE(review): code_to_index2 accepts char_lo < entryCount, whereas */
/* this test only skips char_lo > entryCount, so char_lo == entryCount */
/* is still probed below -- confirm whether `>=' was intended. */
662 if ( char_lo > (FT_UInt)sh2->entryCount )
664 charCode = ( char_hi + 1 ) << 8;
668 offset = sh2->idRangeOffset / 2 + char_lo;
/* an out-of-range index is tested before the array is read */
669 if ( offset >= (FT_UInt)cmap2->numGlyphId ||
670 cmap2->glyphIdArray[offset] == 0 )
682 /*************************************************************************/
688 /* Converts the character code into a glyph index. Uses format 4. */
691 /* charCode :: The wanted character code. */
693 /* cmap :: A pointer to a cmap table in format 4. */
696 /* Glyph index into the glyphs array. 0 if the glyph does not exist. */
/* Format 4 lookup: tries the cached `last_segment' first, then scans */
/* the segments in order; the glyph id is either charCode + idDelta or */
/* an entry of `glyphIdArray' plus idDelta, both modulo 0x10000. */
698 FT_CALLBACK_DEF( FT_UInt )
699 code_to_index4( TT_CMapTable cmap,
702 FT_UInt result, index1, segCount;
704 TT_CMap4SegmentRec *seg4, *limit;
707 cmap4 = &cmap->c.cmap4;
709 segCount = cmap4->segCountX2 / 2;
/* one past the last segment */
710 limit = cmap4->segments + segCount;
712 /* first, check against the last used segment */
/* NOTE(review): seg4 is dereferenced below without a visible check */
/* that the table has any segment -- confirm last_segment is always */
/* valid here. */
714 seg4 = cmap4->last_segment;
716 /* the following single unsigned comparison is equivalent, for */
/* startCount <= endCount, to performing two tests, as in */
718 /* if ( charCode >= seg4->startCount && charCode < seg4->endCount ) */
/* (note the strict `<': a code equal to endCount does not hit the */
/* cache and is presumably left to the scan below) */
720 /* This is a bit strange, but it is faster, and the idea behind the */
721 /* cache is to significantly speed up charcode to glyph index */
724 if ( (FT_ULong)( charCode - seg4->startCount ) <
725 (FT_ULong)( seg4->endCount - seg4->startCount ) )
728 for ( seg4 = cmap4->segments; seg4 < limit; seg4++ )
730 /* the ranges are sorted in increasing order. If we are out of */
731 /* the range here, the char code isn't in the charmap, so exit. */
733 if ( charCode > (FT_UInt)seg4->endCount )
736 if ( charCode >= (FT_UInt)seg4->startCount )
/* remember the segment for the next lookup */
742 cmap4->last_segment = seg4;
745 /* if the idRangeOffset is 0, we can compute the glyph index */
748 if ( seg4->idRangeOffset == 0 )
749 result = (FT_UInt)( charCode + seg4->idDelta ) & 0xFFFFU;
752 /* otherwise, we must use the glyphIdArray to do it */
/* the index combines the byte offset halved, the position of the code */
/* within the segment and the position of the segment in the table */
753 index1 = (FT_UInt)( seg4->idRangeOffset / 2
754 + ( charCode - seg4->startCount )
755 + ( seg4 - cmap4->segments )
/* bounds check first; a stored 0 is not biased by idDelta */
758 if ( index1 < (FT_UInt)cmap4->numGlyphId &&
759 cmap4->glyphIdArray[index1] != 0 )
760 result = ( cmap4->glyphIdArray[index1] + seg4->idDelta ) & 0xFFFFU;
767 /*************************************************************************/
773 /* Find the next encoded character. Uses format 4. */
776 /* charCode :: The wanted character code. */
778 /* cmap :: A pointer to a cmap table in format 4. */
781 /* Next encoded character. 0 if none exists. */
/* Format 4 scan: locates the first segment whose endCount is not below */
/* `charCode', then walks the codes of that segment. */
783 FT_CALLBACK_DEF( FT_ULong )
784 code_to_next4( TT_CMapTable cmap,
787 FT_UInt index1, segCount;
789 TT_CMap4SegmentRec *seg4, *limit;
792 cmap4 = &cmap->c.cmap4;
793 segCount = cmap4->segCountX2 / 2;
794 limit = cmap4->segments + segCount;
798 for ( seg4 = cmap4->segments; seg4 < limit; seg4++ )
800 /* The ranges are sorted in increasing order, so the first segment */
801 /* whose endCount is not below charCode is the one to continue from. */
803 if ( charCode <= (FT_UInt)seg4->endCount )
/* a code in the gap before the segment is moved up to its start */
809 if ( charCode < (FT_ULong) seg4->startCount )
810 charCode = seg4->startCount;
812 /* if the idRangeOffset is 0, all chars in the map exist */
814 if ( seg4->idRangeOffset == 0 )
817 while ( charCode <= (FT_UInt) seg4->endCount )
819 /* otherwise, we must use the glyphIdArray to do it */
/* same index computation as in code_to_index4 */
820 index1 = (FT_UInt)( seg4->idRangeOffset / 2
821 + ( charCode - seg4->startCount )
822 + ( seg4 - cmap4->segments )
/* bounds check first; only a non-zero entry counts as encoded */
825 if ( index1 < (FT_UInt)cmap4->numGlyphId &&
826 cmap4->glyphIdArray[index1] != 0 )
835 /*************************************************************************/
841 /* Converts the character code into a glyph index. Uses format 6. */
844 /* charCode :: The wanted character code. */
846 /* cmap :: A pointer to a cmap table in format 6. */
849 /* Glyph index into the glyphs array. 0 if the glyph does not exist. */
/* Format 6 lookup: the code is rebased to `firstCode' and used as a */
/* direct index into `glyphIdArray' when it is below `entryCount'. */
851 FT_CALLBACK_DEF( FT_UInt )
852 code_to_index6( TT_CMapTable cmap,
859 cmap6 = &cmap->c.cmap6;
/* assuming charCode is unsigned, a code below firstCode wraps to a */
/* large value and fails the bound test below */
860 charCode -= cmap6->firstCode;
862 if ( charCode < (FT_UInt)cmap6->entryCount )
863 result = cmap6->glyphIdArray[charCode];
869 /*************************************************************************/
875 /* Find the next encoded character. Uses format 6. */
878 /* charCode :: The wanted character code. */
880 /* cmap :: A pointer to a cmap table in format 6. */
883 /* Next encoded character. 0 if none exists. */
/* Format 6 scan: walks `glyphIdArray' from the rebased code and returns */
/* the code of the first non-zero entry met. */
885 FT_CALLBACK_DEF( FT_ULong )
886 code_to_next6( TT_CMapTable cmap,
894 cmap6 = &cmap->c.cmap6;
/* a code below the covered range is moved up to its first code, so */
/* the subtraction below cannot wrap */
896 if ( charCode < (FT_ULong) cmap6->firstCode )
897 charCode = cmap6->firstCode;
/* from here on charCode is an index into glyphIdArray */
899 charCode -= cmap6->firstCode;
901 while ( charCode < (FT_UInt)cmap6->entryCount )
903 if ( cmap6->glyphIdArray[charCode] != 0 )
/* convert the index back into a character code */
904 return charCode + cmap6->firstCode;
912 /*************************************************************************/
915 /* code_to_index8_12 */
918 /* Converts the (possibly 32bit) character code into a glyph index. */
919 /* Uses format 8 or 12. */
922 /* charCode :: The wanted character code. */
924 /* cmap :: A pointer to a cmap table in format 8 or 12. */
927 /* Glyph index into the glyphs array. 0 if the glyph does not exist. */
/* Format 8/12 lookup: tries the cached `last_group' first, then scans */
/* the groups in order; the glyph id is the group's startGlyphID plus */
/* the distance of the code from the group's startCharCode. */
929 FT_CALLBACK_DEF( FT_UInt )
930 code_to_index8_12( TT_CMapTable cmap,
933 TT_CMap8_12 cmap8_12;
934 TT_CMapGroupRec *group, *limit;
937 cmap8_12 = &cmap->c.cmap8_12;
/* one past the last group */
938 limit = cmap8_12->groups + cmap8_12->nGroups;
940 /* first, check against the last used group */
/* NOTE(review): `group' is dereferenced below without a visible check */
/* that the table has any group -- confirm last_group is always valid */
/* here. */
942 group = cmap8_12->last_group;
944 /* the following single unsigned comparison is equivalent, for */
/* startCharCode <= endCharCode, to performing two tests, as in */
946 /* if ( charCode >= group->startCharCode && */
947 /* charCode < group->endCharCode ) */
/* (note the strict `<': a code equal to endCharCode does not hit the */
/* cache and is presumably left to the scan below) */
949 /* This is a bit strange, but it is faster, and the idea behind the */
950 /* cache is to significantly speed up charcode to glyph index */
953 if ( (FT_ULong)( charCode - group->startCharCode ) <
954 (FT_ULong)( group->endCharCode - group->startCharCode ) )
957 for ( group = cmap8_12->groups; group < limit; group++ )
959 /* the ranges are sorted in increasing order. If we are out of */
960 /* the range here, the char code isn't in the charmap, so exit. */
962 if ( charCode > group->endCharCode )
965 if ( charCode >= group->startCharCode )
/* remember the group for the next lookup */
971 cmap8_12->last_group = group;
/* glyph ids are consecutive within a group */
974 return (FT_UInt)( group->startGlyphID +
975 ( charCode - group->startCharCode ) );
979 /*************************************************************************/
982 /* code_to_next8_12 */
985 /* Find the next encoded character. Uses format 8 or 12. */
988 /* charCode :: The wanted character code. */
990 /* cmap :: A pointer to a cmap table in format 8 or 12. */
993 /* Next encoded character. 0 if none exists. */
/* Format 8/12 scan: locates the first group whose endCharCode is not */
/* below `charCode'; a code lying before that group is moved up to the */
/* group's startCharCode. */
995 FT_CALLBACK_DEF( FT_ULong )
996 code_to_next8_12( TT_CMapTable cmap,
999 TT_CMap8_12 cmap8_12;
1000 TT_CMapGroupRec *group, *limit;
1004 cmap8_12 = &cmap->c.cmap8_12;
1005 limit = cmap8_12->groups + cmap8_12->nGroups;
1007 for ( group = cmap8_12->groups; group < limit; group++ )
1009 /* the ranges are sorted in increasing order, so the first group */
1010 /* whose endCharCode is not below charCode is the one to use. */
1012 if ( charCode <= group->endCharCode )
/* a code in the gap before the group is moved up to its start */
1018 if ( charCode < group->startCharCode )
1019 charCode = group->startCharCode;
1025 /*************************************************************************/
1028 /* code_to_index10 */
1031 /* Converts the (possibly 32bit) character code into a glyph index. */
1032 /* Uses format 10. */
1035 /* charCode :: The wanted character code. */
1037 /* cmap :: A pointer to a cmap table in format 10. */
1040 /* Glyph index into the glyphs array. 0 if the glyph does not exist. */
/* Format 10 lookup: the code is rebased to `startCharCode' and used as */
/* a direct index into `glyphs' when it is below `numChars'. */
1042 FT_CALLBACK_DEF( FT_UInt )
1043 code_to_index10( TT_CMapTable cmap,
1050 cmap10 = &cmap->c.cmap10;
/* assuming charCode is unsigned, a code below startCharCode wraps to */
/* a large value here */
1051 charCode -= cmap10->startCharCode;
1053 /* the overflow trick for comparison works here also since the number */
1054 /* of glyphs (even if numChars is specified as ULong in the specs) in */
1055 /* an OpenType font is limited to 64k */
1057 if ( charCode < cmap10->numChars )
1058 result = cmap10->glyphs[charCode];
1064 /*************************************************************************/
1067 /* code_to_next10 */
1070 /* Find the next encoded character. Uses format 10. */
1073 /* charCode :: The wanted character code. */
1075 /* cmap :: A pointer to a cmap table in format 10. */
1078 /* Next encoded character. 0 if none exists. */
/* Format 10 scan: walks `glyphs' from the rebased code and returns the */
/* code of the first non-zero entry met. */
1080 FT_CALLBACK_DEF( FT_ULong )
1081 code_to_next10( TT_CMapTable cmap,
1088 cmap10 = &cmap->c.cmap10;
/* a code below the covered range is moved up to its first code, so */
/* the subtraction below cannot wrap */
1090 if ( charCode < cmap10->startCharCode )
1091 charCode = cmap10->startCharCode;
/* from here on charCode is an index into `glyphs' */
1093 charCode -= cmap10->startCharCode;
1095 /* the overflow trick for comparison works here also since the number */
1096 /* of glyphs (even if numChars is specified as ULong in the specs) in */
1097 /* an OpenType font is limited to 64k */
1099 while ( charCode < cmap10->numChars )
1101 if ( cmap10->glyphs[charCode] )
/* convert the index back into a character code */
1102 return ( charCode + cmap10->startCharCode );