1 /***************************************************************************/
5 /* Type 1 parser (body). */
7 /* Copyright 1996-2000 by */
8 /* David Turner, Robert Wilhelm, and Werner Lemberg. */
10 /* This file is part of the FreeType project, and may only be used, */
11 /* modified, and distributed under the terms of the FreeType project */
12 /* license, LICENSE.TXT. By continuing to use, modify, or distribute */
13 /* this file you indicate that you have read the license and */
14 /* understand and accept it fully. */
16 /***************************************************************************/
19 /*************************************************************************/
21 /* The tokenizer is in charge of loading and reading a Type1 font file */
22 /* (either in PFB or PFA format), and extracting successive tokens and */
23 /* keywords from its two streams (i.e. the font program, and the private */
/* dictionary). */
26 /* Eexec decryption is performed automatically when entering the private */
27 /* dictionary, or when retrieving char strings. */
29 /*************************************************************************/
32 #include <freetype/internal/ftstream.h>
33 #include <freetype/internal/ftdebug.h>
36 #ifdef FT_FLAT_COMPILE
43 #include <freetype/src/type1/t1tokens.h>
44 #include <freetype/src/type1/t1load.h>
49 #include <string.h> /* for strncmp() */
52 #undef READ_BUFFER_INCREMENT
53 #define READ_BUFFER_INCREMENT 0x400
56 /*************************************************************************/
58 /* The macro FT_COMPONENT is used in trace mode. It is an implicit */
59 /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log */
60 /* messages during execution. */
63 #define FT_COMPONENT trace_t1load
66 /* An array of Type1 keywords supported by this engine. The entries */
67 /* are listed in ascending ASCII order (uppercase before lowercase). */
68 /* It should always correspond to the enums `key_xxx'! */
/* Read_Token() maps a hit at index N in a table of key_max - key_first_ */
/* entries to key_first_ + N; presumably that lookup goes through */
/* Find_Name() on this table -- TODO confirm. */
70 const char* t1_keywords[key_max - key_first_] =
72 "-|", "ExpertEncoding", "ND", "NP", "RD", "StandardEncoding", "array",
73 "begin", "closefile", "currentdict", "currentfile", "def", "dict", "dup",
74 "eexec", "end", "executeonly", "false", "for", "index", "noaccess",
75 "put", "readonly", "true", "userdict", "|", "|-"
/* An array of Type1 immediate names, i.e. names written with a leading */
/* slash in the font file. Read_Token() maps a hit at index N in a */
/* table of imm_max - imm_first_ entries to imm_first_ + N, so the order */
/* here presumably has to match the imm_xxx enums -- TODO confirm. */
/* */
/* NOTE(review): "FID" is listed after "FamilyOtherBlues", but in ASCII */
/* order the letter I (0x49) sorts before a (0x61), so "FID" belongs */
/* before "FamilyBlues". If lexico_strcmp() compares byte values, the */
/* binary search in Find_Name() can be misdirected when it probes this */
/* entry and then miss the neighbouring Family... names. Any */
/* reordering would have to be mirrored in the enums. */
79 const char* t1_immediates[imm_max - imm_first_] =
81 "-|", ".notdef", "BlendAxisTypes", "BlueFuzz", "BlueScale", "BlueShift",
82 "BlueValues", "CharStrings", "Encoding", "FamilyBlues", "FamilyName",
83 "FamilyOtherBlues", "FID", "FontBBox", "FontID", "FontInfo", "FontMatrix",
84 "FontName", "FontType", "ForceBold", "FullName", "ItalicAngle",
85 "LanguageGroup", "Metrics", "MinFeature", "ND", "NP", "Notice",
86 "OtherBlues", "OtherSubrs", "PaintType", "Private", "RD", "RndStemUp",
87 "StdHW", "StdVW", "StemSnapH", "StemSnapV", "StrokeWidth", "Subrs",
88 "UnderlinePosition", "UnderlineThickness", "UniqueID", "Weight",
89 "isFixedPitch", "lenIV", "password", "version", "|", "|-"
93 /* Lexicographic comparison of two strings. The first string comes */
/* with an explicit length, which drives the loop below. Find_Name() */
/* calls this as lexico_strcmp( name, length, table_entry ) and tests */
/* the result against zero to detect a match. */
95 int lexico_strcmp( const char* str1,
/* one iteration per remaining character of str1 */
102 for ( ; str1_len > 0; str1_len-- )
119 /* Find a given token/name in a table, performing binary search. */
/* Read_Token() treats a result >= 0 as the index of the matching */
/* entry. The search relies on the table being sorted consistently */
/* with lexico_strcmp(). */
121 int Find_Name( char* base,
/* the search interval is [left, right], both ends inclusive */
130 right = table_len - 1;
/* narrow the interval until at most two candidates remain */
132 while ( right - left > 1 )
/* midpoint of the current interval */
134 int middle = left + ( ( right - left ) >> 1 );
138 cmp = lexico_strcmp( base, length, table[middle] );
/* the two remaining candidates are compared explicitly */
148 if ( !lexico_strcmp( base, length, table[left ] ) )
150 if ( !lexico_strcmp( base, length, table[right] ) )
157 /* Read the small PFB section header. New_Tokenizer() and */
/* Open_PrivateDict() call this as Read_PFB_Tag( stream, &tag, &size ) */
/* and compare the tag against 0x8001 and 0x8002 respectively. */
159 FT_Error Read_PFB_Tag( FT_Stream stream,
168 FT_TRACE2(( "Read_PFB_Tag: reading\n" ));
/* the header occupies a 6-byte frame */
170 if ( ACCESS_Frame( 6L ) )
/* store the 32-bit size with its four bytes in reversed order */
/* (presumably the PFB length field is little-endian while `size' */
/* was read big-endian -- TODO confirm against the frame reads) */
179 *asize = ( ( size & 0xFF ) << 24 ) |
180 ( ( ( size >> 8 ) & 0xFF ) << 16 ) |
181 ( ( ( size >> 16 ) & 0xFF ) << 8 ) |
182 ( ( ( size >> 24 ) & 0xFF ) );
/* `asze' traces the raw value, `size' the byte-swapped one */
184 FT_TRACE2(( " tag = %04x\n", tag ));
185 FT_TRACE4(( " asze = %08x\n", size ));
186 FT_TRACE2(( " size = %08x\n", *asize ));
/* Enlarge the tokenizer read buffer by reading more bytes from the */
/* stream. At most READ_BUFFER_INCREMENT bytes are added per call, */
/* and never more than max - limit. The error code is stored in */
/* tokzer->error; callers test the return value for failure. */
193 FT_Error grow( T1_Tokenizer tokzer )
/* NOTE(review): `memory' (and `stream' below) are not referenced by */
/* name here; presumably the REALLOC and FILE_Read macros use them */
/* implicitly -- confirm before renaming or removing them. */
197 FT_Memory memory = tokzer->memory;
/* number of bytes that may still be appended before reaching max */
200 left_bytes = tokzer->max - tokzer->limit;
202 if ( left_bytes > 0 )
204 FT_Stream stream = tokzer->stream;
/* clamp the read size to one increment */
207 if ( left_bytes > READ_BUFFER_INCREMENT )
208 left_bytes = READ_BUFFER_INCREMENT;
210 FT_TRACE2(( "Growing tokenizer buffer by %d bytes\n", left_bytes ));
/* limit only advances when both the reallocation and the read succeed; */
/* the reallocation may move tokzer->base */
212 if ( !REALLOC( tokzer->base, tokzer->limit,
213 tokzer->limit + left_bytes ) &&
214 !FILE_Read( tokzer->base + tokzer->limit, left_bytes ) )
215 tokzer->limit += left_bytes;
/* presumably reached when nothing is left to read (left_bytes <= 0) */
219 FT_ERROR(( "Unexpected end of Type1 fragment!\n" ));
220 error = T1_Err_Invalid_File_Format;
223 tokzer->error = error;
228 /*************************************************************************/
234 /* Performs the Type 1 charstring decryption process. */
237 /* buffer :: The base address of the data to decrypt. */
238 /* length :: The number of bytes to decrypt (beginning from the base */
240 /* seed :: The encryption seed (4330 for charstrings). */
/* Each byte is decrypted with a running seed. The constants below are */
/* those of the Type 1 encryption recurrence (multiplier 52845, */
/* increment 22719). Callers in this file pass seed 55665 for the */
/* eexec section and 4330 for charstrings. */
243 void t1_decrypt( FT_Byte* buffer,
/* plain byte = cipher byte XOR the seed bits above the low 8 */
252 plain = ( *buffer ^ ( seed >> 8 ) );
/* the next seed is derived from the cipher byte, so *buffer must */
/* still hold the encrypted value at this point */
253 seed = ( *buffer + seed ) * 52845 + 22719;
260 /*************************************************************************/
266 /* Creates a new tokenizer from a given input stream. This function */
267 /* automatically recognizes `pfa' or `pfb' files. The function */
268 /* Read_Token() can then be used to extract successive tokens from */
272 /* stream :: The input stream. */
275 /* tokenizer :: A handle to a new tokenizer object. */
278 /* FreeType error code. 0 means success. */
281 /* This function copies the stream handle within the object. Callers */
282 /* should not discard `stream'. This is done by the Done_Tokenizer() */
/* Build a tokenizer on top of `stream': rewind, allocate the object, */
/* detect PFB versus PFA, set up the read buffer (base/limit/max) and */
/* verify the font header signature. */
286 FT_Error New_Tokenizer( FT_Stream stream,
287 T1_Tokenizer* tokenizer )
289 FT_Memory memory = stream->memory;
302 /* allocate object */
303 if ( FILE_Seek( 0L ) ||
304 ALLOC( tokzer, sizeof ( *tokzer ) ) )
307 tokzer->stream = stream;
308 tokzer->memory = stream->memory;
311 tokzer->in_private = 0;
/* upper bound for the read buffer: the whole stream */
315 tok_max = stream->size;
/* probe for a PFB segment header at the start of the file */
317 error = Read_PFB_Tag( stream, &tag, &size );
323 /* assume that it is a PFA file -- an error will be produced later */
324 /* if a character with value > 127 is encountered */
326 /* rewind to start of file */
327 if ( FILE_Seek( 0L ) )
335 /* if it is a memory-based resource, set up pointer */
/* base then points into the stream's own memory, not into a buffer */
/* owned by the tokenizer */
338 tok_base = (FT_Byte*)stream->base + stream->pos;
342 /* check that the `size' field is valid */
343 if ( FILE_Skip( size ) )
/* 0x8001 is the tag accepted here for the first (font program) segment; */
/* Open_PrivateDict() expects 0x8002 for the private dictionary */
346 else if ( tag == 0x8001 )
348 /* read segment in memory */
349 if ( ALLOC( tok_base, size ) )
352 if ( FILE_Read( tok_base, size ) )
362 tokzer->base = tok_base;
363 tokzer->limit = tok_limit;
364 tokzer->max = tok_max;
369 /* now check font format; we must see `%!PS-AdobeFont-1' */
370 /* or `%!FontType' */
/* presumably grows the buffer so that 16 bytes are available */
372 if ( 16 > tokzer->limit )
/* the strncmp lengths (16 and 10) equal the lengths of the literals. */
/* NOTE(review): `limit <= 16' also rejects a buffer of exactly 16 */
/* bytes although the comparison would stay in bounds. */
375 if ( tokzer->limit <= 16 ||
376 ( strncmp( (const char*)tokzer->base, "%!PS-AdobeFont-1", 16 ) &&
377 strncmp( (const char*)tokzer->base, "%!FontType", 10 ) ) )
379 FT_TRACE2(( "[not a Type1 font]\n" ));
380 error = FT_Err_Unknown_File_Format;
/* NOTE(review): presumably the failure path. Verify it is not reached */
/* with base pointing into the stream's own memory (memory-based case */
/* above); Done_Tokenizer() guards its FREE for that reason. */
387 FREE( tokzer->base );
393 /* Return the value of a hexadecimal digit. Open_PrivateDict() ORs */
/* several results together and treats a negative value as meaning */
/* that at least one character was not a hexadecimal digit. */
395 int hexa_value( char c )
/* the unsigned casts make characters below the base character wrap to */
/* large values, so presumably a single upper-bound test on `d' covers */
/* each range (the tests themselves are not shown -- TODO confirm) */
400 d = (unsigned int)( c - '0' );
404 d = (unsigned int)( c - 'a' );
/* letters map to the values 10 and up */
406 return (int)( d + 10 );
408 d = (unsigned int)( c - 'A' );
410 return (int)( d + 10 );
416 /*************************************************************************/
422 /* Closes a given tokenizer. This function will also close the */
423 /* stream embedded in the object. */
426 /* tokenizer :: The target tokenizer object. */
429 /* FreeType error code. 0 means success. */
432 FT_Error Done_Tokenizer( T1_Tokenizer tokenizer )
/* presumably used implicitly by the FREE macro -- TODO confirm */
434 FT_Memory memory = tokenizer->memory;
437 /* clear read buffer if needed (disk-based resources) */
/* base is owned by the tokenizer in two cases: after Open_PrivateDict() */
/* (in_private set; it installs or grows its own buffer) and when the */
/* stream has no memory base, so the buffer was allocated on read. */
/* Otherwise base points into the stream's memory and is left alone. */
438 if ( tokenizer->in_private || !tokenizer->stream->base )
439 FREE( tokenizer->base );
446 /*************************************************************************/
449 /* Open_PrivateDict */
452 /* This function must be called to set the tokenizer to the private */
453 /* section of the Type1 file. It automatically recognizes the */
454 /* kind of eexec encryption used (ascii or binary). */
457 /* tokenizer :: The target tokenizer object. */
458 /* lenIV :: The value of the `lenIV' variable. */
461 /* FreeType error code. 0 means success. */
/* Switch the tokenizer to the private dictionary. The encrypted bytes */
/* end up in tokzer->base (a newly allocated buffer, or the grown read */
/* buffer for disk-based binary PFA data) and are then decrypted in */
/* place with the eexec seed 55665. */
/* NOTE(review): the description block above documents a `lenIV' input, */
/* but this function takes only the tokenizer. */
464 FT_Error Open_PrivateDict( T1_Tokenizer tokenizer )
466 T1_Tokenizer tokzer = tokenizer;
467 FT_Stream stream = tokzer->stream;
468 FT_Memory memory = tokzer->memory;
474 FT_Byte* private_dict;
476 /* are we already in the private dictionary ? */
477 if ( tokzer->in_private )
480 if ( tokzer->in_pfb )
482 /* in the case of the PFB format, the private dictionary can be */
483 /* made of several segments. We thus first read the number of */
484 /* segments to compute the total size of the private dictionary */
485 /* then re-read them into memory. */
/* remember where the first private segment starts for the second pass */
486 FT_Long start_pos = FILE_Pos();
487 FT_ULong private_dict_size = 0;
/* first pass: add up the sizes of consecutive 0x8002 segments */
492 error = Read_PFB_Tag( stream, &tag, &size );
493 if ( error || tag != 0x8002 )
496 private_dict_size += size;
498 if ( FILE_Skip( size ) )
502 /* check that we have a private dictionary there */
503 /* and allocate private dictionary buffer */
504 if ( private_dict_size == 0 )
506 FT_ERROR(( "Open_PrivateDict:" ));
507 FT_ERROR(( " invalid private dictionary section\n" ));
508 error = T1_Err_Invalid_File_Format;
512 if ( ALLOC( private_dict, private_dict_size ) )
515 /* read all sections into buffer */
516 if ( FILE_Seek( start_pos ) )
/* second pass: private_dict_size is reused as the write offset */
519 private_dict_size = 0;
522 error = Read_PFB_Tag( stream, &tag, &size );
523 if ( error || tag != 0x8002 )
529 if ( FILE_Read( private_dict + private_dict_size, size ) )
532 private_dict_size += size;
535 /* we must free the field `tokzer.base' if we are in a disk-based */
538 FREE( tokzer->base );
540 tokzer->base = private_dict;
/* the buffer is complete: limit and max both equal its size */
542 tokzer->limit = private_dict_size;
543 tokzer->max = private_dict_size;
550 /* we are in a PFA file; read each token until we find `eexec' */
551 while ( tokzer->token.kind2 != key_eexec )
553 error = Read_Token( tokzer );
558 /* now determine whether the private dictionary is encoded in binary */
559 /* or hexadecimal ASCII format. */
561 /* we need to access the next 4 bytes (after the final \r following */
562 /* the `eexec' keyword); if they all are hexadecimal digits, then */
563 /* we have a case of ASCII storage. */
/* 5 bytes = 1 skipped separator + the 4 bytes probed below */
564 while ( tokzer->cursor + 5 > tokzer->limit )
566 error = grow( tokzer );
571 /* skip whitespace/line feed after `eexec' */
572 base = (char*)tokzer->base + tokzer->cursor + 1;
/* the OR of the four results is negative as soon as one of them is */
/* negative (assuming hexa_value() reports a non-digit as -1 -- TODO */
/* confirm), i.e. at least one probed byte is not a hex digit */
573 if ( ( hexa_value( base[0] ) | hexa_value( base[1] ) |
574 hexa_value( base[2] ) | hexa_value( base[3] ) ) < 0 )
576 /* binary encoding -- `simply' read the stream */
578 /* if it is a memory-based resource, we need to allocate a new */
579 /* storage buffer for the private dictionary, as it must be */
580 /* decrypted later */
583 size = stream->size - tokzer->cursor - 1; /* remaining bytes */
585 if ( ALLOC( private_dict, size ) ) /* alloc private dict buffer */
588 /* copy eexec-encrypted bytes */
589 MEM_Copy( private_dict, tokzer->base + tokzer->cursor + 1, size );
591 /* reset pointers - forget about file mapping */
592 tokzer->base = private_dict;
593 tokzer->limit = size;
597 /* Conversely, for disk-based resources, we simply grow */
598 /* the current buffer until its completion, and decrypt the */
599 /* bytes within it. In all cases, the `base' buffer will be */
600 /* discarded on DoneTokenizer if we are in the private dict. */
603 /* grow the read buffer to the full file */
604 while ( tokzer->limit < tokzer->max )
606 error = grow( tokenizer );
611 /* set up cursor to first encrypted byte */
617 /* ASCII hexadecimal encoding. This sucks... */
624 /* allocate a buffer, read each one byte at a time */
/* `count' is the number of stream bytes after the cursor; `size' used */
/* for the allocation is presumably derived from it -- TODO confirm */
625 count = stream->size - tokzer->cursor;
628 if ( ALLOC( private_dict, size ) ) /* alloc private dict buffer */
631 write = private_dict;
632 cur = tokzer->base + tokzer->cursor;
633 limit = tokzer->base + tokzer->limit;
635 /* read each byte */
638 /* ensure that we can read the next 2 bytes! */
639 while ( cur + 2 > limit )
/* grow() may move tokzer->base, so keep the position as an offset */
/* and rebuild both pointers afterwards */
641 int cursor = cur - tokzer->base;
644 error = grow( tokzer );
647 cur = tokzer->base + cursor;
648 limit = tokzer->base + tokzer->limit;
651 /* check for new line */
652 if ( cur[0] == '\r' || cur[0] == '\n' )
659 int hex1 = hexa_value(cur[0]);
662 /* exit if we have a non-hexadecimal digit which isn't */
663 /* a new-line character */
667 /* otherwise, store byte */
/* NOTE(review): no validation of the second digit is visible here; a */
/* negative hexa_value( cur[1] ) would be OR'ed into the stored byte. */
668 *write++ = ( hex1 << 4 ) | hexa_value( cur[1] );
674 /* get rid of old buffer in the case of disk-based resources */
676 FREE( tokzer->base );
678 /* set up pointers */
679 tokzer->base = private_dict;
680 tokzer->limit = size;
686 /* finally, decrypt the private dictionary - and skip the lenIV bytes */
/* 55665 is the seed used for the eexec-encrypted section */
687 t1_decrypt( tokzer->base, tokzer->limit, 55665 );
/* presumably the failure path: release the partially filled buffer */
694 FREE( private_dict );
699 /*************************************************************************/
705 /* Reads a new token from the current input stream. This function */
706 /* extracts a token from the font program until Open_PrivateDict() */
707 /* has been called. After this, it returns tokens from the */
708 /* (eexec-encrypted) private dictionary. */
711 /* tokenizer :: The target tokenizer object. */
714 /* FreeType error code. 0 means success. */
717 /* Use the function Read_CharStrings() to read the binary charstrings */
718 /* from the private dict. */
/* Scan the next token from the read buffer and describe it in */
/* tok->token: `kind' and `kind2' classify it, `start' and `len' locate */
/* it as an offset and length relative to tok->base. Returns */
/* tokenizer->error. */
721 FT_Error Read_Token( T1_Tokenizer tokenizer )
723 T1_Tokenizer tok = tokenizer;
726 char c, starter, ender;
727 FT_Bool token_started;
/* every call starts from a clean error state and the generic kind */
732 tok->error = T1_Err_Ok;
733 tok->token.kind = tok_any;
751 c = (char)base[cur++];
753 /* check that we have an ASCII character */
/* the cast to FT_Byte keeps the test valid when char is signed */
754 if ( (FT_Byte)c > 127 )
756 FT_ERROR(( "Read_Token:" ));
757 FT_ERROR(( " unexpected binary data in Type1 fragment!\n" ));
758 tok->error = T1_Err_Invalid_File_Format;
767 case '\t': /* skip initial whitespace => skip to next */
770 /* possibly a name, keyword, whatever */
771 tok->token.kind = tok_any;
/* cur already points past the delimiter just read, hence the - 1 */
772 tok->token.len = cur-tok->token.start - 1;
775 /* otherwise, skip everything */
778 case '%': /* this is a comment -- skip everything */
781 FT_Int left = limit - cur;
786 c = (char)base[cur++];
/* a comment ends at the first line terminator */
787 if ( c == '\r' || c == '\n' )
/* buffer exhausted inside the comment: fetch more data */
792 if ( grow( tokenizer ) )
798 case '(': /* a Postscript string */
803 if ( !token_started )
/* the token begins at the opening delimiter that was just consumed */
806 tok->token.start = cur - 1;
/* track nesting of the delimiter pair (`starter' / `ender') */
810 FT_Int nest_level = 1;
816 FT_Int left = limit - cur;
821 c = (char)base[cur++];
826 else if ( c == ender )
/* the outermost closing delimiter completes the token */
829 if ( nest_level <= 0 )
831 tok->token.kind = kind;
/* the length includes both delimiters */
832 tok->token.len = cur - tok->token.start;
846 case '[': /* a Postscript array */
855 case '{': /* a Postscript program */
864 case '<': /* a Postscript hex byte array? */
873 case '0': /* any number */
886 tok->token.kind = tok_number;
888 tok->token.start = cur - 1;
893 FT_Int left = limit-cur;
898 c = (char)base[cur++];
/* exclude the terminating character that was read last */
913 tok->token.len = cur - tok->token.start - 1;
928 case '.': /* maybe a number */
935 tok->token.start = cur - 1;
939 FT_Int left = limit - cur;
944 /* test for any following digit, interpreted as number */
946 tok->token.kind = ( c >= '0' && c <= '9' ? tok_number : tok_any );
956 case '/': /* maybe an immediate name */
957 if ( !token_started )
960 tok->token.start = cur - 1;
964 FT_Int left = limit - cur;
969 /* test for single '/', interpreted as garbage */
971 tok->token.kind = ( c == ' ' || c == '\t' ||
972 c == '\r' || c == '\n' ) ? tok_any
985 Any_Token: /* possibly a name or whatever */
987 tok->token.len = cur - tok->token.start;
992 if ( !token_started )
995 tok->token.start = cur - 1;
1008 /* now, try to match keywords and immediate names */
1012 switch ( tok->token.kind )
1014 case tok_immediate: /* immediate name */
/* start + 1 skips the leading slash of the immediate name */
1015 index = Find_Name( (char*)( tok->base + tok->token.start + 1 ),
1018 imm_max - imm_first_ );
/* a table index N becomes the token type imm_first_ + N */
1019 tok->token.kind2 = ( index >= 0 )
1020 ? (T1_TokenType)( imm_first_ + index )
1024 case tok_any: /* test for keyword */
1025 index = Find_Name( (char*)( tok->base + tok->token.start ),
1028 key_max - key_first_ );
/* a recognized keyword is re-tagged and gets key_first_ + N */
1031 tok->token.kind = tok_keyword;
1032 tok->token.kind2 = (T1_TokenType)( key_first_ + index );
/* tok_error in kind2 marks tokens without a keyword/immediate match */
1035 tok->token.kind2 = tok_error;
1039 tok->token.kind2 = tok_error;
1042 return tokenizer->error;
1048 /*************************************************************************/
1051 /* Read_CharStrings */
1054 /* Reads a charstrings element from the current input stream. These */
1055 /* are binary bytes that encode each individual glyph outline. */
1057 /* The caller is responsible for skipping the `lenIV' bytes at the */
1058 /* start of the record. */
1061 /* tokenizer :: The target tokenizer object. */
1062 /* num_chars :: The number of binary bytes to read. */
1065 /* buffer :: The target array of bytes. These are */
1066 /* eexec-decrypted. */
1069 /* FreeType error code. 0 means success. */
1072 /* This is the function to use for reading binary charstrings */
1073 /* from the private dict; Read_Token() rejects non-ASCII bytes. */
/* Copy `num_chars' bytes from the current read position into `buffer', */
/* decrypt the copy with the charstring seed and advance the cursor. */
1076 FT_Error Read_CharStrings( T1_Tokenizer tokenizer,
/* number of bytes currently available after the cursor */
1082 FT_Int left = tokenizer->limit - tokenizer->cursor;
1085 if ( left >= num_chars )
/* decryption happens on the copy; the tokenizer buffer is untouched */
1087 MEM_Copy( buffer, tokenizer->base + tokenizer->cursor, num_chars );
1088 t1_decrypt( buffer, num_chars, 4330 );
1089 tokenizer->cursor += num_chars;
/* not enough data yet: read more from the stream, presumably retrying */
/* afterwards; a failing grow() ends the call with its error */
1093 if ( grow( tokenizer ) )
1094 return tokenizer->error;