lib/freetype/src/tools/docmaker/sources.py

   1 #
   2 # this file contains definitions of classes needed to decompose
   3 # C source files into a series of multi-line "blocks". There are
   4 # two kinds of blocks:
   5 #
   6 #   - normal blocks, which contain source code or ordinary comments
   7 #
   8 #   - documentation blocks, which have restricted formatting, and
   9 #     whose text always starts with a documentation markup tag like
  10 #     "<Function>", "<Type>", etc.
  11 #
  12 # the routines used to process the content of documentation blocks
  13 # are not contained here, but in "content.py"
  14 #
  15 # the classes and methods found here only deal with text parsing
  16 # and basic documentation block extraction
  17 #
  18 import fileinput, re, sys, os, string
  19
  20
  21
  22
  23
  24
  25 ################################################################
  26 ##
  27 ##  BLOCK FORMAT PATTERN
  28 ##
  29 ##   A simple class containing compiled regular expressions used
  30 ##   to detect potential documentation format block comments within
  31 ##   C source code
  32 ##
  33 ##   note that the 'column' pattern must contain a group that will
  34 ##   be used to "unbox" the content of documentation comment blocks
  35 ##
  36 class SourceBlockFormat:
  37
  38     def __init__( self, id, start, column, end ):
  39         """create a block pattern, used to recognize special documentation blocks"""
  40
  41         self.id     = id
  42         self.start  = re.compile( start, re.VERBOSE )
  43         self.column = re.compile( column, re.VERBOSE )
  44         self.end    = re.compile( end, re.VERBOSE )
  45
  46
  47
  48 #
  49 # format 1 documentation comment blocks look like the following:
  50 #
  51 #    /************************************/
  52 #    /*                                  */
  53 #    /*                                  */
  54 #    /*                                  */
  55 #    /************************************/
  56 #
  57 # we define a few regular expressions here to detect them
  58 #
  59
  60 start = r'''
  61   \s*       # any number of whitespace
  62   /\*{2,}/  # followed by '/' and at least two asterisks then '/'
  63   \s*$      # eventually followed by whitespace
  64 '''
  65
  66 column = r'''
  67   \s*      # any number of whitespace
  68   /\*{1}   # followed by '/' and precisely one asterisk
  69   ([^*].*) # followed by anything (group 1)
  70   \*{1}/   # followed by one asterisk and a '/'
  71   \s*$     # enventually followed by whitespace
  72 '''
  73
  74 re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
  75
  76 #
  77 # format 2 documentation comment blocks look like the following:
  78 #
  79 #    /************************************ (at least 2 asterisks)
  80 #     *
  81 #     *
  82 #     *
  83 #     *
  84 #     **/       (1 or more asterisks at the end)
  85 #
  86 # we define a few regular expressions here to detect them
  87 #
  88 start = r'''
  89   \s*     # any number of whitespace
  90   /\*{2,} # followed by '/' and at least two asterisks
  91   \s*$    # eventually followed by whitespace
  92 '''
  93
  94 column = r'''
  95   \s*         # any number of whitespace
  96   \*{1}       # followed by precisely one asterisk
  97   (.*)        # then anything (group1)
  98 '''
  99
 100 end = r'''
 101   \s*     # any number of whitespace
 102   \*+/    # followed by at least one asterisk, then '/'
 103 '''
 104
 105 re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
 106
 107 #
 108 # the list of supported documentation block formats, we could add new ones
 109 # relatively easily
 110 #
 111 re_source_block_formats = [ re_source_block_format1, re_source_block_format2 ]
 112
 113
 114 #
 115 # the following regular expressions correspond to markup tags
 116 # within the documentation comment blocks. they're equivalent
 117 # despite their different syntax
 118 #
 119 # notice how each markup tag _must_ begin a new line
 120 #
 121 re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' )  # <xxxx> format
 122 re_markup_tag2 = re.compile( r'''\s*@(\w*):''' )  # @xxxx: format
 123
 124 #
 125 # the list of supported markup tags, we could add new ones relatively
 126 # easily
 127 #
 128 re_markup_tags = [ re_markup_tag1, re_markup_tag2 ]
 129
 130 #
 131 # used to detect a cross-reference, after markup tags have been stripped
 132 #
 133 re_crossref = re.compile( r'@(\w*)' )
 134
 135 #
 136 # used to detect italic and bold styles in paragraph text
 137 #
 138 re_italic = re.compile( r'_(\w+)_' )
 139 re_bold   = re.compile( r'\*(\w+)\*' )
 140
 141 #
 142 # used to detect the end of commented source lines
 143 #
 144 re_source_sep = re.compile( r'\s*/\*\s*\*/' )
 145
 146 #
 147 # used to perform cross-reference within source output
 148 #
 149 re_source_crossref = re.compile( r'(\W*)(\w*)' )
 150
 151 #
 152 # a list of reserved source keywords
 153 #
 154 re_source_keywords = re.compile( '''( typedef |
 155                                        struct |
 156                                        enum   |
 157                                        union  |
 158                                        const  |
 159                                        char   |
 160                                        int    |
 161                                        short  |
 162                                        long   |
 163                                        void   |
 164                                        signed |
 165                                        unsigned |
 166                                        \#include |
 167                                        \#define  |
 168                                        \#undef   |
 169                                        \#if      |
 170                                        \#ifdef   |
 171                                        \#ifndef  |
 172                                        \#else    |
 173                                        \#endif   )''', re.VERBOSE )
 174
 175 ################################################################
 176 ##
 177 ##  SOURCE BLOCK CLASS
 178 ##
 179 ##   A SourceProcessor is in charge of reading a C source file
 180 ##   and decomposing it into a series of different "SourceBlocks".
 181 ##   each one of these blocks can be made of the following data:
 182 ##
 183 ##   - A documentation comment block that starts with "/**" and
 184 ##     whose exact format will be discussed later
 185 ##
 186 ##   - normal source lines, including comments
 187 ##
 188 ##   the important fields in a text block are the following ones:
 189 ##
 190 ##     self.lines   : a list of text lines for the corresponding block
 191 ##
 192 ##     self.content : for documentation comment blocks only, this is the
 193 ##                    block content that has been "unboxed" from its
 194 ##                    decoration. This is an empty list for all other
 195 ##                    blocks (i.e. sources or ordinary comments with no
 196 ##                    starting markup tag)
 197 ##
 198 class SourceBlock:
 199     def __init__( self, processor, filename, lineno, lines ):
 200         self.processor = processor
 201         self.filename  = filename
 202         self.lineno    = lineno
 203         self.lines     = lines
 204         self.format    = processor.format
 205         self.content   = []
 206
 207         if self.format == None:
 208             return
 209
 210         words = []
 211
 212         # extract comment lines
 213         lines = []
 214
 215         for line0 in self.lines[1:]:
 216             m = self.format.column.match( line0 )
 217             if m:
 218                 lines.append( m.group(1) )
 219
 220         # now, look for a markup tag
 221         for l in lines:
 222             l = string.strip(l)
 223             if len(l) > 0:
 224                 for tag in re_markup_tags:
 225                     if tag.match( l ):
 226                         self.content = lines
 227                 return
 228
 229     def location( self ):
 230         return "(" + self.filename + ":" + repr(self.lineno) + ")"
 231
 232
 233     # debugging only - not used in normal operations
 234     def dump( self ):
 235
 236         if self.content:
 237             print "{{{content start---"
 238             for l in self.content:
 239                 print l
 240             print "---content end}}}"
 241             return
 242
 243         fmt = ""
 244         if self.format:
 245             fmt = repr(self.format.id) + " "
 246
 247         for line in self.lines:
 248             print line
 249
 250
 251 ################################################################
 252 ##
 253 ##  SOURCE PROCESSOR CLASS
 254 ##
 255 ##   The SourceProcessor is in charge of reading a C source file
 256 ##   and decomposing it into a series of different "SourceBlock"
 257 ##   objects.
 258 ##
 259 ##   each one of these blocks can be made of the following data:
 260 ##
 261 ##   - A documentation comment block that starts with "/**" and
 262 ##     whose exact format will be discussed later
 263 ##
 264 ##   - normal source lines, including comments
 265 ##
 266 ##
 267 class SourceProcessor:
 268
 269     def  __init__( self ):
 270         """initialize a source processor"""
 271         self.blocks   = []
 272         self.filename = None
 273         self.format   = None
 274         self.lines    = []
 275
 276     def  reset( self ):
 277         """reset a block processor, clean all its blocks"""
 278         self.blocks = []
 279         self.format = None
 280
 281
 282     def  parse_file( self, filename ):
 283         """parse a C source file, and adds its blocks to the processor's list"""
 284
 285         self.reset()
 286
 287         self.filename = filename
 288
 289         fileinput.close()
 290         self.format    = None
 291         self.lineno    = 0
 292         self.lines     = []
 293
 294         for line in fileinput.input( filename ):
 295
 296             # strip trailing newlines, important on Windows machines !!
 297             if  line[-1] == '\012':
 298                 line = line[0:-1]
 299
 300             if self.format == None:
 301                 self.process_normal_line( line )
 302
 303             else:
 304                 if self.format.end.match( line ):
 305                     # that's a normal block end, add it to lines and
 306                     # create a new block
 307                     # self.lines.append( line )
 308                     self.add_block_lines()
 309
 310                 elif self.format.column.match( line ):
 311                     # that's a normal column line, add it to 'lines'
 312                     self.lines.append( line )
 313
 314                 else:
 315                     # humm.. this is an unexcepted block end,
 316                     # create a new block, but don't process the line
 317                     self.add_block_lines()
 318
 319                     # we need to process the line again
 320                     self.process_normal_line( line )
 321
 322         # record the last lines
 323         self.add_block_lines()
 324
 325
 326
 327     def process_normal_line( self, line ):
 328         """process a normal line and check if it's the start of a new block"""
 329         for f in re_source_block_formats:
 330           if f.start.match( line ):
 331             self.add_block_lines()
 332             self.format = f
 333             self.lineno = fileinput.filelineno()
 334
 335         self.lines.append( line )
 336
 337
 338
 339     def add_block_lines( self ):
 340         """add the current accumulated lines, and create a new block"""
 341         if self.lines != []:
 342             block = SourceBlock( self, self.filename, self.lineno, self.lines )
 343
 344             self.blocks.append( block )
 345             self.format = None
 346             self.lines  = []
 347
 348
 349     # debugging only, not used in normal operations
 350     def dump( self ):
 351         """print all blocks in a processor"""
 352         for b in self.blocks:
 353             b.dump()
 354
 355 # eof