cdc639c6f8548113131b6b3214debe410095d344
[lldb.git] / clang / utils / creduce-clang-crash.py
1 #!/usr/bin/env python
2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
3
4 Output files:
5   *.reduced.sh -- crash reproducer with minimal arguments
6   *.reduced.cpp -- the reduced file
7   *.test.sh -- interestingness test for C-Reduce
8 """
9
from __future__ import print_function
from argparse import ArgumentParser, RawTextHelpFormatter
import os
import re
import shlex
import shutil
import stat
import subprocess
import sys
import tempfile

try:
  import pipes
except ImportError:
  # `pipes` was removed in Python 3.13; shlex provides the same quote().
  pipes = shlex

try:
  from distutils.spawn import find_executable
except ImportError:
  # `distutils` was removed in Python 3.12; shutil.which is its replacement.
  from shutil import which as find_executable
22
# Module-wide settings; main() assigns all three before a Reduce is created.
# Gates the output of verbose_print(); set from the -v/--verbose flag.
verbose = False
# Path of the creduce executable, as resolved by check_cmd() in main().
creduce_cmd = None
# Path of the clang executable, as resolved by check_cmd() in main().
clang_cmd = None
26
def verbose_print(*args, **kwargs):
  """Forward all arguments to print(), but only when `verbose` is set."""
  if not verbose:
    return
  print(*args, **kwargs)
30
def check_file(fname):
  """Return the normalized form of `fname`.

  Exits the program with an error message when the normalized path does not
  name an existing regular file.
  """
  normalized = os.path.normpath(fname)
  if os.path.isfile(normalized):
    return normalized
  sys.exit("ERROR: %s does not exist" % (normalized))
36
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
  """
  Returns absolute path to cmd_path if it is given,
  or absolute path to cmd_dir/cmd_name.

  When cmd_dir is empty/None, cmd_name is looked up in $PATH instead.
  Exits the program with an error message if no executable is found.
  """
  # distutils was removed from the standard library in Python 3.12; prefer
  # shutil.which and only fall back to distutils on interpreters without it.
  try:
    from shutil import which as locate
  except ImportError:
    from distutils.spawn import find_executable as locate

  if cmd_path:
    # Make the path absolute so the creduce test can be run from any directory.
    cmd_path = os.path.abspath(cmd_path)
    cmd = locate(cmd_path)
    if cmd:
      return cmd
    sys.exit("ERROR: executable `%s` not found" % (cmd_path))

  cmd = locate(cmd_name, path=cmd_dir)
  if cmd:
    # A relative search directory (or relative $PATH entry) yields a relative
    # result; make it absolute for the same reason as above.
    return os.path.abspath(cmd)

  if not cmd_dir:
    cmd_dir = "$PATH"
  sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
57
def quote_cmd(cmd):
  """Return the argument list `cmd` as a single shell-escaped string."""
  # pipes was removed from the standard library in Python 3.13; shlex.quote is
  # the same function and has been available since Python 3.3.
  try:
    from shlex import quote
  except ImportError:
    from pipes import quote
  return ' '.join(quote(arg) for arg in cmd)
60
def write_to_script(text, filename):
  """Write `text` to `filename` and add the owner-execute permission bit."""
  with open(filename, 'w') as script:
    script.write(text)
  current_mode = os.stat(filename).st_mode
  os.chmod(filename, current_mode | stat.S_IEXEC)
65
class Reduce(object):
  """Reduces a clang crash to a small source file and a short command line.

  Constructing an instance copies the input file to `<name>.reduced<ext>`,
  reads the clang arguments from the crash script and runs clang once to learn
  which output identifies the crash. The remaining methods simplify the
  arguments, write and sanity-check the C-Reduce interestingness test,
  preprocess the input, run C-Reduce and finally minimize the command line.
  """

  def __init__(self, crash_script, file_to_reduce):
    """Set up output file names and record the expected crash output.

    crash_script   -- script whose first command line is the crashing clang
                      invocation
    file_to_reduce -- source file named in that invocation

    Exits the program if no command or no crash can be found.
    """
    crash_script_name, crash_script_ext = os.path.splitext(crash_script)
    file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

    # Output files, all written next to the corresponding input file.
    self.testfile = file_reduce_name + '.test.sh'
    self.crash_script = crash_script_name + '.reduced' + crash_script_ext
    self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
    # Work on a copy so the original input is never modified.
    shutil.copy(file_to_reduce, self.file_to_reduce)

    self.clang = clang_cmd
    self.clang_args = []
    # Strings that must all appear in clang's output for a run to count as
    # reproducing the crash.
    self.expected_output = []
    # True when the crash is identified by stack trace function names rather
    # than by an error message.
    self.needs_stack_trace = False
    self.creduce_flags = ["--tidy"]

    self.read_clang_args(crash_script, file_to_reduce)
    self.read_expected_output()

  def get_crash_cmd(self, cmd=None, args=None, filename=None):
    """Return the clang invocation as a list: [cmd] + args + [filename].

    Omitted parts default to self.clang, self.clang_args and
    self.file_to_reduce. An explicitly empty `args` list is honoured: it means
    "no arguments", not "use the defaults".
    """
    if not cmd:
      cmd = self.clang
    # Only None selects the default. Testing truthiness here would make an
    # empty candidate list silently re-test the full argument list.
    if args is None:
      args = self.clang_args
    if not filename:
      filename = self.file_to_reduce

    return [cmd] + list(args) + [filename]

  @staticmethod
  def _extract_clang_args(lines, filename):
    """Return the clang arguments found in the lines of a crash script.

    The command is the first line that is neither blank nor a comment. The
    executable (first word) and the last occurrence of `filename` are dropped.
    Returns None when there is no command line.
    """
    cmd = None
    for line in lines:
      stripped = line.strip()
      if stripped and not stripped.startswith('#'):
        cmd = shlex.split(line)
        break
    if not cmd:
      return None

    # Remove clang and filename from the command
    # Assume the last occurrence of the filename is the clang input file
    del cmd[0]
    wanted = os.path.normpath(filename)
    for i in range(len(cmd) - 1, -1, -1):
      # Also compare normalized paths so that e.g. "./t.cpp" matches "t.cpp".
      if cmd[i] == filename or os.path.normpath(cmd[i]) == wanted:
        del cmd[i]
        break
    return cmd

  def read_clang_args(self, crash_script, filename):
    """Read the clang arguments from `crash_script` into self.clang_args.

    Exits the program if the script contains no command line.
    """
    print("\nReading arguments from crash script...")
    with open(crash_script) as f:
      cmd = self._extract_clang_args(f, filename)
    if cmd is None:
      sys.exit("Could not find command in the crash script.")

    self.clang_args = cmd
    verbose_print("Clang arguments:", quote_cmd(self.clang_args))

  @staticmethod
  def _find_crash_signature(crash_output):
    """Return (strings identifying the crash, whether they are stack frames).

    A known error message is preferred. Otherwise up to five function names
    from the stack trace are used, and the returned list may be empty.
    """
    # Look for specific error messages
    regexes = [r"Assertion .+ failed", # Linux assert()
               r"Assertion failed: .+,", # FreeBSD/Mac assert()
               r"fatal error: error in backend: .+",
               r"LLVM ERROR: .+",
               r"UNREACHABLE executed at .+?!",
               r"LLVM IR generation of declaration '.+'",
               r"Generating code for declaration '.+'",
               r"\*\*\* Bad machine code: .+ \*\*\*"]
    for msg_re in regexes:
      match = re.search(msg_re, crash_output)
      if match:
        return [match.group(0)], False

    # If no message was found, use the top five stack trace functions,
    # ignoring some common functions
    # Five is a somewhat arbitrary number; the goal is to get a small number
    # of identifying functions with some leeway for common functions
    stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
    filters = ["PrintStackTrace", "RunSignalHandlers", "CleanupOnSignal",
               "HandleCrash", "SignalHandler", "__restore_rt", "gsignal", "abort"]

    def skip_function(func_name):
      return any(name in func_name for name in filters)

    matches = re.findall(stacktrace_re, crash_output)
    return [x for x in matches if x and not skip_function(x)][:5], True

  def read_expected_output(self):
    """Run the crash command and record what identifies the crash.

    Sets self.expected_output and self.needs_stack_trace. Exits the program
    with status 1 if the output contains nothing that identifies a crash.
    """
    print("\nGetting expected crash output...")
    p = subprocess.Popen(self.get_crash_cmd(),
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    crash_output, _ = p.communicate()

    # Remove color codes
    ansi_escape = r'\x1b\[[0-?]*m'
    # Undecodable bytes are replaced rather than aborting the whole run.
    crash_output = re.sub(ansi_escape, '',
                          crash_output.decode('utf-8', 'replace'))

    result, self.needs_stack_trace = self._find_crash_signature(crash_output)
    if self.needs_stack_trace:
      label = "Found stack trace function:"
    else:
      label = "Found message:"
    for msg in result:
      print(label, msg)

    if not result:
      print("ERROR: no crash was found")
      print("The crash output was:\n========\n%s========" % crash_output)
      sys.exit(1)

    self.expected_output = result

  def check_expected_output(self, args=None, filename=None):
    """Return True if running clang prints every expected output string.

    `args` and `filename` default to self.clang_args and self.file_to_reduce;
    an explicitly empty `args` list is run as given.
    """
    if args is None:
      args = self.clang_args
    if not filename:
      filename = self.file_to_reduce

    p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    crash_output, _ = p.communicate()
    # Decode once, tolerating invalid bytes, instead of once per message.
    text = crash_output.decode('utf-8', 'replace')
    return all(msg in text for msg in self.expected_output)

  def write_interestingness_test(self):
    """Write the C-Reduce interestingness test script and sanity-check it.

    The script succeeds only if the crash command fails and its output
    contains every expected string.
    """
    print("\nCreating the interestingness test...")

    # Disable symbolization if it's not required to avoid slow symbolization.
    disable_symbolization = ''
    if not self.needs_stack_trace:
      disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1'

    output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (disable_symbolization, quote_cmd(self.get_crash_cmd()))

    for msg in self.expected_output:
      # quote_cmd() on a one-element list shell-escapes that single string.
      output += 'grep -F %s t.log || exit 1\n' % quote_cmd([msg])

    write_to_script(output, self.testfile)
    self.check_interestingness()

  def check_interestingness(self):
    """Exit unless the test accepts the input file and rejects an empty file."""
    testfile = os.path.abspath(self.testfile)

    # Check that the test considers the original file interesting
    with open(os.devnull, 'w') as devnull:
      returncode = subprocess.call(testfile, stdout=devnull)
    if returncode:
      sys.exit("The interestingness test does not pass for the original file.")

    # Check that an empty file is not interesting
    # Instead of modifying the filename in the test file, just run the command
    with tempfile.NamedTemporaryFile() as empty_file:
      is_interesting = self.check_expected_output(filename=empty_file.name)
    if is_interesting:
      sys.exit("The interestingness test passes for an empty file.")

  def clang_preprocess(self):
    """Replace the file to reduce with its preprocessed form if that still
    crashes.

    Preprocessing without line markers (-P) is tried first, then with them.
    If neither still crashes, or preprocessing fails, the file is left as is.
    """
    print("\nTrying to preprocess the source file...")
    with tempfile.NamedTemporaryFile() as tmpfile:
      cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
      cmd_preprocess_no_lines = cmd_preprocess + ['-P']
      try:
        subprocess.check_call(cmd_preprocess_no_lines)
        if self.check_expected_output(filename=tmpfile.name):
          print("Successfully preprocessed with line markers removed")
          shutil.copy(tmpfile.name, self.file_to_reduce)
        else:
          subprocess.check_call(cmd_preprocess)
          if self.check_expected_output(filename=tmpfile.name):
            print("Successfully preprocessed without removing line markers")
            shutil.copy(tmpfile.name, self.file_to_reduce)
          else:
            print("No longer crashes after preprocessing -- "
                  "using original source")
      except subprocess.CalledProcessError:
        print("Preprocessing failed")

  @staticmethod
  def filter_args(args, opts_equal=(), opts_startswith=(),
                  opts_one_arg_startswith=()):
    """Return a copy of `args` without the options selected for removal.

    opts_equal              -- drop arguments equal to one of these
    opts_startswith         -- drop arguments starting with one of these
    opts_one_arg_startswith -- drop arguments starting with one of these,
                               together with the argument that follows
    """
    # Tuples allow a single membership / startswith() test per argument and
    # avoid mutable default arguments.
    equal = tuple(opts_equal)
    prefixes = tuple(opts_startswith)
    prefixes_with_value = tuple(opts_one_arg_startswith)

    result = []
    skip_next = False
    for arg in args:
      if skip_next:
        skip_next = False
        continue
      if arg in equal:
        continue
      if arg.startswith(prefixes):
        continue
      if arg.startswith(prefixes_with_value):
        skip_next = True
        continue
      result.append(arg)
    return result

  def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
    """Return `args` filtered by filter_args(**kwargs) if that still crashes.

    `extra_arg`, when given, is moved or added to the end of the candidate
    list. `msg` is printed in verbose mode when the candidate is accepted.
    Otherwise `args` is returned unchanged.
    """
    new_args = self.filter_args(args, **kwargs)

    if extra_arg:
      if extra_arg in new_args:
        new_args.remove(extra_arg)
      new_args.append(extra_arg)

    if (new_args != args and
        self.check_expected_output(args=new_args)):
      if msg:
        verbose_print(msg)
      return new_args
    return args

  def try_remove_arg_by_index(self, args, index):
    """Try to drop args[index], plus a following value argument if any.

    Returns (new_args, index) when the crash still reproduces, so the caller
    re-examines the same position, and (args, index + 1) otherwise.
    """
    new_args = args[:index] + args[index+1:]
    removed_arg = args[index]

    # Heuristic for grouping arguments:
    # remove next argument if it doesn't start with "-"
    if index < len(new_args) and not new_args[index].startswith('-'):
      del new_args[index]
      removed_arg += ' ' + args[index+1]

    if self.check_expected_output(args=new_args):
      verbose_print("Removed", removed_arg)
      return new_args, index
    return args, index+1

  def simplify_clang_args(self):
    """Simplify clang arguments before running C-Reduce to reduce the time the
    interestingness test takes to run.
    """
    print("\nSimplifying the clang command...")

    # Remove some clang arguments to speed up the interestingness test
    new_args = self.clang_args
    new_args = self.try_remove_args(new_args,
                                    msg="Removed debug info options",
                                    opts_startswith=["-gcodeview",
                                                     "-debug-info-kind=",
                                                     "-debugger-tuning="])

    new_args = self.try_remove_args(new_args,
                                    msg="Removed --show-includes",
                                    opts_startswith=["--show-includes"])
    # Not suppressing warnings (-w) sometimes prevents the crash from occurring
    # after preprocessing
    new_args = self.try_remove_args(new_args,
                                    msg="Replaced -W options with -w",
                                    extra_arg='-w',
                                    opts_startswith=["-W"])
    new_args = self.try_remove_args(new_args,
                                    msg="Replaced optimization level with -O0",
                                    extra_arg="-O0",
                                    opts_startswith=["-O"])

    # Try to remove compilation steps
    new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                    extra_arg="-emit-llvm")
    new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                    extra_arg="-fsyntax-only")

    # Try to make implicit int an error for more sensible test output
    new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
                                    opts_equal=["-w"],
                                    extra_arg="-Werror=implicit-int")

    self.clang_args = new_args
    verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

  def reduce_clang_args(self):
    """Minimize the clang arguments after running C-Reduce, to get the smallest
    command that reproduces the crash on the reduced file.

    The resulting command is written to self.crash_script.
    """
    print("\nReducing the clang crash command...")

    new_args = self.clang_args

    # Remove some often occurring args
    new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                    opts_startswith=["-D"])
    new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                    opts_one_arg_startswith=["-D"])
    new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                    opts_startswith=["-I"])
    new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                    opts_one_arg_startswith=["-I"])
    new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                    opts_startswith=["-W"])

    # Remove other cases that aren't covered by the heuristic
    new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                    opts_one_arg_startswith=["-mllvm"])

    # Finally try to drop every remaining argument, one position at a time.
    i = 0
    while i < len(new_args):
      new_args, i = self.try_remove_arg_by_index(new_args, i)

    self.clang_args = new_args

    reduced_cmd = quote_cmd(self.get_crash_cmd())
    write_to_script(reduced_cmd, self.crash_script)
    print("Reduced command:", reduced_cmd)

  def run_creduce(self):
    """Run C-Reduce on the file to reduce, using the interestingness test.

    A KeyboardInterrupt kills C-Reduce and lets the caller continue.
    """
    print("\nRunning C-Reduce...")
    # Pre-bind so the handler below is safe even if the interrupt arrives
    # before Popen() has returned.
    p = None
    try:
      p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                           [self.testfile, self.file_to_reduce])
      p.communicate()
    except KeyboardInterrupt:
      # Hack to kill C-Reduce because it jumps into its own pgid
      print('\n\nctrl-c detected, killed creduce')
      if p is not None:
        p.kill()
376
def main():
  """Parse the command line and run the reduction steps in order.

  Stores the verbosity flag and the resolved creduce/clang paths in the
  module-level `verbose`, `creduce_cmd` and `clang_cmd`.
  """
  global verbose
  global creduce_cmd
  global clang_cmd

  arg_parser = ArgumentParser(description=__doc__,
                              formatter_class=RawTextHelpFormatter)
  arg_parser.add_argument('crash_script', type=str, nargs=1,
                          help="Name of the script that generates the crash.")
  arg_parser.add_argument('file_to_reduce', type=str, nargs=1,
                          help="Name of the file to be reduced.")
  arg_parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                          help="Path to the LLVM bin directory.")
  arg_parser.add_argument('--clang', dest='clang', type=str,
                          help="The path to the `clang` executable. "
                          "By default uses the llvm-bin directory.")
  arg_parser.add_argument('--creduce', dest='creduce', type=str,
                          help="The path to the `creduce` executable. "
                          "Required if `creduce` is not in PATH environment.")
  arg_parser.add_argument('-v', '--verbose', action='store_true')
  options = arg_parser.parse_args()

  verbose = options.verbose
  llvm_bin = None
  if options.llvm_bin:
    llvm_bin = os.path.abspath(options.llvm_bin)
  creduce_cmd = check_cmd('creduce', None, options.creduce)
  clang_cmd = check_cmd('clang', llvm_bin, options.clang)

  # nargs=1 makes argparse store each positional as a one-element list.
  crash_script = check_file(options.crash_script[0])
  file_to_reduce = check_file(options.file_to_reduce[0])

  reducer = Reduce(crash_script, file_to_reduce)

  reducer.simplify_clang_args()
  reducer.write_interestingness_test()
  reducer.clang_preprocess()
  reducer.run_creduce()
  reducer.reduce_clang_args()

if __name__ == '__main__':
  main()