Consider ASan messages interesting for creduce
[lldb.git] / clang / utils / creduce-clang-crash.py
1 #!/usr/bin/env python
2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
3
4 Output files:
5   *.reduced.sh -- crash reproducer with minimal arguments
6   *.reduced.cpp -- the reduced file
7   *.test.sh -- interestingness test for C-Reduce
8 """
9
10 from __future__ import print_function
11 from argparse import ArgumentParser, RawTextHelpFormatter
12 import os
13 import re
14 import stat
15 import sys
16 import subprocess
17 import pipes
18 import shlex
19 import tempfile
20 import shutil
21 from distutils.spawn import find_executable
22
# Set by main() from the -v/--verbose flag; read by verbose_print().
verbose = False
# Path to the creduce executable; resolved by main() through check_cmd().
creduce_cmd = None
# Path to the clang executable; resolved by main() and read by Reduce.__init__.
clang_cmd = None
26
def verbose_print(*args, **kwargs):
  """Forward the arguments to print() only when the global `verbose` is set."""
  if not verbose:
    return
  print(*args, **kwargs)
30
def check_file(fname):
  """Return the normalized form of `fname`.

  Exits the program with an error message if the normalized path does not
  name an existing regular file.
  """
  normalized = os.path.normpath(fname)
  if os.path.isfile(normalized):
    return normalized
  sys.exit("ERROR: %s does not exist" % (normalized))
36
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
  """
  Locate an executable and return its path, exiting if it cannot be found.

  When cmd_path is given it is made absolute and must name an executable;
  cmd_name and cmd_dir are then ignored. Otherwise cmd_name is searched for
  in cmd_dir, with find_executable's default search path used when cmd_dir
  is empty.
  """
  if not cmd_path:
    located = find_executable(cmd_name, path=cmd_dir)
    if located:
      return located
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir or "$PATH"))

  # Make the path absolute so the creduce test can be run from any directory.
  absolute_path = os.path.abspath(cmd_path)
  located = find_executable(absolute_path)
  if located:
    return located
  sys.exit("ERROR: executable `%s` not found" % (absolute_path))
57
def quote_cmd(cmd):
  """Return the argument list `cmd` as one shell-quoted command string."""
  quoted_args = [pipes.quote(arg) for arg in cmd]
  return ' '.join(quoted_args)
60
def write_to_script(text, filename):
  """Write `text` to `filename` and add the owner-execute permission bit."""
  with open(filename, 'w') as script:
    script.write(text)
  current_mode = os.stat(filename).st_mode
  os.chmod(filename, current_mode | stat.S_IEXEC)
65
class Reduce(object):
  """Reduces one clang crash to a minimal source file and command line.

  The constructor copies the input file to `<name>.reduced<ext>`, reads the
  clang arguments from the crash script and runs clang once to learn which
  output identifies the crash. The remaining public methods are the
  individual reduction steps; main() calls them in order.
  """

  def __init__(self, crash_script, file_to_reduce):
    """Set up output file names and capture the crash signature.

    Args:
      crash_script: path of the script containing the crashing clang command.
      file_to_reduce: path of the source file that triggers the crash.
    """
    crash_script_name, crash_script_ext = os.path.splitext(crash_script)
    file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

    self.testfile = file_reduce_name + '.test.sh'
    self.crash_script = crash_script_name + '.reduced' + crash_script_ext
    self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
    # All later steps work on the copy, never on the original input file.
    shutil.copy(file_to_reduce, self.file_to_reduce)

    self.clang = clang_cmd
    self.clang_args = []
    self.expected_output = []
    self.needs_stack_trace = False
    self.creduce_flags = ["--tidy"]

    self.read_clang_args(crash_script, file_to_reduce)
    self.read_expected_output()

  def get_crash_cmd(self, cmd=None, args=None, filename=None):
    """Return the clang invocation as an argument list.

    Args:
      cmd: executable to run; defaults to self.clang.
      args: argument list; defaults to self.clang_args only when omitted
        (None). An explicitly empty list is honoured, so a command with no
        arguments can actually be tested.
      filename: input file; defaults to self.file_to_reduce.
    """
    if not cmd:
      cmd = self.clang
    if args is None:
      args = self.clang_args
    if not filename:
      filename = self.file_to_reduce

    return [cmd] + list(args) + [filename]

  @staticmethod
  def _parse_clang_args(script_lines, filename):
    """Extract the clang arguments from the lines of a crash script.

    The first line that is neither blank nor a `#` comment is taken to be the
    clang call. The executable (first word) is dropped, as is the last
    argument equal to `filename`, which is assumed to be the input file.

    Returns:
      The list of remaining arguments, or None if no command line was found.
    """
    for line in script_lines:
      stripped = line.strip()
      if not stripped or stripped.startswith('#'):
        continue
      words = shlex.split(line)
      if not words:
        continue
      args = words[1:]
      # Scan backwards so that only the last occurrence is removed.
      for i in range(len(args) - 1, -1, -1):
        if args[i] == filename:
          del args[i]
          break
      return args
    return None

  def read_clang_args(self, crash_script, filename):
    """Populate self.clang_args from the command found in `crash_script`.

    Exits the program if the script contains no command line.
    """
    print("\nReading arguments from crash script...")
    with open(crash_script) as f:
      args = self._parse_clang_args(f, filename)
    if args is None:
      sys.exit("Could not find command in the crash script.")

    self.clang_args = args
    verbose_print("Clang arguments:", quote_cmd(self.clang_args))

  @staticmethod
  def _find_crash_signature(crash_output):
    """Pick the strings that identify a crash in clang's output.

    Returns:
      A pair (signature, needs_stack_trace). If a known error message is
      found, signature holds just that message and needs_stack_trace is
      False. Otherwise signature holds up to five function names taken from
      the stack trace (possibly none) and needs_stack_trace is True.
    """
    # Look for specific error messages
    regexes = [r"Assertion .+ failed", # Linux assert()
               r"Assertion failed: .+,", # FreeBSD/Mac assert()
               r"fatal error: error in backend: .+",
               r"LLVM ERROR: .+",
               r"UNREACHABLE executed at .+?!",
               r"LLVM IR generation of declaration '.+'",
               r"Generating code for declaration '.+'",
               r"\*\*\* Bad machine code: .+ \*\*\*",
               r"ERROR: .*Sanitizer: [^ ]+ "]
    for msg_re in regexes:
      match = re.search(msg_re, crash_output)
      if match:
        return [match.group(0)], False

    # If no message was found, use the top five stack trace functions,
    # ignoring some common functions
    # Five is a somewhat arbitrary number; the goal is to get a small number
    # of identifying functions with some leeway for common functions
    stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
    filters = ["PrintStackTrace", "RunSignalHandlers", "CleanupOnSignal",
               "HandleCrash", "SignalHandler", "__restore_rt", "gsignal", "abort"]
    def skip_function(func_name):
      return any(name in func_name for name in filters)

    matches = re.findall(stacktrace_re, crash_output)
    return [x for x in matches if x and not skip_function(x)][:5], True

  def read_expected_output(self):
    """Run the crash command once and record what identifies the crash.

    Sets self.expected_output and self.needs_stack_trace. Exits the program
    if neither a known message nor a usable stack trace is found.
    """
    print("\nGetting expected crash output...")
    p = subprocess.Popen(self.get_crash_cmd(),
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    crash_output, _ = p.communicate()

    # Remove color codes. Undecodable bytes are replaced rather than raising,
    # since compiler output is not guaranteed to be valid UTF-8.
    ansi_escape = r'\x1b\[[0-?]*m'
    crash_output = re.sub(ansi_escape, '',
                          crash_output.decode('utf-8', 'replace'))

    result, self.needs_stack_trace = self._find_crash_signature(crash_output)
    if self.needs_stack_trace:
      label = "Found stack trace function:"
    else:
      label = "Found message:"
    for msg in result:
      print(label, msg)

    if not result:
      print("ERROR: no crash was found")
      print("The crash output was:\n========\n%s========" % crash_output)
      sys.exit(1)

    self.expected_output = result

  def check_expected_output(self, args=None, filename=None):
    """Run clang and report whether every expected string is in its output.

    Args:
      args: argument list to test; defaults to self.clang_args only when
        omitted (None), so an empty list is tested as given.
      filename: input file; defaults to self.file_to_reduce.
    """
    if args is None:
      args = self.clang_args
    if not filename:
      filename = self.file_to_reduce

    p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    crash_output, _ = p.communicate()
    # Decode once, tolerating invalid bytes, instead of once per message.
    crash_output = crash_output.decode('utf-8', 'replace')
    return all(msg in crash_output for msg in self.expected_output)

  def write_interestingness_test(self):
    """Write the C-Reduce interestingness script and sanity-check it.

    The script fails if clang exits successfully or if any expected string
    is missing from the captured output.
    """
    print("\nCreating the interestingness test...")

    # Disable symbolization if it's not required to avoid slow symbolization.
    disable_symbolization = ''
    if not self.needs_stack_trace:
      disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1'

    output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (disable_symbolization, quote_cmd(self.get_crash_cmd()))

    for msg in self.expected_output:
      output += 'grep -F %s t.log || exit 1\n' % pipes.quote(msg)

    write_to_script(output, self.testfile)
    self.check_interestingness()

  def check_interestingness(self):
    """Exit unless the test accepts the original file and rejects an empty one."""
    testfile = os.path.abspath(self.testfile)

    # Check that the test considers the original file interesting
    with open(os.devnull, 'w') as devnull:
      returncode = subprocess.call(testfile, stdout=devnull)
    if returncode:
      sys.exit("The interestingness test does not pass for the original file.")

    # Check that an empty file is not interesting
    # Instead of modifying the filename in the test file, just run the command
    with tempfile.NamedTemporaryFile() as empty_file:
      is_interesting = self.check_expected_output(filename=empty_file.name)
    if is_interesting:
      sys.exit("The interestingness test passes for an empty file.")

  def clang_preprocess(self):
    """Replace the file to reduce with its preprocessed form if it still crashes.

    Preprocessing without line markers (-P) is tried first, then with them.
    The file is left untouched if neither result reproduces the crash or if
    preprocessing itself fails.
    """
    print("\nTrying to preprocess the source file...")
    with tempfile.NamedTemporaryFile() as tmpfile:
      cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
      cmd_preprocess_no_lines = cmd_preprocess + ['-P']
      try:
        subprocess.check_call(cmd_preprocess_no_lines)
        if self.check_expected_output(filename=tmpfile.name):
          print("Successfully preprocessed with line markers removed")
          shutil.copy(tmpfile.name, self.file_to_reduce)
        else:
          subprocess.check_call(cmd_preprocess)
          if self.check_expected_output(filename=tmpfile.name):
            print("Successfully preprocessed without removing line markers")
            shutil.copy(tmpfile.name, self.file_to_reduce)
          else:
            print("No longer crashes after preprocessing -- "
                  "using original source")
      except subprocess.CalledProcessError:
        print("Preprocessing failed")

  @staticmethod
  def filter_args(args, opts_equal=(), opts_startswith=(),
                  opts_one_arg_startswith=()):
    """Return a copy of `args` with the selected options removed.

    Args:
      args: argument list to filter; it is not modified.
      opts_equal: arguments equal to any of these are dropped.
      opts_startswith: arguments starting with any of these are dropped.
      opts_one_arg_startswith: arguments starting with any of these are
        dropped together with the argument that follows them.
    """
    result = []
    skip_next = False
    for arg in args:
      if skip_next:
        skip_next = False
        continue
      if any(arg == a for a in opts_equal):
        continue
      if any(arg.startswith(a) for a in opts_startswith):
        continue
      if any(arg.startswith(a) for a in opts_one_arg_startswith):
        skip_next = True
        continue
      result.append(arg)
    return result

  def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
    """Try a filtered argument list and keep it only if the crash persists.

    Args:
      args: current argument list.
      msg: verbose message printed when the change is accepted.
      extra_arg: argument appended to the filtered list; an earlier
        occurrence is removed first so that it ends up last.
      **kwargs: option groups forwarded to filter_args().

    Returns:
      The new list if it differs from `args` and still reproduces the crash,
      otherwise `args` itself.
    """
    new_args = self.filter_args(args, **kwargs)

    if extra_arg:
      if extra_arg in new_args:
        new_args.remove(extra_arg)
      new_args.append(extra_arg)

    if (new_args != args and
        self.check_expected_output(args=new_args)):
      if msg:
        verbose_print(msg)
      return new_args
    return args

  def try_remove_arg_by_index(self, args, index):
    """Try dropping the argument at `index` (and a value that follows it).

    Returns:
      (new_args, index) if the crash still reproduces, so the caller retries
      the same position; otherwise (args, index + 1).
    """
    new_args = args[:index] + args[index+1:]
    removed_arg = args[index]

    # Heuristic for grouping arguments:
    # remove next argument if it doesn't start with "-"
    if index < len(new_args) and not new_args[index].startswith('-'):
      del new_args[index]
      removed_arg += ' ' + args[index+1]

    if self.check_expected_output(args=new_args):
      verbose_print("Removed", removed_arg)
      return new_args, index
    return args, index+1

  def simplify_clang_args(self):
    """Simplify clang arguments before running C-Reduce to reduce the time the
    interestingness test takes to run.
    """
    print("\nSimplifying the clang command...")

    # Remove some clang arguments to speed up the interestingness test
    new_args = self.clang_args
    new_args = self.try_remove_args(new_args,
                                    msg="Removed debug info options",
                                    opts_startswith=["-gcodeview",
                                                     "-debug-info-kind=",
                                                     "-debugger-tuning="])

    new_args = self.try_remove_args(new_args,
                                    msg="Removed --show-includes",
                                    opts_startswith=["--show-includes"])
    # Not suppressing warnings (-w) sometimes prevents the crash from occurring
    # after preprocessing
    new_args = self.try_remove_args(new_args,
                                    msg="Replaced -W options with -w",
                                    extra_arg='-w',
                                    opts_startswith=["-W"])
    new_args = self.try_remove_args(new_args,
                                    msg="Replaced optimization level with -O0",
                                    extra_arg="-O0",
                                    opts_startswith=["-O"])

    # Try to remove compilation steps
    new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                    extra_arg="-emit-llvm")
    new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                    extra_arg="-fsyntax-only")

    # Try to make implicit int an error for more sensible test output
    new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
                                    opts_equal=["-w"],
                                    extra_arg="-Werror=implicit-int")

    self.clang_args = new_args
    verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

  def reduce_clang_args(self):
    """Minimize the clang arguments after running C-Reduce, to get the smallest
    command that reproduces the crash on the reduced file.
    """
    print("\nReducing the clang crash command...")

    new_args = self.clang_args

    # Remove some often occurring args
    new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                    opts_startswith=["-D"])
    new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                    opts_one_arg_startswith=["-D"])
    new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                    opts_startswith=["-I"])
    new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                    opts_one_arg_startswith=["-I"])
    new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                    opts_startswith=["-W"])

    # Remove other cases that aren't covered by the heuristic
    new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                    opts_one_arg_startswith=["-mllvm"])

    # The index only advances when a removal is rejected, so every remaining
    # argument is tried exactly once.
    i = 0
    while i < len(new_args):
      new_args, i = self.try_remove_arg_by_index(new_args, i)

    self.clang_args = new_args

    reduced_cmd = quote_cmd(self.get_crash_cmd())
    write_to_script(reduced_cmd, self.crash_script)
    print("Reduced command:", reduced_cmd)

  def run_creduce(self):
    """Run C-Reduce on the file to reduce, using the interestingness test."""
    print("\nRunning C-Reduce...")
    # Start the process outside the try block so that `p` is always bound
    # when the interrupt handler runs.
    p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                         [self.testfile, self.file_to_reduce])
    try:
      p.communicate()
    except KeyboardInterrupt:
      # Hack to kill C-Reduce because it jumps into its own pgid
      print('\n\nctrl-c detected, killed creduce')
      p.kill()
377
def main():
  """Parse the command line, locate the tools and run every reduction step.

  Stores the verbosity flag and the resolved creduce/clang paths in the
  module-level globals that the rest of the script reads.
  """
  global verbose, creduce_cmd, clang_cmd

  parser = ArgumentParser(description=__doc__,
                          formatter_class=RawTextHelpFormatter)
  parser.add_argument('crash_script', type=str, nargs=1,
                      help="Name of the script that generates the crash.")
  parser.add_argument('file_to_reduce', type=str, nargs=1,
                      help="Name of the file to be reduced.")
  parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                      help="Path to the LLVM bin directory.")
  parser.add_argument('--clang', dest='clang', type=str,
                      help="The path to the `clang` executable. "
                      "By default uses the llvm-bin directory.")
  parser.add_argument('--creduce', dest='creduce', type=str,
                      help="The path to the `creduce` executable. "
                      "Required if `creduce` is not in PATH environment.")
  parser.add_argument('-v', '--verbose', action='store_true')
  opts = parser.parse_args()

  verbose = opts.verbose

  llvm_bin = None
  if opts.llvm_bin:
    llvm_bin = os.path.abspath(opts.llvm_bin)
  creduce_cmd = check_cmd('creduce', None, opts.creduce)
  clang_cmd = check_cmd('clang', llvm_bin, opts.clang)

  # nargs=1 makes argparse store each positional as a one-element list.
  crash_script = check_file(opts.crash_script[0])
  file_to_reduce = check_file(opts.file_to_reduce[0])

  reducer = Reduce(crash_script, file_to_reduce)

  # The order matters: arguments are simplified before the interestingness
  # test is written, and the final argument reduction runs on the file that
  # C-Reduce produced.
  reducer.simplify_clang_args()
  reducer.write_interestingness_test()
  reducer.clang_preprocess()
  reducer.run_creduce()
  reducer.reduce_clang_args()
415
# Run the reducer only when this file is executed as a script.
if __name__ == '__main__':
  main()