2 # pep8.py - Check Python source code formatting, according to PEP 8
3 # Copyright (C) 2006 Johann C. Rocholl <johann@browsershots.org>
5 # Permission is hereby granted, free of charge, to any person
6 # obtaining a copy of this software and associated documentation files
7 # (the "Software"), to deal in the Software without restriction,
8 # including without limitation the rights to use, copy, modify, merge,
9 # publish, distribute, sublicense, and/or sell copies of the Software,
10 # and to permit persons to whom the Software is furnished to do so,
11 # subject to the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 Check Python source code formatting, according to PEP 8:
27 http://www.python.org/dev/peps/pep-0008/
29 For usage and a list of options, try this:
32 This program and its regression test suite live here:
33 http://svn.browsershots.org/trunk/devtools/pep8/
34 http://trac.browsershots.org/browser/trunk/devtools/pep8/
36 Groups of errors and warnings:
47 You can add checks to this program by writing plugins. Each plugin is
48 a simple function that is called for each line of source code, either
52 - Raw line of text from the input file.
55 - Multi-line statements converted to a single line.
56 - Stripped left and right.
57 - Contents of strings replaced with 'xxx' of same length.
60 The check function requests physical or logical lines by the name of
63 def maximum_line_length(physical_line)
64 def extraneous_whitespace(logical_line)
65 def blank_lines(logical_line, blank_lines, indent_level, line_number)
67 The last example above demonstrates how check plugins can request
68 additional information with extra arguments. All attributes of the
69 Checker object are available. Some examples:
71 lines: a list of the raw lines from the input file
72 tokens: the tokens that contribute to this logical line
73 line_number: line number in the input file
74 blank_lines: blank lines before this one
75 indent_char: first indentation character in this file (' ' or '\t')
76 indent_level: indentation (with tabs expanded to multiples of 8)
77 previous_indent_level: indentation on previous line
78 previous_logical: previous logical line
80 The docstring of each check function shall be the relevant part of
81 text from PEP 8. It is printed if the user enables --show-pep8.
91 from optparse import OptionParser
92 from keyword import iskeyword
93 from fnmatch import fnmatch
__revision__ = '$Rev$'

default_exclude = '.svn,CVS,*.pyc,*.pyo'

# Pre-bound match functions used by the line checks below.
indent_match = re.compile(r'([ \t]*)').match
raise_comma_match = re.compile(r'raise\s+\w+\s*(,)').match

# Operators around which E221-E224 look for misaligned whitespace.
operators = """
+  -  *  /  %  ^  &  |  =  <  >  >>  <<
+= -= *= /= %= ^= &= |= == <= >= >>= <<=
!= <> :
in is or not and
""".split()
114 ##############################################################################
115 # Plugins (check functions) for physical lines
116 ##############################################################################
def tabs_or_spaces(physical_line, indent_char):
    r"""
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively.  When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces.  When using -tt
    these warnings become errors.  These options are highly recommended!
    """
    indent = re.match(r'([ \t]*)', physical_line).group(1)
    for offset, char in enumerate(indent):
        if char != indent_char:
            return offset, "E101 indentation contains mixed spaces and tabs"
def tabs_obsolete(physical_line):
    r"""
    For new projects, spaces-only are strongly recommended over tabs.  Most
    editors have features that make this easy to do.
    """
    indent = re.match(r'([ \t]*)', physical_line).group(1)
    if '\t' in indent:
        return indent.index('\t'), "W191 indentation contains tabs"
def trailing_whitespace(physical_line):
    r"""
    JCR: Trailing whitespace is superfluous.
    """
    # Drop the line terminator(s) first so only real trailing blanks count:
    # newline (chr 10), carriage return (chr 13), form feed (chr 12, ^L).
    line = physical_line.rstrip('\n').rstrip('\r').rstrip('\x0c')
    stripped = line.rstrip()
    if stripped != line:
        return len(stripped), "W291 trailing whitespace"
def trailing_blank_lines(physical_line, lines, line_number):
    r"""
    JCR: Trailing blank lines are superfluous.
    """
    # Only the very last physical line of the file can trigger this warning.
    if line_number == len(lines) and not physical_line.strip():
        return 0, "W391 blank line at end of file"
def missing_newline(physical_line):
    r"""
    JCR: The last line should have a newline.
    """
    # A line with no trailing whitespace at all cannot end in '\n'.
    if physical_line == physical_line.rstrip():
        return len(physical_line), "W292 no newline at end of file"
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.
    """
    length = len(physical_line.rstrip())
    if length > 79:
        return 79, "E501 line too long (%d characters)" % length
190 ##############################################################################
191 # Plugins (check functions) for logical lines
192 ##############################################################################
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.
    """
    line = logical_line
    # E201: blank right after an opening bracket.
    for char in '([{':
        found = line.find(char + ' ')
        if found > -1:
            return found + 1, "E201 whitespace after '%s'" % char
    # E202: blank right before a closing bracket; a preceding comma
    # (e.g. a one-element tuple "(1, )") is tolerated.
    for char in '}])':
        found = line.find(' ' + char)
        if found > -1 and line[found - 1] != ',':
            return found, "E202 whitespace before '%s'" % char
    # E203: blank right before a separator.
    for char in ',;:':
        found = line.find(' ' + char)
        if found > -1:
            return found, "E203 whitespace before '%s'" % char
def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.
    """
    # Walk adjacent character pairs; the last character has no follower.
    for index, (char, follower) in enumerate(zip(logical_line,
                                                 logical_line[1:])):
        if char in ',;:' and follower != ' ':
            prefix = logical_line[:index]
            if char == ':' and prefix.count('[') > prefix.count(']'):
                continue  # Slice syntax, no space required
            return index, "E231 missing whitespace after '%s'" % char
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    """
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.
    """
    if indent_char == ' ' and indent_level % 4:
        return 0, "E111 indentation is not a multiple of four"
    expects_indent = previous_logical.endswith(':')
    if expects_indent and indent_level <= previous_indent_level:
        return 0, "E112 expected an indented block"
    if not expects_indent and indent_level > previous_indent_level:
        return 0, "E113 unexpected indentation"
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.
    """
    prev_type = tokens[0][0]
    prev_text = tokens[0][1]
    prev_end = tokens[0][3]
    for index in range(1, len(tokens)):
        token_type, text, start, end, line = tokens[index]
        # A gap between a NAME and a following '(' or '[' is suspicious
        # unless the NAME is a keyword or a class name in a class statement.
        if (token_type == tokenize.OP and
            text in '([' and
            start != prev_end and
            prev_type == tokenize.NAME and
            (index < 2 or tokens[index - 2][1] != 'class') and
            (not iskeyword(prev_text))):
            return prev_end, "E211 whitespace before '%s'" % text
        prev_type = token_type
        prev_text = text
        prev_end = end
def whitespace_around_operator(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.
    """
    for operator in operators:
        # Each offending pattern starts at the reported offset, so all
        # four cases can share one lookup loop.
        patterns = (('  ' + operator, "E221 multiple spaces before operator"),
                    (operator + '  ', "E222 multiple spaces after operator"),
                    ('\t' + operator, "E223 tab before operator"),
                    (operator + '\t', "E224 tab after operator"))
        for pattern, text in patterns:
            found = logical_line.find(pattern)
            if found > -1:
                return found, text
def whitespace_around_comma(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    JCR: This should also be applied around comma etc.
    """
    for separator in ',;:':
        # Report the position just after the separator.
        for filler, text in (('  ', "E241 multiple spaces after '%s'"),
                             ('\t', "E242 tab after '%s'")):
            found = logical_line.find(separator + filler)
            if found > -1:
                return found + 1, text % separator
def imports_on_separate_lines(logical_line):
    """
    Imports should usually be on separate lines.
    """
    if logical_line.startswith('import '):
        comma = logical_line.find(',')
        if comma > -1:
            return comma, "E401 multiple imports on one line"
def compound_statements(logical_line):
    """
    Compound statements (multiple statements on the same line) are
    generally discouraged.
    """
    line = logical_line
    found = line.find(':')
    if -1 < found < len(line) - 1:
        before = line[:found]
        if (before.count('{') <= before.count('}') and  # {'a': 1} (dict)
            before.count('[') <= before.count(']') and  # [1:2] (slice)
            not re.search(r'\blambda\b', before)):      # lambda x: x
            return found, "E701 multiple statements on one line (colon)"
    found = line.find(';')
    if -1 < found:
        return found, "E702 multiple statements on one line (semicolon)"
def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python. Use the 'in' operation instead, like:
    d = {"a": 1, "b": 2}
    if "b" in d:
        print d["b"]
    """
    pos = logical_line.find('.has_key(')
    if pos > -1:
        return pos, "W601 .has_key() is deprecated, use 'in'"
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses.  The older
    form will be removed in Python 3000.
    """
    match = re.match(r'raise\s+\w+\s*(,)', logical_line)
    if match:
        return match.start(1), "W602 deprecated form of raising exception"
375 ##############################################################################
377 ##############################################################################
def expand_indent(line):
    """
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    result = 0
    for char in line:
        if char == '\t':
            # Round down to the previous tab stop, then advance one stop.
            # '//' keeps this integer division on both Python 2 and 3;
            # plain '/' would yield a float under Python 3.
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break
    return result
407 ##############################################################################
408 # Framework to run all checks
409 ##############################################################################
def message(text):
    """Print a message."""
    # Earlier variants wrote to stderr instead, kept for reference:
    # print >> sys.stderr, options.prog + ': ' + text
    # print >> sys.stderr, text
    # Parenthesized call prints identically on Python 2 (single argument)
    # and is required syntax on Python 3.
    print(text)
def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.
    """
    checks = []
    function_type = type(find_checks)
    # inspect.getargspec was removed in Python 3.11; prefer
    # getfullargspec when it exists, keep the old name as a fallback.
    try:
        get_spec = inspect.getfullargspec
    except AttributeError:
        get_spec = inspect.getargspec
    # .items() (not the Python-2-only .iteritems()) works everywhere.
    for name, function in globals().items():
        if type(function) is function_type:
            args = get_spec(function)[0]
            if len(args) >= 1 and args[0].startswith(argument_name):
                checks.append((name, function, args))
    checks.sort()
    return checks
def mute_string(text):
    """
    Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # The closing quote character also opens the string; any prefix
    # letters (e.g. u or r) sit before the first occurrence of it.
    quote = text[-1]
    start = text.index(quote) + 1
    end = len(text) - 1
    # Triple-quoted strings keep three quote characters on each side.
    if text[-3:] in ('"""', "'''"):
        start += 2
        end -= 2
    return text[:start] + 'x' * (end - start) + text[end:]
462 Load a Python source file, tokenize it, check coding style.
465 def __init__(self, filename):
466 self.filename = filename
467 self.lines = file(filename).readlines()
468 self.physical_checks = find_checks('physical_line')
469 self.logical_checks = find_checks('logical_line')
470 options.counters['physical lines'] = \
471 options.counters.get('physical lines', 0) + len(self.lines)
475 Get the next line from the input buffer.
477 self.line_number += 1
478 if self.line_number > len(self.lines):
480 return self.lines[self.line_number - 1]
482 def readline_check_physical(self):
484 Check and return the next physical line. This method can be
485 used to feed tokenize.generate_tokens.
487 line = self.readline()
489 self.check_physical(line)
492 def run_check(self, check, argument_names):
497 for name in argument_names:
498 arguments.append(getattr(self, name))
499 return check(*arguments)
501 def check_physical(self, line):
503 Run all physical checks on a raw input line.
505 self.physical_line = line
506 if self.indent_char is None and len(line) and line[0] in ' \t':
507 self.indent_char = line[0]
508 for name, check, argument_names in self.physical_checks:
509 result = self.run_check(check, argument_names)
510 if result is not None:
511 offset, text = result
512 self.report_error(self.line_number, offset, text, check)
514 def build_tokens_line(self):
516 Build a logical line from tokens.
522 for token in self.tokens:
523 token_type, text = token[0:2]
524 if token_type in (tokenize.COMMENT, tokenize.NL,
525 tokenize.INDENT, tokenize.DEDENT,
528 if token_type == tokenize.STRING:
529 text = mute_string(text)
531 end_line, end = previous[3]
532 start_line, start = token[2]
533 if end_line != start_line: # different row
534 if self.lines[end_line - 1][end - 1] not in '{[(':
537 elif end != start: # different column
538 fill = self.lines[end_line - 1][end:start]
541 self.mapping.append((length, token))
545 self.logical_line = ''.join(logical)
546 assert self.logical_line.lstrip() == self.logical_line
547 assert self.logical_line.rstrip() == self.logical_line
549 def check_logical(self):
551 Build a line from tokens and run all logical checks on it.
553 options.counters['logical lines'] = \
554 options.counters.get('logical lines', 0) + 1
555 self.build_tokens_line()
556 first_line = self.lines[self.mapping[0][1][2][0] - 1]
557 indent = first_line[:self.mapping[0][1][2][1]]
558 self.previous_indent_level = self.indent_level
559 self.indent_level = expand_indent(indent)
560 if options.verbose >= 2:
561 print self.logical_line[:80].rstrip()
562 for name, check, argument_names in self.logical_checks:
563 if options.verbose >= 3:
565 result = self.run_check(check, argument_names)
566 if result is not None:
567 offset, text = result
568 if type(offset) is tuple:
569 original_number, original_offset = offset
571 for token_offset, token in self.mapping:
572 if offset >= token_offset:
573 original_number = token[2][0]
574 original_offset = (token[2][1]
575 + offset - token_offset)
576 self.report_error(original_number, original_offset,
578 self.previous_logical = self.logical_line
582 Run all checks on the input file.
586 self.indent_char = None
587 self.indent_level = 0
588 self.previous_logical = ''
592 for token in tokenize.generate_tokens(self.readline_check_physical):
593 # print tokenize.tok_name[token[0]], repr(token)
594 self.tokens.append(token)
595 token_type, text = token[0:2]
596 if token_type == tokenize.OP and text in '([{':
598 if token_type == tokenize.OP and text in '}])':
600 if token_type == tokenize.NEWLINE and not parens:
604 if token_type == tokenize.NL and not parens:
605 self.blank_lines += 1
607 if token_type == tokenize.COMMENT:
608 source_line = token[4]
609 token_start = token[2][1]
610 if source_line[:token_start].strip() == '':
612 return self.file_errors
614 def report_error(self, line_number, offset, text, check):
616 Report an error, according to options.
618 if options.quiet == 1 and not self.file_errors:
619 message(self.filename)
620 self.file_errors += 1
622 options.counters[code] = options.counters.get(code, 0) + 1
623 options.messages[code] = text[5:]
626 if options.testsuite:
627 base = os.path.basename(self.filename)[:4]
630 if base[0] == 'E' and code[0] == 'W':
632 if ignore_code(code):
634 if options.counters[code] == 1 or options.repeat:
635 message("%s:%s:%d: %s" %
636 (self.filename, line_number, offset + 1, text))
637 if options.show_source:
638 line = self.lines[line_number - 1]
639 message(line.rstrip())
640 message(' ' * offset + '^')
641 if options.show_pep8:
642 message(check.__doc__.lstrip('\n').rstrip())
def input_file(filename):
    """
    Run all checks on a Python source file.
    """
    if excluded(filename) or not filename_match(filename):
        return 0
    if options.verbose:
        message('checking ' + filename)
    options.counters['files'] = options.counters.get('files', 0) + 1
    errors = Checker(filename).check_all()
    if options.testsuite and not errors:
        message("%s: %s" % (filename, "no errors found"))
    return errors
def input_dir(dirname):
    """
    Check all Python source files in this directory and all subdirectories.
    """
    dirname = dirname.rstrip('/')
    if excluded(dirname):
        return 0
    errors = 0
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] = \
            options.counters.get('directories', 0) + 1
        dirs.sort()
        # Prune excluded directories in place so os.walk skips them.
        # Iterate over a copy: removing from the list being iterated
        # would silently skip the entry following each removal.
        for subdir in dirs[:]:
            if excluded(subdir):
                dirs.remove(subdir)
        files.sort()
        for filename in files:
            errors += input_file(os.path.join(root, filename))
    return errors
def excluded(filename):
    """
    Check if options.exclude contains a pattern that matches filename.
    """
    basename = os.path.basename(filename)
    return any(fnmatch(basename, pattern) for pattern in options.exclude)
def filename_match(filename):
    """
    Check if options.filename contains a pattern that matches filename.
    If options.filename is unspecified, this always returns True.
    """
    if not options.filename:
        return True
    return any(fnmatch(filename, pattern) for pattern in options.filename)
def ignore_code(code):
    """
    Check if options.ignore contains a prefix of the error code.
    """
    return any(code.startswith(ignore) for ignore in options.ignore)
def get_error_statistics():
    """Get error statistics (message codes starting with 'E')."""
    return get_statistics("E")
def get_warning_statistics():
    """Get warning statistics (message codes starting with 'W')."""
    return get_statistics("W")
def get_statistics(prefix=''):
    """
    Get statistics for message codes that start with the prefix.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports
    """
    stats = []
    # sorted() works on both Python 2 and 3; 'keys = d.keys(); keys.sort()'
    # fails on Python 3 where keys() returns a view without .sort().
    for key in sorted(options.messages):
        if key.startswith(prefix):
            stats.append('%-7s %s %s' %
                         (options.counters[key], key, options.messages[key]))
    return stats
def print_statistics(prefix=''):
    """Print overall statistics (number of errors and warnings)."""
    for line in get_statistics(prefix):
        # Parenthesized print works on Python 2 and 3 alike.
        print(line)
def print_benchmark(elapsed):
    """
    Print benchmark numbers.
    """
    # Parenthesized print works on Python 2 and 3 alike.
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    keys = ['directories', 'files',
            'logical lines', 'physical lines']
    for key in keys:
        if key in options.counters:
            print('%-7d %s per second (%d total)' % (
                options.counters[key] / elapsed, key,
                options.counters[key]))
def process_options(arglist=None):
    """
    Process options passed either via arglist or via command line args.
    """
    global options, args
    usage = "%prog [options] input ..."
    parser = OptionParser(usage)
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    parser.add_option('--exclude', metavar='patterns', default=default_exclude,
                      help="skip matches (default %s)" % default_exclude)
    parser.add_option('--filename', metavar='patterns',
                      help="only check matching files (e.g. *.py)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W)")
    parser.add_option('--repeat', action='store_true',
                      help="show all occurrences of the same error")
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--benchmark', action='store_true',
                      help="measure processing speed")
    parser.add_option('--testsuite', metavar='dir',
                      help="run regression tests from dir")
    parser.add_option('--doctest', action='store_true',
                      help="run doctest on myself")
    options, args = parser.parse_args(arglist)
    if options.testsuite:
        args.append(options.testsuite)
    if not args:
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    # Normalize the comma-separated option strings into lists.
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')
    if options.ignore:
        options.ignore = options.ignore.split(',')
    else:
        options.ignore = []
    options.counters = {}
    options.messages = {}
    return options, args
def _main():
    """
    Parse options and run checks on Python source.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        return doctest.testmod()
    start_time = time.time()
    errors = 0
    for path in args:
        if os.path.isdir(path):
            errors += input_dir(path)
        else:
            errors += input_file(path)
    elapsed = time.time() - start_time
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
if __name__ == '__main__':
    _main()