tools: Make python_gut more correct.

- Use the ast module to determine the line numbers of the function
  bodies correctly.
- Should have done it this way from the beginning, but I didn't know
  that the ast module tracks line numbers.
- No more heuristics.
- Copes with different indentation sizes.
This commit is contained in:
Andrew Hamilton 2021-05-07 19:58:41 +10:00
parent c8638c1bbe
commit d471934946
2 changed files with 25 additions and 60 deletions

View file

@ -6,66 +6,44 @@
This can be useful when initially reading a codebase.
"""
import re
import ast
import sys
# Help text for the command line interface.  Only one definition is kept:
# the duplicated assignment left the first string literal unterminated.
USAGE = """Usage: gut.py <python-file>
# gut.py test.py"""

# Indentation widths assumed by the line-based heuristics.
INDENT_SIZE = 4
TAB_SIZE = 4
def _function_body_lines(module_contents):
ranges = []
class FuncNodeVisitor(ast.NodeVisitor):
def _line_range(self, body):
return body[0].lineno - 1, body[-1].end_lineno
def _indentation_of_line(line):
indentation = 0
for character in line:
if character == " ":
indentation += 1
elif character == "\t":
indentation += TAB_SIZE
elif character == "\n":
return None
else: # Is a non-whitespace character.
return indentation
def visit_FunctionDef(self, node):
ranges.append(self._line_range(node.body))
def _is_start_line_of_signature(line):
return re.match(r"^\s*(async)?\s*def\s", line) is not None
def _is_end_line_of_signature(line):
return (re.match(r".*\):\s*\n$", line) is not None or
re.match(r".*\):\s*#.*\n$", line) is not None)
def visit_AsyncFunctionDef(self, node):
ranges.append(self._line_range(node.body))
visitor = FuncNodeVisitor()
tree = ast.parse(module_contents)
visitor.visit(tree)
return ranges
def gut_module(module_contents):
    """Gut a string of a module's contents.

    The line ranges reported by ``_function_body_lines`` are removed from
    the source and everything else is returned unchanged.

    Args:
        module_contents: Source text of a Python module.

    Returns:
        The source text with the reported body lines deleted.
    """
    ranges = _function_body_lines(module_contents)
    # Split only on \n, \r and \r\n, as the parser does when it numbers
    # lines.  str.splitlines() would also break on form feeds, U+2028 and
    # similar characters, which may legally appear inside a source line and
    # would shift every following index.
    encoded_lines = module_contents.encode("utf-8").splitlines(keepends=True)
    lines = [line.decode("utf-8") for line in encoded_lines]
    # Delete from the bottom up so that earlier ranges keep their indexes.
    for start_line, end_line in reversed(ranges):
        del lines[start_line:end_line]
    return "".join(lines)
def main(module_path):
    """Gut the module at module_path and print the result.

    Args:
        module_path: Path of the Python source file to read.
    """
    # Imported locally so the block does not depend on the file's import list.
    import tokenize

    # tokenize.open() decodes the file with the encoding Python itself would
    # use (BOM or coding cookie, otherwise UTF-8) instead of the locale's.
    with tokenize.open(module_path) as module_file:
        print(gut_module(module_file.read()))

View file

@ -90,27 +90,16 @@ class GutTestCase(unittest.TestCase):
def test_multiline_signature(self):
    """A signature spanning two lines is kept whole and its body removed."""
    # The first signature line ends with a comma so that the fixture is
    # valid Python.
    program = textwrap.dedent("""
        def bar(a, b,
                c, d):
            a = 1
        """)
    expected = textwrap.dedent("""
        def bar(a, b,
                c, d):
        """)
    self.assertEqual(gut.gut_module(program), expected)
def test_tab_in_indentation(self):
# Guts a function whose body contains a line indented with a tab escape
# ("\t") and expects only the signature line to remain.
# NOTE(review): a parser-based gut_module may reject a body that mixes tab
# and space indentation instead of gutting it -- confirm whether this test
# is still wanted.
program = textwrap.dedent("""
def bar():
a = 1
\tb=2
""")
expected = textwrap.dedent("""
def bar():
""")
self.assertEqual(gut.gut_module(program), expected)
def test_comment_in_signature_line(self):
program = textwrap.dedent("""
def bar(): # comment
@ -142,9 +131,6 @@ class GutTestCase(unittest.TestCase):
""")
expected = textwrap.dedent("""
def bar():
# comment
pass
""")
self.assertEqual(gut.gut_module(program), expected)
@ -153,6 +139,7 @@ class GutTestCase(unittest.TestCase):
def bar():
pass
pass
# comment
pass
""")