From d471934946e0138d666619be9fd3a48be42da936 Mon Sep 17 00:00:00 2001 From: Andrew Hamilton Date: Fri, 7 May 2021 19:58:41 +1000 Subject: [PATCH] tools: Make python_gut more correct. - Using the ast module to correctly determine the line numbers of the function bodies. - Should have done it this way in the beginning, but didn't know about ast module tracking line numbers. - No more heuristics. - Copes with different size indentations. --- eris/gut.py | 66 ++++++++++++++++------------------------------- tests/gut_test.py | 19 +++----------- 2 files changed, 25 insertions(+), 60 deletions(-) diff --git a/eris/gut.py b/eris/gut.py index 67e1645..4d973f7 100755 --- a/eris/gut.py +++ b/eris/gut.py @@ -6,66 +6,44 @@ This can be useful when initially reading a codebase. """ -import re +import ast import sys -USAGE = """Usage: gut.py +USAGE = """Usage: gut.py # gut.py test.py""" -INDENT_SIZE = 4 -TAB_SIZE = 4 +def _function_body_lines(module_contents): + ranges = [] + class FuncNodeVisitor(ast.NodeVisitor): + def _line_range(self, body): + return body[0].lineno - 1, body[-1].end_lineno -def _indentation_of_line(line): - indentation = 0 - for character in line: - if character == " ": - indentation += 1 - elif character == "\t": - indentation += TAB_SIZE - elif character == "\n": - return None - else: # Is a non-whitespace character. - return indentation + def visit_FunctionDef(self, node): + ranges.append(self._line_range(node.body)) - -def _is_start_line_of_signature(line): - return re.match(r"^\s*(async)?\s*def\s", line) is not None - - -def _is_end_line_of_signature(line): - return (re.match(r".*\):\s*\n$", line) is not None or - re.match(r".*\):\s*#.*\n$", line) is not None) + def visit_AsyncFunctionDef(self, node): + ranges.append(self._line_range(node.body)) + visitor = FuncNodeVisitor() + tree = ast.parse(module_contents) + visitor.visit(tree) + return ranges def gut_module(module_contents): - """Gut a string of a module's contents.""" - SIGNATURE, BODY, TOP_LEVEL = 1, 2, 3 - state = TOP_LEVEL - body_depth = 0 - result = [] - for line in module_contents.splitlines(keepends=True): - indent = _indentation_of_line(line) - if state == BODY and indent is not None and \ - indent < body_depth: - state = TOP_LEVEL - result.append("\n") - if state == TOP_LEVEL and _is_start_line_of_signature(line): - state = SIGNATURE - body_depth = indent + INDENT_SIZE - if state == SIGNATURE and _is_end_line_of_signature(line): - result.append(line) - state = BODY - elif state != BODY: - result.append(line) - return "".join(result) + ranges = _function_body_lines(module_contents) + lines = module_contents.splitlines(keepends=True) + deleted = 0 + for start_line, end_line in ranges: + del lines[start_line-deleted:end_line-deleted] + deleted += (end_line - start_line) + return "".join(lines) def main(module_path): - """Gut the module at module_path.""" with open(module_path) as module_file: print(gut_module(module_file.read())) diff --git a/tests/gut_test.py b/tests/gut_test.py index 17df75a..b103992 100755 --- a/tests/gut_test.py +++ b/tests/gut_test.py @@ -90,27 +90,16 @@ class GutTestCase(unittest.TestCase): def test_multiline_signature(self): program = textwrap.dedent(""" - def bar(a, b + def bar(a, b, c, d): a = 1 """) expected = textwrap.dedent(""" - def bar(a, b + def bar(a, b, c, d): """) self.assertEqual(gut.gut_module(program), expected) - def test_tab_in_indentation(self): - program = textwrap.dedent(""" - def bar(): - a = 1 - \tb=2 - """) - expected = textwrap.dedent(""" - def bar(): - """) - self.assertEqual(gut.gut_module(program), expected) - def test_comment_in_signature_line(self): program = textwrap.dedent(""" def bar(): # comment @@ -142,9 +131,6 @@ class GutTestCase(unittest.TestCase): """) expected = textwrap.dedent(""" def bar(): - - # comment - pass """) self.assertEqual(gut.gut_module(program), expected) @@ -153,6 +139,7 @@ class GutTestCase(unittest.TestCase): def bar(): pass pass + # comment pass """)