From 5e02753c44d8703d1aba36cd9403d211e22df1bc Mon Sep 17 00:00:00 2001 From: limwz01 <117669574+limwz01@users.noreply.github.com> Date: Tue, 6 Jun 2023 09:54:57 +0800 Subject: [PATCH] Fix unexpected whitespace removal outside line range * save the whitespace prefix in the pytree node even for those occurring with comments * disable the comment splicer's removal of trailing whitespace * add back the whitespace before emitting unformatted line * remove trailing whitespace in comments for enabled lines later when printing TODO: this will still remove trailing whitespace on the line just before the line range and will ignore trailing whitespace on the last line of the line range --- yapf/pytree/comment_splicer.py | 130 ++++++++++++++++++++++++-------- yapf/pytree/pytree_unwrapper.py | 8 +- yapf/yapflib/format_token.py | 3 +- yapf/yapflib/reformatter.py | 24 +++++- 4 files changed, 131 insertions(+), 34 deletions(-) diff --git a/yapf/pytree/comment_splicer.py b/yapf/pytree/comment_splicer.py index ae5ffe66f..5c728815f 100644 --- a/yapf/pytree/comment_splicer.py +++ b/yapf/pytree/comment_splicer.py @@ -73,12 +73,13 @@ def _VisitNodeRec(node): # result of the way pytrees are organized, this node can be under # an inappropriate parent. comment_column -= len(comment_prefix.lstrip()) - pytree_utils.InsertNodesAfter( - _CreateCommentsFromPrefix( - comment_prefix, - comment_lineno, - comment_column, - standalone=False), prev_leaf[0]) + nodes, child.prefix = _CreateCommentsFromPrefix( + comment_prefix, + comment_lineno, + comment_column, + "", + standalone=False) + pytree_utils.InsertNodesAfter(nodes, prev_leaf[0]) elif child.type == token.DEDENT: # Comment prefixes on DEDENT nodes also deserve special treatment, # because their final placement depends on their prefix. @@ -101,21 +102,28 @@ def _VisitNodeRec(node): # In this case, we need to split them up ourselves. 
# Split into groups of comments at decreasing levels of indentation + comment_group_pre = [] comment_groups = [] comment_column = None - for cmt in comment_prefix.split('\n'): + comment_split = comment_prefix.split('\n') + for cmt in comment_split[:-1]: col = cmt.find('#') if col < 0: if comment_column is None: # Skip empty lines at the top of the first comment group - comment_lineno += 1 + comment_group_pre.append(cmt) continue elif comment_column is None or col < comment_column: comment_column = col comment_indent = cmt[:comment_column] - comment_groups.append((comment_column, comment_indent, [])) + comment_groups.append( + (comment_column, comment_indent, comment_group_pre)) + comment_group_pre = [] comment_groups[-1][-1].append(cmt) + prefix = "" + child.prefix = "" + ancestor_at_indent = None # Insert a node for each group for comment_column, comment_indent, comment_group in comment_groups: ancestor_at_indent = _FindAncestorAtIndent(child, comment_indent) @@ -123,13 +131,22 @@ def _VisitNodeRec(node): InsertNodes = pytree_utils.InsertNodesBefore # pylint: disable=invalid-name # noqa else: InsertNodes = pytree_utils.InsertNodesAfter # pylint: disable=invalid-name # noqa - InsertNodes( - _CreateCommentsFromPrefix( - '\n'.join(comment_group) + '\n', - comment_lineno, - comment_column, - standalone=True), ancestor_at_indent) + nodes, prefix = _CreateCommentsFromPrefix( + '\n'.join(comment_group) + '\n', + comment_lineno, + comment_column, + prefix, + standalone=True) + InsertNodes(nodes, ancestor_at_indent) comment_lineno += len(comment_group) + if ancestor_at_indent: + leaf = _FindFirstLeafAfter(nodes[-1]) + # print( + # repr(leaf), + # repr(prefix), + # repr(comment_split[-1]), + # file=sys.stderr) + leaf.prefix = prefix + comment_split[-1] else: # Otherwise there are two cases. 
# @@ -163,10 +180,9 @@ def _VisitNodeRec(node): if comment_end < node_with_line_parent.lineno - 1: node_with_line_parent = node_with_line_parent.parent - pytree_utils.InsertNodesBefore( - _CreateCommentsFromPrefix( - comment_prefix, comment_lineno, 0, standalone=True), - node_with_line_parent) + nodes, child.prefix = _CreateCommentsFromPrefix( + comment_prefix, comment_lineno, 0, "", standalone=True) + pytree_utils.InsertNodesBefore(nodes, node_with_line_parent) break else: if comment_lineno == prev_leaf[0].lineno: @@ -190,10 +206,11 @@ def _VisitNodeRec(node): comment_column = ( len(comment_prefix[rindex:]) - len(comment_prefix[rindex:].lstrip())) - comments = _CreateCommentsFromPrefix( + comments, child.prefix = _CreateCommentsFromPrefix( comment_prefix, comment_lineno, comment_column, + "", standalone=False) pytree_utils.InsertNodesBefore(comments, child) break @@ -206,6 +223,7 @@ def _VisitNodeRec(node): def _CreateCommentsFromPrefix(comment_prefix, comment_lineno, comment_column, + prefix, standalone=False): """Create pytree nodes to represent the given comment prefix. 
@@ -226,29 +244,40 @@ def _CreateCommentsFromPrefix(comment_prefix, comments = [] lines = comment_prefix.split('\n') + # print(comment_lineno, repr(lines), repr(prefix), file=sys.stderr) index = 0 while index < len(lines): comment_block = [] - while index < len(lines) and lines[index].lstrip().startswith('#'): - comment_block.append(lines[index].strip()) - index += 1 - - if comment_block: + lstrip = lines[index].lstrip() + if lstrip.startswith('#'): + # get whitespace on the left + prefix += lines[index][:len(lines[index]) - len(lstrip)] + # get all lines of block + while True: + comment_block.append(lines[index].lstrip()) + index += 1 + if not (index < len(lines) and lines[index].lstrip().startswith("#")): + break + + # print(repr(comment_block), repr(prefix), file=sys.stderr) new_lineno = comment_lineno + index - 1 - comment_block[0] = comment_block[0].strip() - comment_block[-1] = comment_block[-1].strip() comment_leaf = pytree.Leaf( type=token.COMMENT, value='\n'.join(comment_block), - context=('', (new_lineno, comment_column))) + context=('', (new_lineno, comment_column)), + prefix=prefix) + prefix = "" comment_node = comment_leaf if not standalone else pytree.Node( pygram.python_symbols.simple_stmt, [comment_leaf]) comments.append(comment_node) - - while index < len(lines) and not lines[index].lstrip(): + else: + prefix += lines[index] index += 1 + if index < len(lines): + prefix += "\n" - return comments + # print("prefix", repr(prefix), file=sys.stderr) + return comments, prefix # "Standalone line nodes" are tree nodes that have to start a new line in Python @@ -287,6 +316,45 @@ def _FindNodeWithStandaloneLineParent(node): return _FindNodeWithStandaloneLineParent(node.parent) +def _FindFirstLeafAfter(node): + """Find the first node after the given node. 
+ + Arguments: + node: node to start from + + Returns: + The first node after the given node or None + """ + next_sibling = node.next_sibling + if next_sibling: + return _FindFirstLeafAt(next_sibling) + if node.parent: + return _FindFirstLeafAfter(node.parent) + return None + + +def _FindFirstLeafAt(node): + """Find the first leaf of the given node. + + Arguments: + node: node to start from + + Returns: + The first leaf or None + """ + if isinstance(node, pytree.Leaf): + return node + else: + for child in node.children: + leaf = _FindFirstLeafAt(child) + if leaf is not None: + return leaf + next_sibling = node.next_sibling + if next_sibling: + return _FindFirstLeafAt(next_sibling) + return None + + # "Statement nodes" are standalone statements. The don't have to start a new # line. _STATEMENT_NODES = frozenset(['simple_stmt']) | _STANDALONE_LINE_NODES diff --git a/yapf/pytree/pytree_unwrapper.py b/yapf/pytree/pytree_unwrapper.py index ba1e0c423..5a1a2d91e 100644 --- a/yapf/pytree/pytree_unwrapper.py +++ b/yapf/pytree/pytree_unwrapper.py @@ -85,6 +85,7 @@ class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): def __init__(self): # A list of all logical lines finished visiting so far. self._logical_lines = [] + self.prefix = "" # Builds up a "current" logical line while visiting pytree nodes. Some nodes # will finish a line and start a new one. @@ -297,12 +298,17 @@ def DefaultLeafVisit(self, leaf): Arguments: leaf: the leaf to visit. """ + self.prefix += leaf.prefix if leaf.type in _WHITESPACE_TOKENS: self._StartNewLine() + if leaf.type == grammar_token.NEWLINE: + self.prefix += "\n" elif leaf.type != grammar_token.COMMENT or leaf.value.strip(): # Add non-whitespace tokens and comments that aren't empty. 
self._cur_logical_line.AppendToken( - format_token.FormatToken(leaf, pytree_utils.NodeName(leaf))) + format_token.FormatToken(leaf, pytree_utils.NodeName(leaf), + self.prefix)) + self.prefix = "" _BRACKET_MATCH = {')': '(', '}': '{', ']': '['} diff --git a/yapf/yapflib/format_token.py b/yapf/yapflib/format_token.py index c572391e3..e1310eb9d 100644 --- a/yapf/yapflib/format_token.py +++ b/yapf/yapflib/format_token.py @@ -82,7 +82,7 @@ class FormatToken(object): newlines: The number of newlines needed before this token. """ - def __init__(self, node, name): + def __init__(self, node, name, prefix): """Constructor. Arguments: @@ -95,6 +95,7 @@ def __init__(self, node, name): self.column = node.column self.lineno = node.lineno self.value = node.value + self.prefix = prefix if self.is_continuation: self.value = node.value.rstrip() diff --git a/yapf/yapflib/reformatter.py b/yapf/yapflib/reformatter.py index b7c883e5f..6967ecd6d 100644 --- a/yapf/yapflib/reformatter.py +++ b/yapf/yapflib/reformatter.py @@ -19,11 +19,13 @@ Reformat(): the main function exported by this module. 
""" +import json import collections import heapq import re from lib2to3 import pytree from lib2to3.pgen2 import token +import sys from yapf.pytree import pytree_utils from yapf.yapflib import format_decision_state @@ -73,6 +75,21 @@ def Reformat(llines, verify=False, lines=None): if lline.disable or _LineHasContinuationMarkers(lline): _RetainHorizontalSpacing(lline) _RetainRequiredVerticalSpacing(lline, prev_line, lines) + if not _LineHasContinuationMarkers(lline): + for token in lline.tokens: + # print( + # repr(token.value) + ":" + str(token.lineno) + ":" + + # json.dumps(token.prefix) + ":" + + # json.dumps(token.whitespace_prefix), file=sys.stderr) + if token.prefix: + prefix_new = token.whitespace_prefix.split("\n") + if len(prefix_new) >= 2: + prefix_old = token.prefix.split("\n") + offset = len(prefix_new) - len(prefix_old) + prefix = prefix_new[0:max(0, offset)] + prefix_old[ + max(0, -offset):-1] + prefix_new[-1:] + token.whitespace_prefix = "\n".join(prefix) + # print("--> " + repr(token.whitespace_prefix), file=sys.stderr) _EmitLineUnformatted(state) elif (_LineContainsPylintDisableLineTooLong(lline) or @@ -399,7 +416,12 @@ def _FormatFinalLines(final_lines, verify): for tok in line.tokens: if not tok.is_pseudo: formatted_line.append(tok.formatted_whitespace_prefix) - formatted_line.append(tok.value) + # print( + # repr(tok.formatted_whitespace_prefix), + # repr(tok.value), + # file=sys.stderr) + formatted_line.append("\n".join(map(str.rstrip, tok.value.split( + "\n"))) if not line.disable and tok.is_comment else tok.value) elif (not tok.next_token.whitespace_prefix.startswith('\n') and not tok.next_token.whitespace_prefix.startswith(' ')): if (tok.previous_token.value == ':' or