Commit 89f02d2

More cleanup of code for Python before 3.8, fix astroid import (#156)
1 parent eb1e401 commit 89f02d2

File tree: 4 files changed, +110 -151 lines

  README.rst
  asttokens/asttokens.py
  asttokens/mark_tokens.py
  asttokens/util.py

README.rst

Lines changed: 0 additions & 1 deletion

@@ -32,7 +32,6 @@ The API Reference is here: http://asttokens.readthedocs.io/en/latest/api-index.h
 
 Usage
 -----
-ASTTokens works with both Python2 and Python3.
 
 ASTTokens can annotate both trees built by `ast <https://docs.python.org/2/library/ast.html>`_,
 AND those built by `astroid <https://github.com/PyCQA/astroid>`_.
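For orientation, the README workflow this deleted sentence described is otherwise unchanged: you hand ASTTokens the source text (or a pre-built astroid tree via the tree= argument) and ask any node for its text. A minimal sketch of that usage; the sample source string is mine, while ASTTokens, parse=True, and get_text are the documented entry points:

    import ast
    import asttokens

    source = "print(max(1, 2) + 3)"
    atok = asttokens.ASTTokens(source, parse=True)   # parse with ast and mark tokens

    # Every node in atok.tree now carries token info and can be sliced out of the source.
    call = next(n for n in ast.walk(atok.tree) if isinstance(n, ast.Call))
    print(atok.get_text(call))   # -> print(max(1, 2) + 3)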

asttokens/asttokens.py

Lines changed: 8 additions & 22 deletions

@@ -104,9 +104,6 @@ class ASTTokens(ASTTextBase):
 
   def __init__(self, source_text, parse=False, tree=None, filename='<unknown>', tokens=None):
     # type: (Any, bool, Optional[Module], str, Iterable[TokenInfo]) -> None
-    # FIXME: Strictly, the type of source_text is one of the six string types, but hard to specify with mypy given
-    # https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases
-
     super(ASTTokens, self).__init__(source_text, filename)
 
     self._tree = ast.parse(source_text, filename) if parse else tree
@@ -292,9 +289,6 @@ class ASTText(ASTTextBase):
   """
   def __init__(self, source_text, tree=None, filename='<unknown>'):
     # type: (Any, Optional[Module], str) -> None
-    # FIXME: Strictly, the type of source_text is one of the six string types, but hard to specify with mypy given
-    # https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases
-
     super(ASTText, self).__init__(source_text, filename)
 
     self._tree = tree
@@ -327,10 +321,6 @@ def _get_text_positions_tokenless(self, node, padded):
     """
     Version of ``get_text_positions()`` that doesn't use tokens.
     """
-    if sys.version_info[:2] < (3, 8):  # pragma: no cover
-      # This is just for mpypy
-      raise AssertionError("This method should only be called internally after checking supports_tokenless()")
-
     if is_module(node):
       # Modules don't have position info, so just return the range of the whole text.
       # The token-using method does something different, but its behavior seems weird and inconsistent.
@@ -413,16 +403,14 @@ def get_text_positions(self, node, padded):
     return self.asttokens.get_text_positions(node, padded)
 
 
-# Node types that _get_text_positions_tokenless doesn't support. Only relevant for Python 3.8+.
-_unsupported_tokenless_types = ()  # type: Tuple[str, ...]
-if sys.version_info[:2] >= (3, 8):
-  # no lineno
-  _unsupported_tokenless_types += ("arguments", "Arguments", "withitem")
-  if sys.version_info[:2] == (3, 8):
-    # _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8.
-    _unsupported_tokenless_types += ("arg", "Starred")
-    # no lineno in 3.8
-    _unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword")
+# Node types that _get_text_positions_tokenless doesn't support.
+# These initial values are missing lineno.
+_unsupported_tokenless_types = ("arguments", "Arguments", "withitem")  # type: Tuple[str, ...]
+if sys.version_info[:2] == (3, 8):
+  # _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8.
+  _unsupported_tokenless_types += ("arg", "Starred")
+  # no lineno in 3.8
+  _unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword")
 
 
 def supports_tokenless(node=None):
@@ -434,7 +422,6 @@ def supports_tokenless(node=None):
 
   The following cases are not supported:
 
-  - Python 3.7 and earlier
   - PyPy
   - ``ast.arguments`` / ``astroid.Arguments``
   - ``ast.withitem``
@@ -459,6 +446,5 @@ def supports_tokenless(node=None):
         )
       )
     )
-    and sys.version_info[:2] >= (3, 8)
     and 'pypy' not in sys.version.lower()
   )
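With the version gate gone, supports_tokenless only has to rule out PyPy and the node types listed above. A rough usage sketch, assuming the module layout shown in this diff (ASTText re-exported at the package top level, supports_tokenless living in asttokens/asttokens.py); the sample source is mine:

    import ast
    from asttokens import ASTText
    from asttokens.asttokens import supports_tokenless

    source = "def f(a, b):\n    return a + b\n"
    tree = ast.parse(source)
    atok = ASTText(source, tree=tree)       # tokenless text access; tokens only built if needed

    func = tree.body[0]
    print(supports_tokenless(func))         # True on CPython: FunctionDef has usable positions
    print(supports_tokenless(func.args))    # False: ast.arguments has no lineno
    print(atok.get_text(func.body[0]))      # -> return a + b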

asttokens/mark_tokens.py

Lines changed: 14 additions & 33 deletions

@@ -21,7 +21,6 @@
 
 from . import util
 from .asttokens import ASTTokens
-from .util import AstConstant
 from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer
 
 if TYPE_CHECKING:
@@ -177,13 +176,6 @@ def handle_comp(self, open_brace, node, first_token, last_token):
     util.expect_token(before, token.OP, open_brace)
     return (before, last_token)
 
-  # Python 3.8 fixed the starting position of list comprehensions:
-  # https://bugs.python.org/issue31241
-  if sys.version_info < (3, 8):
-    def visit_listcomp(self, node, first_token, last_token):
-      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
-      return self.handle_comp('[', node, first_token, last_token)
-
   def visit_comprehension(self,
                           node,  # type: AstNode
                           first_token,  # type: util.Token
@@ -296,26 +288,19 @@ def handle_bare_tuple(self, node, first_token, last_token):
       last_token = maybe_comma
     return (first_token, last_token)
 
-  if sys.version_info >= (3, 8):
-    # In Python3.8 parsed tuples include parentheses when present.
-    def handle_tuple_nonempty(self, node, first_token, last_token):
-      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
-      assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
-      # It's a bare tuple if the first token belongs to the first child. The first child may
-      # include extraneous parentheses (which don't create new nodes), so account for those too.
-      child = node.elts[0]
-      if TYPE_CHECKING:
-        child = cast(AstNode, child)
-      child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
-      if first_token == child_first:
-        return self.handle_bare_tuple(node, first_token, last_token)
-      return (first_token, last_token)
-  else:
-    # Before python 3.8, parsed tuples do not include parens.
-    def handle_tuple_nonempty(self, node, first_token, last_token):
-      # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
-      (first_token, last_token) = self.handle_bare_tuple(node, first_token, last_token)
-      return self._gobble_parens(first_token, last_token, False)
+  # In Python3.8 parsed tuples include parentheses when present.
+  def handle_tuple_nonempty(self, node, first_token, last_token):
+    # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
+    assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
+    # It's a bare tuple if the first token belongs to the first child. The first child may
+    # include extraneous parentheses (which don't create new nodes), so account for those too.
+    child = node.elts[0]
+    if TYPE_CHECKING:
+      child = cast(AstNode, child)
+    child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
+    if first_token == child_first:
+      return self.handle_bare_tuple(node, first_token, last_token)
+    return (first_token, last_token)
 
   def visit_tuple(self, node, first_token, last_token):
     # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
@@ -417,19 +402,15 @@ def visit_num(self, node, first_token, last_token):
     # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
     return self.handle_num(node, cast(ast.Num, node).n, first_token, last_token)
 
-  # In Astroid, the Num and Str nodes are replaced by Const.
   def visit_const(self, node, first_token, last_token):
     # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
-    assert isinstance(node, AstConstant) or isinstance(node, nc.Const)
+    assert isinstance(node, ast.Constant) or isinstance(node, nc.Const)
     if isinstance(node.value, numbers.Number):
       return self.handle_num(node, node.value, first_token, last_token)
     elif isinstance(node.value, (str, bytes)):
       return self.visit_str(node, first_token, last_token)
     return (first_token, last_token)
 
-  # In Python >= 3.6, there is a similar class 'Constant' for literals
-  # In 3.8 it became the type produced by ast.parse
-  # https://bugs.python.org/issue32892
   visit_constant = visit_const
 
   def visit_keyword(self, node, first_token, last_token):
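The deleted pre-3.8 branches existed because older parsers reported tuple and list-comprehension positions differently. A quick way to see the behaviour the surviving branch relies on, using only stdlib ast (the sample source is mine; the printed offsets are what CPython 3.8 and later report):

    import ast

    source = "x = (1, 2)"
    tup = ast.parse(source).body[0].value          # the ast.Tuple node

    # On Python 3.8+ the tuple's reported span includes the parentheses,
    # so handle_tuple_nonempty only needs to detect the bare-tuple case.
    print(tup.col_offset, tup.end_col_offset)      # 4 10
    print(ast.get_source_segment(source, tup))     # -> (1, 2)

Similarly, on 3.8+ ast.parse produces ast.Constant for all literals (the bpo-32892 change cited in the deleted comment), which is why visit_const can assert ast.Constant directly instead of going through the old AstConstant shim.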

asttokens/util.py

Lines changed: 88 additions & 95 deletions

@@ -20,10 +20,21 @@
 import tokenize
 from abc import ABCMeta
 from ast import Module, expr, AST
-from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast, Any, TYPE_CHECKING
-
-import astroid
-
+from functools import lru_cache
+from typing import (
+  Callable,
+  Dict,
+  Iterable,
+  Iterator,
+  List,
+  Optional,
+  Tuple,
+  Union,
+  cast,
+  Any,
+  TYPE_CHECKING,
+  Type,
+)
 
 if TYPE_CHECKING:  # pragma: no cover
   from .astroid_compat import NodeNG
@@ -67,13 +78,6 @@ def __str__(self):
     return token_repr(self.type, self.string)
 
 
-if sys.version_info >= (3, 6):
-  AstConstant = ast.Constant
-else:
-  class AstConstant:
-    value = object()
-
-
 def match_token(token, tok_type, tok_str=None):
   # type: (Token, int, Optional[str]) -> bool
   """Returns true if token is of the given type and, if a string is given, has that string."""
@@ -91,22 +95,13 @@ def expect_token(token, tok_type, tok_str=None):
       token_repr(tok_type, tok_str), str(token),
       token.start[0], token.start[1] + 1))
 
-# These were previously defined in tokenize.py and distinguishable by being greater than
-# token.N_TOKEN. As of python3.7, they are in token.py, and we check for them explicitly.
-if sys.version_info >= (3, 7):
-  def is_non_coding_token(token_type):
-    # type: (int) -> bool
-    """
-    These are considered non-coding tokens, as they don't affect the syntax tree.
-    """
-    return token_type in (token.NL, token.COMMENT, token.ENCODING)
-else:
-  def is_non_coding_token(token_type):
-    # type: (int) -> bool
-    """
-    These are considered non-coding tokens, as they don't affect the syntax tree.
-    """
-    return token_type >= token.N_TOKENS
+
+def is_non_coding_token(token_type):
+  # type: (int) -> bool
+  """
+  These are considered non-coding tokens, as they don't affect the syntax tree.
+  """
+  return token_type in (token.NL, token.COMMENT, token.ENCODING)
 
 
 def generate_tokens(text):
@@ -201,10 +196,19 @@ def is_expr_stmt(node):
   return node.__class__.__name__ == 'Expr'
 
 
+
+CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,)
+try:
+  from astroid import Const
+  CONSTANT_CLASSES += (Const,)
+except ImportError:  # pragma: no cover
+  # astroid is not available
+  pass
+
 def is_constant(node):
   # type: (AstNode) -> bool
   """Returns whether node is a Constant node."""
-  return isinstance(node, (ast.Constant, astroid.Const))
+  return isinstance(node, CONSTANT_CLASSES)
 
 
 def is_ellipsis(node):
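The try/except import above is the "fix astroid import" part of the commit: is_constant now checks against whatever classes are actually importable instead of assuming astroid is installed at module-import time. A small sanity check using only the helper shown in this diff; the sample snippets are mine:

    import ast
    from asttokens.util import is_constant

    node = ast.parse("42").body[0].value                  # an ast.Constant on Python 3.8+
    print(is_constant(node))                              # True, with or without astroid installed
    print(is_constant(ast.parse("a + b").body[0].value))  # False: a BinOp is not a constant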
@@ -421,72 +425,61 @@ def last_stmt(node):
   return node
 
 
-if sys.version_info[:2] >= (3, 8):
-  from functools import lru_cache
 
-  @lru_cache(maxsize=None)
-  def fstring_positions_work():
-    # type: () -> bool
-    """
-    The positions attached to nodes inside f-string FormattedValues have some bugs
-    that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
-    This checks for those bugs more concretely without relying on the Python version.
-    Specifically this checks:
-     - Values with a format spec or conversion
-     - Repeated (i.e. identical-looking) expressions
-     - f-strings implicitly concatenated over multiple lines.
-     - Multiline, triple-quoted f-strings.
-    """
-    source = """(
-      f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
-      f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
-      f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
-      f'''
-      {s} {t}
-      {u} {v}
-      '''
-    )"""
-    tree = ast.parse(source)
-    name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
-    name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
-    positions_are_unique = len(set(name_positions)) == len(name_positions)
-    correct_source_segments = all(
-      ast.get_source_segment(source, node) == node.id
-      for node in name_nodes
-    )
-    return positions_are_unique and correct_source_segments
+@lru_cache(maxsize=None)
+def fstring_positions_work():
+  # type: () -> bool
+  """
+  The positions attached to nodes inside f-string FormattedValues have some bugs
+  that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729.
+  This checks for those bugs more concretely without relying on the Python version.
+  Specifically this checks:
+   - Values with a format spec or conversion
+   - Repeated (i.e. identical-looking) expressions
+   - f-strings implicitly concatenated over multiple lines.
+   - Multiline, triple-quoted f-strings.
+  """
+  source = """(
+    f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
+    f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}"
+    f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}"
+    f'''
+    {s} {t}
+    {u} {v}
+    '''
+  )"""
+  tree = ast.parse(source)
+  name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)]
+  name_positions = [(node.lineno, node.col_offset) for node in name_nodes]
+  positions_are_unique = len(set(name_positions)) == len(name_positions)
+  correct_source_segments = all(
+    ast.get_source_segment(source, node) == node.id
+    for node in name_nodes
+  )
+  return positions_are_unique and correct_source_segments
 
-  def annotate_fstring_nodes(tree):
-    # type: (ast.AST) -> None
-    """
-    Add a special attribute `_broken_positions` to nodes inside f-strings
-    if the lineno/col_offset cannot be trusted.
-    """
-    if sys.version_info >= (3, 12):
-      # f-strings were weirdly implemented until https://peps.python.org/pep-0701/
-      # In Python 3.12, inner nodes have sensible positions.
-      return
-    for joinedstr in walk(tree, include_joined_str=True):
-      if not isinstance(joinedstr, ast.JoinedStr):
-        continue
-      for part in joinedstr.values:
-        # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
-        setattr(part, '_broken_positions', True)  # use setattr for mypy
-
-        if isinstance(part, ast.FormattedValue):
-          if not fstring_positions_work():
-            for child in walk(part.value):
-              setattr(child, '_broken_positions', True)
-
-          if part.format_spec:  # this is another JoinedStr
-            # Again, the standard positions span the full f-string.
-            setattr(part.format_spec, '_broken_positions', True)
-
-else:
-  def fstring_positions_work():
-    # type: () -> bool
-    return False
-
-  def annotate_fstring_nodes(_tree):
-    # type: (ast.AST) -> None
-    pass
+def annotate_fstring_nodes(tree):
+  # type: (ast.AST) -> None
+  """
+  Add a special attribute `_broken_positions` to nodes inside f-strings
+  if the lineno/col_offset cannot be trusted.
+  """
+  if sys.version_info >= (3, 12):
+    # f-strings were weirdly implemented until https://peps.python.org/pep-0701/
+    # In Python 3.12, inner nodes have sensible positions.
+    return
+  for joinedstr in walk(tree, include_joined_str=True):
+    if not isinstance(joinedstr, ast.JoinedStr):
+      continue
+    for part in joinedstr.values:
+      # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird.
+      setattr(part, '_broken_positions', True)  # use setattr for mypy
+
+      if isinstance(part, ast.FormattedValue):
+        if not fstring_positions_work():
+          for child in walk(part.value):
+            setattr(child, '_broken_positions', True)
+
+        if part.format_spec:  # this is another JoinedStr
+          # Again, the standard positions span the full f-string.
+          setattr(part.format_spec, '_broken_positions', True)
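fstring_positions_work() is now always defined; previously it was stubbed out to return False on Python before 3.8. A stripped-down standalone version of the same probe, using only stdlib ast (the sample f-string is mine, chosen to hit the conversion and format-spec cases the docstring lists; whether it prints True depends on the interpreter, per the docstring above):

    import ast

    source = 'f"{x!r} {x:>{width}}"'
    tree = ast.parse(source)
    names = [n for n in ast.walk(tree) if isinstance(n, ast.Name)]

    # Where f-string positions work, each Name inside the f-string has its own
    # correct position, so get_source_segment() recovers exactly its text.
    print(all(ast.get_source_segment(source, n) == n.id for n in names))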
