Skip to content

Commit 5d8c317

Browse files
committed
IdentifierFinder: now a Protocol
1 parent 6e2f014 commit 5d8c317

File tree

7 files changed

+87
-59
lines changed

7 files changed

+87
-59
lines changed

src/cedarscript_editor/cedarscript_editor.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
from cedarscript_ast_parser.cedarscript_ast_parser import MarkerCompatible, RelativeMarker, \
1212
RelativePositionType, Region, SingleFileClause
1313
from text_manipulation import (
14-
IndentationInfo, IdentifierBoundaries, RangeSpec, read_file, write_file, bow_to_search_range
14+
IndentationInfo, IdentifierBoundaries, RangeSpec, read_file, write_file, bow_to_search_range, IdentifierFinder
1515
)
1616

17-
from .tree_sitter_identifier_finder import IdentifierFinder
17+
from .tree_sitter_identifier_finder import TreeSitterIdentifierFinder
1818

1919

2020
class CEDARScriptEditorException(Exception):
@@ -109,7 +109,7 @@ def _update_command(self, cmd: UpdateCommand):
109109
src = read_file(file_path)
110110
lines = src.splitlines()
111111

112-
identifier_finder = IdentifierFinder(file_path, src, RangeSpec.EMPTY)
112+
identifier_finder = TreeSitterIdentifierFinder(file_path, src, RangeSpec.EMPTY)
113113

114114
search_range = RangeSpec.EMPTY
115115
move_src_range = None
@@ -247,7 +247,7 @@ def _create_command(self, cmd: CreateCommand) -> str:
247247

248248
def find_index_range_for_region(region: BodyOrWhole | Marker | Segment | RelativeMarker,
249249
lines: Sequence[str],
250-
identifier_finder: IdentifierFinder,
250+
identifier_finder_IS_IT_USED: IdentifierFinder,
251251
search_range: RangeSpec | IdentifierBoundaries | None = None,
252252
) -> RangeSpec:
253253
# BodyOrWhole | RelativeMarker | MarkerOrSegment
@@ -267,7 +267,7 @@ def find_index_range_for_region(region: BodyOrWhole | Marker | Segment | Relativ
267267
pass
268268
case _:
269269
# TODO transform to RangeSpec
270-
mos = IdentifierFinder("TODO?.py", lines, RangeSpec.EMPTY)(mos, search_range).body
270+
mos = TreeSitterIdentifierFinder("TODO?.py", lines, RangeSpec.EMPTY)(mos, search_range).body
271271
index_range = mos.to_search_range(
272272
lines,
273273
search_range.start if search_range else 0,

src/cedarscript_editor/tree_sitter_identifier_finder.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from grep_ast import filename_to_lang
88
from text_manipulation.indentation_kit import get_line_indent_count
99
from text_manipulation.range_spec import IdentifierBoundaries, RangeSpec, ParentInfo, ParentRestriction
10+
from text_manipulation import IdentifierFinder
1011
from tree_sitter_languages import get_language, get_parser
1112

1213
from .tree_sitter_identifier_queries import LANG_TO_TREE_SITTER_QUERY
@@ -20,7 +21,7 @@
2021
_log = logging.getLogger(__name__)
2122

2223

23-
class IdentifierFinder:
24+
class TreeSitterIdentifierFinder(IdentifierFinder):
2425
"""Finds identifiers in source code based on markers and parent restrictions.
2526
2627
Attributes:
@@ -44,11 +45,11 @@ def __init__(self, fname: str, source: str | Sequence[str], parent_restriction:
4445
if langstr is None:
4546
self.language = None
4647
self.query_info = None
47-
_log.info(f"[IdentifierFinder] NO LANGUAGE for `{fname}`")
48+
_log.info(f"[TreeSitterIdentifierFinder] NO LANGUAGE for `{fname}`")
4849
return
4950
self.query_info: dict[str, dict[str, str]] = LANG_TO_TREE_SITTER_QUERY[langstr]
5051
self.language = get_language(langstr)
51-
_log.info(f"[IdentifierFinder] Selected {self.language}")
52+
_log.info(f"[TreeSitterIdentifierFinder] Selected {self.language}")
5253
self.tree = get_parser(langstr).parse(bytes(source, "utf-8"))
5354

5455
def __call__(

src/text_manipulation/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1-
from .indentation_kit import IndentationInfo
2-
from .range_spec import IdentifierBoundaries, RangeSpec
1+
from .line_kit import get_line_indent_count, extract_indentation
2+
from .range_spec import RangeSpec, IdentifierBoundaries
33
from .text_editor_kit import read_file, write_file, bow_to_search_range
4+
from .cst_kit import IdentifierFinder
5+
from .indentation_kit import IndentationInfo
46

57
__all__ = [
68
"IndentationInfo",
79
"IdentifierBoundaries",
10+
"IdentifierFinder",
811
"RangeSpec",
912
"read_file",
1013
"write_file",

src/text_manipulation/cst_kit.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from typing import runtime_checkable, Protocol, Sequence
2+
3+
from cedarscript_ast_parser import Marker, Segment, RelativeMarker, RelativePositionType, MarkerType, BodyOrWhole
4+
5+
from .range_spec import IdentifierBoundaries, RangeSpec, ParentRestriction
6+
from .text_editor_kit import read_file, write_file, bow_to_search_range
7+
8+
9+
@runtime_checkable
10+
class IdentifierFinder(Protocol):
11+
"""Protocol for finding identifiers in source code."""
12+
13+
def __call__(
14+
self, mos: Marker | Segment, parent_restriction: ParentRestriction = None
15+
) -> IdentifierBoundaries | RangeSpec | None:
16+
"""Find identifier boundaries for a given marker or segment."""
17+
pass

src/text_manipulation/indentation_kit.py

Lines changed: 8 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -19,53 +19,9 @@
1919
from math import gcd
2020
from typing import NamedTuple
2121

22+
from .cst_kit import IdentifierFinder
2223

23-
def get_line_indent_count_from_lines(lines: Sequence[str], index: int) -> int:
24-
return get_line_indent_count(lines[index])
25-
26-
27-
def get_line_indent_count(line: str) -> int:
28-
"""
29-
Count the number of leading whitespace characters in a line.
30-
31-
Args:
32-
line (str): The input line to analyze.
33-
34-
Returns:
35-
int: The number of leading whitespace characters.
36-
37-
Example:
38-
>>> get_line_indent_count("    Hello")
39-
4
40-
>>> get_line_indent_count("\t\tWorld")
41-
2
42-
"""
43-
return len(line) - len(line.lstrip())
44-
45-
46-
def extract_indentation(line: str) -> str:
47-
"""
48-
Extract the leading whitespace from a given line.
49-
50-
This function identifies and returns the leading whitespace characters
51-
(spaces or tabs) from the beginning of the input line.
52-
53-
Args:
54-
line (str): The input line to process.
55-
56-
Returns:
57-
str: The leading whitespace of the line.
58-
59-
Examples:
60-
>>> extract_indentation("    Hello")
61-
'    '
62-
>>> extract_indentation("\t\tWorld")
63-
'\t\t'
64-
>>> extract_indentation("No indentation")
65-
''
66-
"""
67-
return line[:len(line) - len(line.lstrip())]
68-
24+
from .line_kit import get_line_indent_count, extract_indentation
6925

7026
class IndentationInfo(NamedTuple):
7127
"""
@@ -117,7 +73,8 @@ def default(cls) -> 'IndentationInfo':
11773
@classmethod
11874
def shift_indentation(cls,
11975
content: Sequence[str], target_lines: Sequence[str], target_reference_indentation_count: int,
120-
relindent_level: int | None
76+
relindent_level: int | None = None,
77+
identifier_finder: IdentifierFinder | None = None
12178
) -> list[str]:
12279
"""
12380
Returns 'content' with shifted indentation based on a relative indent level and a reference indentation count.
@@ -165,7 +122,10 @@ def _shift_indentation(
165122
return [raw_line_adjuster(line) for line in content]
166123

167124
@classmethod
168-
def from_content(cls, content: str | Sequence[str]) -> 'IndentationInfo':
125+
def from_content(
126+
cls, content: str | Sequence[str],
127+
identifier_finder: IdentifierFinder | None = None
128+
) -> 'IndentationInfo':
169129
"""
170130
Analyzes the indentation in the given content and creates an IndentationInfo instance.
171131

src/text_manipulation/line_kit.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from typing import Sequence
2+
3+
from collections.abc import Sequence
4+
5+
def get_line_indent_count_from_lines(lines: Sequence[str], index: int) -> int:
6+
return get_line_indent_count(lines[index])
7+
8+
def get_line_indent_count(line: str) -> int:
9+
"""
10+
Count the number of leading whitespace characters in a line.
11+
12+
Args:
13+
line (str): The input line to analyze.
14+
15+
Returns:
16+
int: The number of leading whitespace characters.
17+
18+
Example:
19+
>>> get_line_indent_count("    Hello")
20+
4
21+
>>> get_line_indent_count("\t\tWorld")
22+
2
23+
"""
24+
return len(line) - len(line.lstrip())
25+
26+
def extract_indentation(line: str) -> str:
27+
"""
28+
Extract the leading whitespace from a given line.
29+
30+
This function identifies and returns the leading whitespace characters
31+
(spaces or tabs) from the beginning of the input line.
32+
33+
Args:
34+
line (str): The input line to process.
35+
36+
Returns:
37+
str: The leading whitespace of the line.
38+
39+
Examples:
40+
>>> extract_indentation("    Hello")
41+
'    '
42+
>>> extract_indentation("\t\tWorld")
43+
'\t\t'
44+
>>> extract_indentation("No indentation")
45+
''
46+
"""
47+
return line[:len(line) - len(line.lstrip())]

src/text_manipulation/range_spec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from cedarscript_ast_parser import Marker, RelativeMarker, RelativePositionType, MarkerType, BodyOrWhole
1919

20-
from .indentation_kit import get_line_indent_count_from_lines
20+
from .line_kit import get_line_indent_count_from_lines
2121

2222
MATCH_TYPES = ('exact', 'stripped', 'normalized', 'partial')
2323

0 commit comments

Comments
 (0)