From 7ee5e70ee7745d5f62713466c719b5d93b3bbfbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tam=C3=A1s=20T=C3=B3th?= Date: Fri, 5 Apr 2024 20:31:22 +0200 Subject: [PATCH] Only allow letters as first character in `_modname` (#1067) Related: * runtimeverification/k#4158 --------- Co-authored-by: devops --- docs/conf.py | 4 +-- package/version | 2 +- pyproject.toml | 2 +- src/pyk/kast/outer_lexer.py | 34 +++++++++++++++++-------- src/tests/unit/kast/test_outer_lexer.py | 4 +-- 5 files changed, 29 insertions(+), 17 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index a97a46368..1c3d4eb3e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,8 +9,8 @@ project = 'pyk' author = 'Runtime Verification, Inc' copyright = '2024, Runtime Verification, Inc' -version = '0.1.774' -release = '0.1.774' +version = '0.1.775' +release = '0.1.775' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/package/version b/package/version index 1213e6286..75dd97f91 100644 --- a/package/version +++ b/package/version @@ -1 +1 @@ -0.1.774 +0.1.775 diff --git a/pyproject.toml b/pyproject.toml index aa73eeeb9..f59ff3b72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pyk" -version = "0.1.774" +version = "0.1.775" description = "" authors = [ "Runtime Verification, Inc. ", diff --git a/src/pyk/kast/outer_lexer.py b/src/pyk/kast/outer_lexer.py index ce33539e4..015d9de04 100644 --- a/src/pyk/kast/outer_lexer.py +++ b/src/pyk/kast/outer_lexer.py @@ -113,8 +113,9 @@ class Token(NamedTuple): _DIGIT: Final = set('0123456789') _LOWER: Final = set('abcdefghijklmnopqrstuvwxyz') _UPPER: Final = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ') -_ALPHA: Final = set().union(_LOWER).union(_UPPER) -_ALNUM: Final = set(_ALPHA).union(_DIGIT) +_ALPHA: Final = _LOWER.union(_UPPER) +_ALNUM: Final = _ALPHA.union(_DIGIT) +_WORD: Final = {'_'}.union(_ALNUM) class State(Enum): @@ -456,29 +457,42 @@ def _hash_upper_id(la: str, it: Iterator[str]) -> tuple[Token, str]: _MODNAME_KEYWORDS: Final = {'private', 'public'} -_MODNAME_CHARS: Final = {'-', '_'}.union(_ALNUM) def _modname(la: str, it: Iterator) -> tuple[Token, str]: + r"""[a-zA-Z]\w*(-\w+)*""" + la = _skip_ws_and_comments(la, it) consumed = [] - if la == '#': + if la not in _ALPHA: + raise _unexpected_character(la) + + consumed.append(la) + la = next(it, '') + + while la in _WORD: consumed.append(la) la = next(it, '') - if not la: - raise _unexpected_character(la) + while True: + if la != '-': + break + + consumed.append(la) + la = next(it, '') - allow_dash = False - while la in _MODNAME_CHARS: - if la == '-' and not allow_dash: + if la not in _WORD: raise _unexpected_character(la) - allow_dash = la != '-' + consumed.append(la) la = next(it, '') + while la in _WORD: + consumed.append(la) + la = next(it, '') + text = ''.join(consumed) if text in _MODNAME_KEYWORDS: return _KEYWORDS[text], la diff --git a/src/tests/unit/kast/test_outer_lexer.py b/src/tests/unit/kast/test_outer_lexer.py index 9550bec55..923fd8c54 100644 --- a/src/tests/unit/kast/test_outer_lexer.py +++ b/src/tests/unit/kast/test_outer_lexer.py @@ -304,10 +304,8 @@ def test_default(text: str, expected_token: Token, expected_remaining: str) -> N ('private MODULE', Token('private', TokenType.KW_PRIVATE), ' MODULE'), ('public', Token('public', TokenType.KW_PUBLIC), ''), ('module', Token('module', TokenType.MODNAME), ''), - ('module ', Token('module', TokenType.MODNAME), ' '), ('MODULE', Token('MODULE', TokenType.MODNAME), ''), - ('#module', Token('#module', TokenType.MODNAME), ''), - ('#module#module', Token('#module', TokenType.MODNAME), '#module'), + ('module#module', Token('module', TokenType.MODNAME), '#module'), ('mo-du-le', Token('mo-du-le', TokenType.MODNAME), ''), ('m0-DU_l3', Token('m0-DU_l3', TokenType.MODNAME), ''), ('TEST-MODULE', Token('TEST-MODULE', TokenType.MODNAME), ''),