Skip to content

Commit 6a15239

Browse files
committed
Lexer: isTag: allow whole tag (except first char) to be any non-space (#3957)
Instead of walking the string and stopping at the first character that fails isIdentifierNext() (see Lexer::isIdentifierStart(), isIdentifierNext() and isSingleCharOperator()), walk all the way to the end of the word. This allows even punctuation and other non-space characters to be used in tags. We still need to use isIdentifierStart() for the first character, to disambiguate a tag from a negative number or a subtraction. Apparently there is no command context available at this point, so the parser cannot "know" whether it's doing a "task calc" and apply different parse rules: the lexing seems to happen before the arguments are broken down into commands for dispatch.
1 parent 9e2dd40 commit 6a15239

File tree

2 files changed

+33
-9
lines changed

2 files changed

+33
-9
lines changed

src/Lexer.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ bool Lexer::isSet(std::string& token, Lexer::Type& type) {
708708
////////////////////////////////////////////////////////////////////////////////
709709
// Lexer::Type::tag
710710
// ^ | '(' | ')' | <unicodeWhitespace>
711-
// [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]*
711+
// [ +|- ] <isIdentifierStart> [ <word> ]
712712
bool Lexer::isTag(std::string& token, Lexer::Type& type) {
713713
std::size_t marker = _cursor;
714714

@@ -721,14 +721,12 @@ bool Lexer::isTag(std::string& token, Lexer::Type& type) {
721721
++marker;
722722

723723
if (isIdentifierStart(_text[marker])) {
724-
utf8_next_char(_text, marker);
725-
726-
while (isIdentifierNext(_text[marker])) utf8_next_char(_text, marker);
727-
728-
token = _text.substr(_cursor, marker - _cursor);
729-
type = Lexer::Type::tag;
730-
_cursor = marker;
731-
return true;
724+
if (readWord(_text, marker, token)) {
725+
token = _text.substr(_cursor, marker - _cursor);
726+
type = Lexer::Type::tag;
727+
_cursor = marker;
728+
return true;
729+
}
732730
}
733731
}
734732

test/tag.test.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,32 @@ def test_tag_filter_partial_match(self):
594594
self.assertNotIn("three", out)
595595
self.assertIn("four", out)
596596

597+
class TestIssue3957(TestCase):
598+
def setUp(self):
599+
"""Executed before each test in the class"""
600+
self.t = Task()
601+
602+
def test_tag_dashes_and_utf8_glyphs(self):
603+
"""
604+
- All non-whitespace valid if not first char
605+
- Single char tags work
606+
- UTF-8 can work as first/only chars too
607+
"""
608+
crabglyph = "\U0001f980"
609+
testtags = ['', 'this-test', 'that-test.foo', f"foo-{crabglyph}"]
610+
611+
self.t(f"add{' +'.join(testtags)} one")
612+
code, out, err = self.t("_get 1.tags")
613+
self.assertEqual(sorted(testtags[1:]), sorted(out.strip().split(",")))
614+
615+
self.t("add +x two")
616+
code, out, err = self.t("_get 2.tags")
617+
self.assertEqual("x\n", out)
618+
619+
self.t(f"add +{crabglyph} three")
620+
code, out, err = self.t("_get 3.tags")
621+
self.assertEqual(f"{crabglyph}\n", out)
622+
597623

598624
if __name__ == "__main__":
599625
from simpletap import TAPTestRunner

0 commit comments

Comments
 (0)