This repository was archived by the owner on Jan 25, 2022. It is now read-only.

Add unicode escapes, allow non-ASCII identifiers & other improvements #136

Open · wants to merge 11 commits into base: master
55 changes: 38 additions & 17 deletions grammars/rust.cson
@@ -39,12 +39,26 @@
}
'escaped_character': {
'name': 'constant.character.escape.rust'
'match': '\\\\(x[0-9A-Fa-f]{2}|[0-2][0-7]{0,2}|3[0-6][0-7]?|37[0-7]?|[4-7][0-7]?|.)'
'match': '\\\\([trn0\'\"\\\\]|x[0-9A-Fa-f]{2}|$)'
}
'unicode_escaped_character': {
'name': 'constant.character.escape.unicode.rust'
'match': '\\\\u\\{([0-9A-Fa-f]_*){1,6}\\}'
}
'string_literal': {
'comment': 'Double-quote string literal'
'name': 'string.quoted.double.rust'
'begin': 'b?"'
'begin': '"'
'end': '"'
'patterns': [
{ 'include': '#escaped_character' }
{ 'include': '#unicode_escaped_character' }
]
}
'byte_string_literal': {
'comment': 'Double-quote byte string literal'
'name': 'string.byte.quoted.double.rust'
'begin': 'b"'
'end': '"'
'patterns': [
{ 'include': '#escaped_character' }
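
Not part of the diff: a minimal Rust sketch of the literal forms the split rules above distinguish (plain escapes, the new `\u{...}` scope, and the new byte-string scope). The values are illustrative only.

```rust
fn main() {
    // escaped_character: simple escapes (\t, \r, \n, \0, \', \", \\) and \xNN.
    let s = "tab:\t quote:\" backslash:\\ bell:\x07";
    // unicode_escaped_character: \u{...} with one to six hex digits,
    // now scoped separately as constant.character.escape.unicode.rust.
    let heart = "\u{2764}";
    // byte_string_literal: gets the new string.byte.quoted.double.rust scope;
    // only byte escapes such as \xNN are valid here, not \u{...}.
    let bytes: &[u8; 3] = b"\x48i\n";
    println!("{} {} {:?}", s, heart, bytes);
}
```
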
@@ -59,7 +73,7 @@
'sigils': {
'comment': 'Sigil'
'name': 'keyword.operator.sigil.rust'
'match': '[&*](?=[a-zA-Z0-9_\\(\\[\\|\\"]+)'
'match': '[&*](?=[a-zA-Z0-9_\\(\\[\\|\\"\\x80-\\xFF]+)'
}
'self': {
'comment': 'Self variable'
@@ -109,14 +123,14 @@
'lifetime': {
'comment': 'Named lifetime'
'name': 'storage.modifier.lifetime.rust'
'match': '\'([a-zA-Z_][a-zA-Z0-9_]*)\\b'
'match': '\'([a-zA-Z_\\x80-\\xFF][a-zA-Z0-9_\\x80-\\xFF]*)\\b'
'captures': {
'1': { 'name': 'entity.name.lifetime.rust' }
}
}
'ref_lifetime': {
'comment': 'Reference with named lifetime'
'match': '&(\'([a-zA-Z_][a-zA-Z0-9_]*))\\b'
'match': '&(\'([a-zA-Z_\\x80-\\xFF][a-zA-Z0-9_\\x80-\\xFF]*))\\b'
'captures': {
'1': { 'name': 'storage.modifier.lifetime.rust' }
'2': { 'name': 'entity.name.lifetime.rust' }
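
For context, a short Rust snippet (not from the PR) showing the two shapes these rules target: a named lifetime and a reference annotated with one. Because the character classes now also accept bytes 0x80–0xFF, a lifetime written with non-ASCII letters would be scoped the same way; whether the compiler accepts such a name is a separate question.

```rust
// 'lifetime': a named lifetime parameter such as 'a.
// 'ref_lifetime': a reference carrying that lifetime, such as &'a str.
struct Wrapper<'a> {
    inner: &'a str,
}

fn first<'a>(items: &'a [String]) -> &'a str {
    items[0].as_str()
}
```
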
@@ -155,7 +169,7 @@
'type': {
'comment': 'A type'
'name': 'entity.name.type.rust'
'match': '\\b([A-Za-z][_A-Za-z0-9]*|_[_A-Za-z0-9]+)\\b'
'match': '\\b([A-Za-z\\x80-\\xFF][_A-Za-z0-9\\x80-\\xFF]*|_[_A-Za-z0-9\\x80-\\xFF]+)\\b'
}
'type_params': {
'comment': 'Type parameters'
@@ -224,6 +238,7 @@
'end': '\\]'
'patterns': [
{ 'include': '#string_literal' }
{ 'include': '#byte_string_literal' }
{ 'include': '#block_doc_comment' }
{ 'include': '#block_comment' }
{ 'include': '#line_doc_comment' }
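
A hedged illustration of why string scopes are included inside attributes: attribute arguments are literals, which may themselves contain escapes. The `demo_attr` byte-string case is hypothetical (it would need a matching proc-macro) and is shown only for the syntax the new include covers.

```rust
// String literal inside an attribute, including a \u{...} escape.
#[deprecated(note = "moved \u{2192} new_api::run")]
pub fn old_api() {}

// Byte string literals are also legal attribute arguments syntactically;
// `demo_attr` is a hypothetical proc-macro attribute, shown only for the syntax.
// #[demo_attr(magic = b"\x7FELF")]
pub fn new_api() {}
```
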
@@ -234,9 +249,15 @@
{
'comment': 'Single-quote string literal (character)'
'name': 'string.quoted.single.rust'
'match': 'b?\'([^\'\\\\]|\\\\(x[0-9A-Fa-f]{2}|[0-2][0-7]{0,2}|3[0-6][0-7]?|37[0-7]?|[4-7][0-7]?|.))\''
'match': '\'([^\'\\\\]|\\\\([trn0\'\"\\\\]|x[0-9A-Fa-f]{2}|u\\{([0-9A-Fa-f]_*){1,6}\\}))\''
}
{
'comment': 'Single-quote byte string literal (character)'
'name': 'string.byte.quoted.single.rust'
'match': 'b\'([^\'\\\\]|\\\\([trn0\'\"\\\\]|x[0-9A-Fa-f]{2}))\''
}
{ 'include': '#string_literal' }
{ 'include': '#byte_string_literal' }
{ 'include': '#raw_string_literal' }
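
Again not part of the diff: a small sketch of character versus byte-character literals, which now receive distinct scopes.

```rust
fn main() {
    // Character literals: simple escapes, \xNN (up to 0x7F), and \u{...}.
    let tab: char = '\t';
    let esc: char = '\x1B';
    let crab: char = '\u{1F980}';
    // Byte character literal: new string.byte.quoted.single.rust scope;
    // only byte escapes such as \xNN are allowed here, never \u{...}.
    let byte: u8 = b'\x20';
    println!("{} {} {} {}", tab, esc, crab, byte);
}
```
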
# Numbers
{
@@ -257,22 +278,22 @@
{
'comment': 'Integer literal (decimal)'
'name': 'constant.numeric.integer.decimal.rust'
'match': '\\b[0-9][0-9_]*([ui](8|16|32|64|128|s|size))?\\b'
'match': '\\b[0-9][0-9_]*([ui](8|16|32|64|128|size))?\\b'
}
{
'comment': 'Integer literal (hexadecimal)'
'name': 'constant.numeric.integer.hexadecimal.rust'
'match': '\\b0x[a-fA-F0-9_]+([ui](8|16|32|64|128|s|size))?\\b'
'match': '\\b0x[a-fA-F0-9_]+([ui](8|16|32|64|128|size))?\\b'
}
{
'comment': 'Integer literal (octal)'
'name': 'constant.numeric.integer.octal.rust'
'match': '\\b0o[0-7_]+([ui](8|16|32|64|128|s|size))?\\b'
'match': '\\b0o[0-7_]+([ui](8|16|32|64|128|size))?\\b'
}
{
'comment': 'Integer literal (binary)'
'name': 'constant.numeric.integer.binary.rust'
'match': '\\b0b[01_]+([ui](8|16|32|64|128|s|size))?\\b'
'match': '\\b0b[01_]+([ui](8|16|32|64|128|size))?\\b'
}
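
The obsolete `s` shorthand suffix (as in `1is`/`1us`) is no longer accepted by stable Rust, so the updated patterns keep only the suffixes below. A quick sketch of literals that should still match:

```rust
fn main() {
    let a = 255u8;             // decimal with a type suffix
    let b = 1_000_000i64;      // underscores as digit separators
    let c = 0xFF_u32;          // hexadecimal
    let d = 0o755i16;          // octal
    let e = 0b1010_1010usize;  // binary
    let f = 42u128;            // 128-bit suffixes are also in the pattern
    println!("{} {} {} {} {} {}", a, b, c, d, e, f);
}
```
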
# Language
{
@@ -369,21 +390,21 @@
# Function and macro calls
{
'comment': 'Invokation of a macro'
'match': '\\b([a-zA-Z_][a-zA-Z0-9_]*\\!)\\s*[({\\[]'
'match': '\\b([a-zA-Z_\\x80-\\xFF][a-zA-Z0-9_\\x80-\\xFF]*\\!)\\s*[({\\[]'
'captures': {
'1': { 'name': 'entity.name.function.macro.rust' }
}
}
{
'comment': 'Function call'
'match': '\\b([A-Za-z][A-Za-z0-9_]*|_[A-Za-z0-9_]+)\\s*\\('
'match': '\\b([A-Za-z\\x80-\\xFF][A-Za-z0-9_\\x80-\\xFF]*|_[A-Za-z0-9_\\x80-\\xFF]+)\\s*\\('
'captures': {
'1': { 'name': 'entity.name.function.rust' }
}
}
{
'comment': 'Function call with type parameters'
'begin': '\\b([A-Za-z][A-Za-z0-9_]*|_[A-Za-z0-9_]+)\\s*(::)(?=\\s*<.*>\\s*\\()'
'begin': '\\b([A-Za-z\\x80-\\xFF][A-Za-z0-9_\\x80-\\xFF]*|_[A-Za-z0-9_\\x80-\\xFF]+)\\s*(::)(?=\\s*<.*>\\s*\\()'
'end': '\\('
'captures': {
'1': { 'name': 'entity.name.function.rust' }
@@ -396,7 +417,7 @@
# Function definition
{
'comment': 'Function definition'
'begin': '\\b(fn)\\s+([A-Za-z][A-Za-z0-9_]*|_[A-Za-z0-9_]+)'
'begin': '\\b(fn)\\s+([A-Za-z\\x80-\\xFF][A-Za-z0-9_\\x80-\\xFF]*|_[A-Za-z0-9_\\x80-\\xFF]+)'
'end': '[\\{;]'
'beginCaptures': {
'1': { 'name': 'keyword.other.fn.rust' }
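
A hedged example of the non-ASCII names the widened `\x80-\xFF` byte ranges are meant to catch in macro, call and `fn` positions. The identifiers are illustrative; stable Rust has accepted such names since non-ASCII identifiers were stabilized (Rust 1.53).

```rust
// Non-ASCII identifiers in macro, call and definition positions.
macro_rules! begrüßung {
    () => {
        println!("Grüß dich!")
    };
}

fn größe_in_bytes(s: &str) -> usize {
    s.len()
}

fn main() {
    begrüßung!();
    println!("{}", größe_in_bytes("héllo"));
}
```
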
@@ -430,7 +451,7 @@
# Type declaration
{
'comment': 'Type declaration'
'begin': '\\b(enum|struct|trait|union)\\s+([a-zA-Z_][a-zA-Z0-9_]*)'
'begin': '\\b(enum|struct|trait|union)\\s+([a-zA-Z_\\x80-\\xFF][a-zA-Z0-9_\\x80-\\xFF]*)'
'end': '[\\{\\(;]'
'beginCaptures': {
'1': { 'name': 'storage.type.rust' }
@@ -450,7 +471,7 @@
# Type alias
{
'comment': 'Type alias'
'begin': '\\b(type)\\s+([a-zA-Z_][a-zA-Z0-9_]*)'
'begin': '\\b(type)\\s+([a-zA-Z_\\x80-\\xFF][a-zA-Z0-9_\\x80-\\xFF]*)'
'end': ';'
'beginCaptures': {
'1': { 'name': 'storage.type.rust' }
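
The same idea applies to type declarations and aliases: a small sketch (names are illustrative) of declarations the widened patterns now scope as types.

```rust
// enum / struct / trait / type names using non-ASCII letters are now
// captured by the type-declaration and type-alias rules.
struct Übersetzung {
    länge: usize,
}

enum Größe {
    Klein,
    Groß,
}

trait Zählbar {
    fn zähle(&self) -> usize;
}

type Maß = Größe;
```
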
18 changes: 15 additions & 3 deletions spec/rust-spec.coffee
@@ -139,7 +139,7 @@ describe 'Rust grammar', ->
it 'tokenizes byte strings', ->
{tokens} = grammar.tokenizeLine('text b"This is a bytestring" text')
expect(tokens[0]).toEqual value: 'text ', scopes: ['source.rust']
expect(tokens[2]).toEqual value: 'This is a bytestring', scopes: ['source.rust', 'string.quoted.double.rust']
expect(tokens[2]).toEqual value: 'This is a bytestring', scopes: ['source.rust', 'string.byte.quoted.double.rust']
expect(tokens[4]).toEqual value: ' text', scopes: ['source.rust']

it 'tokenizes raw byte strings', ->
@@ -170,13 +170,13 @@ describe 'Rust grammar', ->
it 'tokenizes bytes character', ->
{tokens} = grammar.tokenizeLine('text b\'b\' text')
expect(tokens[0]).toEqual value: 'text ', scopes: ['source.rust']
expect(tokens[1]).toEqual value: 'b\'b\'', scopes: ['source.rust', 'string.quoted.single.rust']
expect(tokens[1]).toEqual value: 'b\'b\'', scopes: ['source.rust', 'string.byte.quoted.single.rust']
expect(tokens[2]).toEqual value: ' text', scopes: ['source.rust']

it 'tokenizes escaped bytes characters', ->
{tokens} = grammar.tokenizeLine('text b\'\\x20\' text')
expect(tokens[0]).toEqual value: 'text ', scopes: ['source.rust']
expect(tokens[1]).toEqual value: 'b\'\\x20\'', scopes: ['source.rust', 'string.quoted.single.rust']
expect(tokens[1]).toEqual value: 'b\'\\x20\'', scopes: ['source.rust', 'string.byte.quoted.single.rust']
expect(tokens[2]).toEqual value: ' text', scopes: ['source.rust']

#
@@ -764,6 +764,18 @@ describe 'Rust grammar', ->
expect(tokens[0][13]).toEqual value: "'", scopes: ['source.rust', 'meta.type_params.rust', 'storage.modifier.lifetime.rust']
expect(tokens[0][14]).toEqual value: 'a', scopes: ['source.rust', 'meta.type_params.rust', 'storage.modifier.lifetime.rust', 'entity.name.lifetime.rust']

#
# non-ASCII identifiers
#

it 'tokenizes non-ASCII identifiers', ->
tokens = grammar.tokenizeLines("*Ωµó\n'hellóñαωΑΩµo\nhellóñαωΑΩµo!();\nhellóñαωΑΩµo();\ntype hellóñαωΑΩµo;")
expect(tokens[0][0]).toEqual value: '*', scopes: ['source.rust', 'keyword.operator.sigil.rust']
expect(tokens[1][1]).toEqual value: 'hellóñαωΑΩµo', scopes: ['source.rust', 'storage.modifier.lifetime.rust', 'entity.name.lifetime.rust']
expect(tokens[2][0]).toEqual value: 'hellóñαωΑΩµo!', scopes: ['source.rust', 'entity.name.function.macro.rust']
expect(tokens[3][0]).toEqual value: 'hellóñαωΑΩµo', scopes: ['source.rust', 'entity.name.function.rust']
expect(tokens[4][2]).toEqual value: 'hellóñαωΑΩµo', scopes: ['source.rust', 'entity.name.type.rust']

#
# impl type modifier
#