Skip to content

Commit 5bfae47

Browse files
committed
parse (frontmatter_content)
1 parent e3adf69 commit 5bfae47

File tree

2 files changed

+72
-14
lines changed

2 files changed

+72
-14
lines changed

grammar.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ module.exports = grammar({
7676
$._inner_block_doc_comment_marker,
7777
$._block_comment_content,
7878
$._line_doc_content,
79-
$.frontmatter,
79+
$._frontmatter_start,
80+
$.frontmatter_content,
81+
$._frontmatter_end,
8082
$._error_sentinel,
8183
],
8284

@@ -1649,6 +1651,12 @@ module.exports = grammar({
16491651

16501652
shebang: _ => /#![\r\f\t\v ]*([^\[\n].*)?\n/,
16511653

1654+
frontmatter: $ => seq(
1655+
$._frontmatter_start,
1656+
$.frontmatter_content,
1657+
$._frontmatter_end,
1658+
),
1659+
16521660
_reserved_identifier: $ => alias(choice(
16531661
'default',
16541662
'union',

src/scanner.c

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@ enum TokenType {
1313
BLOCK_INNER_DOC_MARKER,
1414
BLOCK_COMMENT_CONTENT,
1515
LINE_DOC_CONTENT,
16-
FRONTMATTER,
16+
FRONTMATTER_START,
17+
FRONTMATTER_CONTENT,
18+
FRONTMATTER_END,
1719
ERROR_SENTINEL
1820
};
1921

2022
typedef struct {
2123
uint8_t opening_hash_count;
24+
uint8_t frontmatter_dashes;
2225
} Scanner;
2326

2427
void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); }
@@ -28,15 +31,16 @@ void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner
2831
unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) {
2932
Scanner *scanner = (Scanner *)payload;
3033
buffer[0] = (char)scanner->opening_hash_count;
31-
return 1;
34+
buffer[1] = (char)scanner->frontmatter_dashes;
35+
return 2;
3236
}
3337

3438
void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
3539
Scanner *scanner = (Scanner *)payload;
3640
scanner->opening_hash_count = 0;
37-
if (length == 1) {
38-
Scanner *scanner = (Scanner *)payload;
41+
if (length == 2) {
3942
scanner->opening_hash_count = buffer[0];
43+
scanner->frontmatter_dashes = buffer[1];
4044
}
4145
}
4246

@@ -332,15 +336,42 @@ static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbo
332336
return false;
333337
}
334338

335-
static inline bool process_frontmatter(TSLexer *lexer) {
336-
uint8_t opening = 0;
339+
static inline bool process_frontmatter_start(TSLexer *lexer, Scanner *scanner) {
340+
uint8_t amount = 0;
337341
while (lexer->lookahead == '-') {
338-
opening++;
342+
amount++;
339343
advance(lexer);
340344
}
341345

342-
if (opening < 3) {
346+
if (amount < 3) {
343347
return false;
348+
} else {
349+
scanner->frontmatter_dashes = amount;
350+
lexer->result_symbol = FRONTMATTER_START;
351+
352+
// parse optional info string after the initial fence
353+
while (lexer->lookahead != '\n' && !lexer->eof(lexer)) {
354+
advance(lexer);
355+
}
356+
advance(lexer);
357+
358+
return true;
359+
}
360+
}
361+
362+
static inline bool process_frontmatter(TSLexer *lexer, Scanner *scanner) {
363+
// separately parse empty frontmatter, as tree-sitter strips all whitespace,
364+
// including newlines, so I can't rely on parsing only after a newline in this case.
365+
lexer->mark_end(lexer);
366+
uint8_t amount = 0;
367+
while (lexer->lookahead == '-' && amount < scanner->frontmatter_dashes) {
368+
amount++;
369+
advance(lexer);
370+
}
371+
372+
if (amount == scanner->frontmatter_dashes) {
373+
lexer->result_symbol = FRONTMATTER_CONTENT;
374+
return true;
344375
}
345376

346377
for (;;) {
@@ -349,16 +380,17 @@ static inline bool process_frontmatter(TSLexer *lexer) {
349380
}
350381

351382
if (lexer->lookahead == '\n') {
383+
lexer->mark_end(lexer);
352384
advance(lexer);
353385

354386
uint8_t amount = 0;
355-
while (lexer->lookahead == '-' && amount < opening) {
387+
while (lexer->lookahead == '-' && amount < scanner->frontmatter_dashes) {
356388
amount++;
357389
advance(lexer);
358390
}
359391

360-
if (amount == opening) {
361-
lexer->result_symbol = FRONTMATTER;
392+
if (amount == scanner->frontmatter_dashes) {
393+
lexer->result_symbol = FRONTMATTER_CONTENT;
362394
return true;
363395
}
364396
} else {
@@ -367,6 +399,16 @@ static inline bool process_frontmatter(TSLexer *lexer) {
367399
}
368400
}
369401

402+
static inline bool process_frontmatter_end(TSLexer *lexer, Scanner *scanner) {
403+
advance(lexer);
404+
for (unsigned int amount = 0; amount < scanner->frontmatter_dashes; amount++) {
405+
advance(lexer);
406+
}
407+
408+
lexer->result_symbol = FRONTMATTER_END;
409+
return true;
410+
}
411+
370412
bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
371413
// The documentation states that if the lexical analysis fails for some reason
372414
// they will mark every state as valid and pass it to the external scanner
@@ -425,8 +467,16 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
425467
return process_float_literal(lexer);
426468
}
427469

428-
if (valid_symbols[FRONTMATTER]) {
429-
return process_frontmatter(lexer);
470+
if (valid_symbols[FRONTMATTER_START]) {
471+
return process_frontmatter_start(lexer, scanner);
472+
}
473+
474+
if (valid_symbols[FRONTMATTER_CONTENT]) {
475+
return process_frontmatter(lexer, scanner);
476+
}
477+
478+
if (valid_symbols[FRONTMATTER_END]) {
479+
return process_frontmatter_end(lexer, scanner);
430480
}
431481

432482
return false;

0 commit comments

Comments
 (0)