@@ -13,12 +13,15 @@ enum TokenType {
13
13
BLOCK_INNER_DOC_MARKER ,
14
14
BLOCK_COMMENT_CONTENT ,
15
15
LINE_DOC_CONTENT ,
16
- FRONTMATTER ,
16
+ FRONTMATTER_START ,
17
+ FRONTMATTER_CONTENT ,
18
+ FRONTMATTER_END ,
17
19
ERROR_SENTINEL
18
20
};
19
21
20
22
/**
 * Persistent state of the external scanner. Both fields are single bytes and
 * are written out / read back by the serialize and deserialize entry points.
 */
typedef struct {
    /* Set and consumed outside the visible part of this file; presumably the
     * number of '#' in an opening raw-string delimiter - TODO confirm. */
    uint8_t opening_hash_count;

    /* Number of '-' in the opening frontmatter fence, recorded by
     * process_frontmatter_start and compared against when looking for the
     * closing fence. */
    uint8_t frontmatter_dashes;
} Scanner;
23
26
24
27
/**
 * Allocates one Scanner through ts_calloc and returns it as an untyped
 * pointer; the other external-scanner entry points in this file cast their
 * `payload` argument back to Scanner *.
 *
 * The parameter list is spelled `(void)` so the definition is a real
 * prototype: in C (before C23) an empty `()` leaves the parameters
 * unspecified and lets calls with stray arguments compile silently.
 */
void *tree_sitter_rust_external_scanner_create(void) { return ts_calloc(1, sizeof(Scanner)); }
@@ -28,15 +31,16 @@ void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner
28
31
unsigned tree_sitter_rust_external_scanner_serialize (void * payload , char * buffer ) {
29
32
Scanner * scanner = (Scanner * )payload ;
30
33
buffer [0 ] = (char )scanner -> opening_hash_count ;
31
- return 1 ;
34
+ buffer [1 ] = (char )scanner -> frontmatter_dashes ;
35
+ return 2 ;
32
36
}
33
37
34
38
void tree_sitter_rust_external_scanner_deserialize (void * payload , const char * buffer , unsigned length ) {
35
39
Scanner * scanner = (Scanner * )payload ;
36
40
scanner -> opening_hash_count = 0 ;
37
- if (length == 1 ) {
38
- Scanner * scanner = (Scanner * )payload ;
41
+ if (length == 2 ) {
39
42
scanner -> opening_hash_count = buffer [0 ];
43
+ scanner -> frontmatter_dashes = buffer [1 ];
40
44
}
41
45
}
42
46
@@ -332,15 +336,42 @@ static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbo
332
336
return false;
333
337
}
334
338
335
- static inline bool process_frontmatter (TSLexer * lexer ) {
336
- uint8_t opening = 0 ;
339
+ static inline bool process_frontmatter_start (TSLexer * lexer , Scanner * scanner ) {
340
+ uint8_t amount = 0 ;
337
341
while (lexer -> lookahead == '-' ) {
338
- opening ++ ;
342
+ amount ++ ;
339
343
advance (lexer );
340
344
}
341
345
342
- if (opening < 3 ) {
346
+ if (amount < 3 ) {
343
347
return false;
348
+ } else {
349
+ scanner -> frontmatter_dashes = amount ;
350
+ lexer -> result_symbol = FRONTMATTER_START ;
351
+
352
+ // parse optional info string after the initial fence
353
+ while (lexer -> lookahead != '\n' && !lexer -> eof (lexer )) {
354
+ advance (lexer );
355
+ }
356
+ advance (lexer );
357
+
358
+ return true;
359
+ }
360
+ }
361
+
362
+ static inline bool process_frontmatter (TSLexer * lexer , Scanner * scanner ) {
363
+ // separately parse empty frontmatter, as tree-sitter strips all whitespace,
364
+ // including newlines, so I can't rely on parsing only after a newline in this case.
365
+ lexer -> mark_end (lexer );
366
+ uint8_t amount = 0 ;
367
+ while (lexer -> lookahead == '-' && amount < scanner -> frontmatter_dashes ) {
368
+ amount ++ ;
369
+ advance (lexer );
370
+ }
371
+
372
+ if (amount == scanner -> frontmatter_dashes ) {
373
+ lexer -> result_symbol = FRONTMATTER_CONTENT ;
374
+ return true;
344
375
}
345
376
346
377
for (;;) {
@@ -349,16 +380,17 @@ static inline bool process_frontmatter(TSLexer *lexer) {
349
380
}
350
381
351
382
if (lexer -> lookahead == '\n' ) {
383
+ lexer -> mark_end (lexer );
352
384
advance (lexer );
353
385
354
386
uint8_t amount = 0 ;
355
- while (lexer -> lookahead == '-' && amount < opening ) {
387
+ while (lexer -> lookahead == '-' && amount < scanner -> frontmatter_dashes ) {
356
388
amount ++ ;
357
389
advance (lexer );
358
390
}
359
391
360
- if (amount == opening ) {
361
- lexer -> result_symbol = FRONTMATTER ;
392
+ if (amount == scanner -> frontmatter_dashes ) {
393
+ lexer -> result_symbol = FRONTMATTER_CONTENT ;
362
394
return true;
363
395
}
364
396
} else {
@@ -367,6 +399,16 @@ static inline bool process_frontmatter(TSLexer *lexer) {
367
399
}
368
400
}
369
401
402
+ static inline bool process_frontmatter_end (TSLexer * lexer , Scanner * scanner ) {
403
+ advance (lexer );
404
+ for (unsigned int amount = 0 ; amount < scanner -> frontmatter_dashes ; amount ++ ) {
405
+ advance (lexer );
406
+ }
407
+
408
+ lexer -> result_symbol = FRONTMATTER_END ;
409
+ return true;
410
+ }
411
+
370
412
bool tree_sitter_rust_external_scanner_scan (void * payload , TSLexer * lexer , const bool * valid_symbols ) {
371
413
// The documentation states that if the lexical analysis fails for some reason
372
414
// they will mark every state as valid and pass it to the external scanner
@@ -425,8 +467,16 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
425
467
return process_float_literal (lexer );
426
468
}
427
469
428
- if (valid_symbols [FRONTMATTER ]) {
429
- return process_frontmatter (lexer );
470
+ if (valid_symbols [FRONTMATTER_START ]) {
471
+ return process_frontmatter_start (lexer , scanner );
472
+ }
473
+
474
+ if (valid_symbols [FRONTMATTER_CONTENT ]) {
475
+ return process_frontmatter (lexer , scanner );
476
+ }
477
+
478
+ if (valid_symbols [FRONTMATTER_END ]) {
479
+ return process_frontmatter_end (lexer , scanner );
430
480
}
431
481
432
482
return false;
0 commit comments