@@ -272,44 +272,11 @@ def build_verbatim margin
272
272
end
273
273
274
274
case type
275
- when :HEADER then
276
- line << '=' * data
277
- _ , _ , peek_column , = peek_token
278
- peek_column ||= column + data
279
- indent = peek_column - column - data
280
- line << ' ' * indent
281
- when :RULE then
282
- width = 2 + data
283
- line << '-' * width
284
- _ , _ , peek_column , = peek_token
285
- peek_column ||= column + width
286
- indent = peek_column - column - width
287
- line << ' ' * indent
288
275
when :BREAK , :TEXT then
289
276
line << data
290
- when :BLOCKQUOTE then
291
- line << '>>>'
292
- peek_type , _ , peek_column = peek_token
293
- if peek_type != :NEWLINE and peek_column
294
- line << ' ' * ( peek_column - column - 3 )
295
- end
296
- else # *LIST_TOKENS
297
- list_marker = case type
298
- when :BULLET then data
299
- when :LABEL then "[#{ data } ]"
300
- when :NOTE then "#{ data } ::"
301
- else # :LALPHA, :NUMBER, :UALPHA
302
- "#{ data } ."
303
- end
304
- line << list_marker
305
- peek_type , _ , peek_column = peek_token
306
- unless peek_type == :NEWLINE then
307
- peek_column ||= column + list_marker . length
308
- indent = peek_column - column - list_marker . length
309
- line << ' ' * indent
310
- end
277
+ else
278
+ raise TypeError , "unexpected token under verbatim: #{ type } "
311
279
end
312
-
313
280
end
314
281
315
282
verbatim << line << "\n " unless line . empty?
@@ -481,11 +448,37 @@ def skip token_type, error = true
481
448
##
482
449
# Turns text +input+ into a stream of tokens
483
450
484
- def tokenize input
451
# Entry point of the tokenizer: prepares the scanner for +input+, then
# tokenizes it in two passes that share the same starting column.
#
# The starting column is read once, right after the scanner is set up and
# before anything is consumed, so both passes see the same value.
#
# Returns whatever +tokenize_input+ returns.
def tokenize(input)
  setup_scanner(input)

  # @s.pos is used as a [column, line] pair throughout this tokenizer
  start_column = @s.pos.first

  tokenize_indented(start_column)
  tokenize_input(start_column)
end
457
+
458
# Records a :NEWLINE token and tells the scanner that a new line has begun.
#
# With +pos+ given, nothing is scanned here: the token text is taken from
# <tt>@s.matched</tt>, so the caller is expected to have just scanned the
# line terminator and to pass the position it captured beforehand.
#
# Without +pos+, any trailing spaces followed by [CR]LF are consumed; the
# token is positioned after those spaces, at the terminator itself.
#
# Returns the result of <tt>@s.newline!</tt> when a token was recorded and
# +nil+ when the scanner is not at the end of a line.
def newline!(pos = nil)
  unless pos
    # skip trailing spaces, but only when a line terminator follows them
    return unless @s.scan(/ *(?=\r?\n)/)

    pos = @s.pos
    return unless pos && @s.scan(/\r?\n/)
  end

  @tokens << [:NEWLINE, @s.matched, *pos]
  @s.newline!
end
486
464
487
- until @s . eos? do
465
# Consumes consecutive lines that are either blank or indented by more than
# +column+ spaces, recording the remainder of each non-blank line as one
# :TEXT token followed by a :NEWLINE token (via +newline!+). The text is
# taken as-is, so markup characters in it are not tokenized.
#
# +column+ is the column that a line's indentation has to exceed.
#
# Scanning stops at the first line that is neither blank nor indented past
# +column+, or when no line terminator follows the text (end of input).
#
# The :TEXT token never includes the line terminator: for CRLF input the
# carriage return is left for +newline!+, which records "\r\n" as the
# :NEWLINE text, matching how the other token rules keep "\r" out of text.
def tokenize_indented(column)
  # more than +column+ spaces followed by text, or a (possibly padded) blank line
  indent = / {#{column + 1},}(?=\S)| *(?=\r?\n)/

  while @s.scan(indent)
    pos = @s.pos

    # Refuse to start on a line terminator (blank line), then take everything
    # up to, but not including, the next [CR]LF or the end of input.
    if @s.scan(/(?!\r?\n).+?(?=\r?\n|\z)/)
      @tokens << [:TEXT, @s.matched, *pos]
    end

    break unless newline!
  end
end
475
+
476
+ def tokenize_input ( margin )
477
+ column = 0
478
+
479
+ until @s . eos?
480
+ pos = @s . pos
481
+ break if pos [ 0 ] < ( margin ||= pos [ 0 ] )
489
482
490
483
# leading spaces will be reflected by the column of the next token
491
484
# the only things we lose are trailing spaces at the end of the file
@@ -494,75 +487,84 @@ def tokenize input
494
487
# note: after BULLET, LABEL, etc.,
495
488
# indent will be the column of the next non-newline token
496
489
497
- @tokens << case
498
- # [CR]LF => :NEWLINE
499
- when @s . scan ( /\r ?\n / ) then
500
- token = [ :NEWLINE , @s . matched , *pos ]
501
- @s . newline!
502
- token
503
- # === text => :HEADER then :TEXT
504
- when @s . scan ( /(=+)(\s *)/ ) then
505
- level = @s [ 1 ] . length
506
- header = [ :HEADER , level , *pos ]
507
-
508
- if @s [ 2 ] =~ /^\r ?\n / then
509
- @s . unscan ( @s [ 2 ] )
510
- header
511
- else
512
- pos = @s . pos
513
- @s . scan ( /.*/ )
514
- @tokens << header
515
- [ :TEXT , @s . matched . sub ( /\r $/ , '' ) , *pos ]
516
- end
517
- # --- (at least 3) and nothing else on the line => :RULE
518
- when @s . scan ( /(-{3,}) *\r ?$/ ) then
519
- [ :RULE , @s [ 1 ] . length - 2 , *pos ]
520
- # * or - followed by white space and text => :BULLET
521
- when @s . scan ( /([*-]) +(\S )/ ) then
522
- @s . unscan ( @s [ 2 ] )
523
- [ :BULLET , @s [ 1 ] , *pos ]
524
- # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
525
- when @s . scan ( /([a-z]|\d +)\. +(\S )/i ) then
526
- # FIXME if tab(s), the column will be wrong
527
- # either support tabs everywhere by first expanding them to
528
- # spaces, or assume that they will have been replaced
529
- # before (and provide a check for that at least in debug
530
- # mode)
531
- list_label = @s [ 1 ]
532
- @s . unscan ( @s [ 2 ] )
533
- list_type =
534
- case list_label
535
- when /[a-z]/ then :LALPHA
536
- when /[A-Z]/ then :UALPHA
537
- when /\d / then :NUMBER
538
- else
539
- raise ParseError , "BUG token #{ list_label } "
540
- end
541
- [ list_type , list_label , *pos ]
542
- # [text] followed by spaces or end of line => :LABEL
543
- when @s . scan ( /\[ (.*?)\] ( +|\r ?$)/ ) then
544
- [ :LABEL , @s [ 1 ] , *pos ]
545
- # text:: followed by spaces or end of line => :NOTE
546
- when @s . scan ( /(.*?)::( +|\r ?$)/ ) then
547
- [ :NOTE , @s [ 1 ] , *pos ]
548
- # >>> followed by end of line => :BLOCKQUOTE
549
- when @s . scan ( />>> *(\w +)?$/ ) then
550
- if word = @s [ 1 ]
551
- @s . unscan ( word )
552
- end
553
- [ :BLOCKQUOTE , word , *pos ]
554
- # anything else: :TEXT
555
- else
556
- @s . scan ( /(.*?)( )?\r ?$/ )
557
- token = [ :TEXT , @s [ 1 ] , *pos ]
558
-
559
- if @s [ 2 ] then
560
- @tokens << token
561
- [ :BREAK , @s [ 2 ] , pos [ 0 ] + @s [ 1 ] . length , pos [ 1 ] ]
562
- else
563
- token
564
- end
565
- end
490
+ case
491
+ # [CR]LF => :NEWLINE
492
+ when @s . scan ( /\r ?\n / )
493
+ newline! ( pos )
494
+ next
495
+
496
+ # === text => :HEADER then :TEXT
497
+ when @s . scan ( /(=+)(\s *)/ )
498
+ level = @s [ 1 ] . length
499
+ header = [ :HEADER , level , *pos ]
500
+
501
+ if @s [ 2 ] =~ /^\r ?\n /
502
+ @s . unscan ( @s [ 2 ] )
503
+ @tokens << header
504
+ else
505
+ pos = @s . pos
506
+ @s . scan ( /.*/ )
507
+ @tokens << header
508
+ @tokens << [ :TEXT , @s . matched . sub ( /\r $/ , '' ) , *pos ]
509
+ end
510
+
511
+ # --- (at least 3) and nothing else on the line => :RULE
512
+ when @s . scan ( /(-{3,}) *\r ?$/ )
513
+ @tokens << [ :RULE , @s [ 1 ] . length - 2 , *pos ]
514
+
515
+ # * or - followed by white space and text => :BULLET
516
+ when @s . scan ( /([*-]) +(?=\S )/ )
517
+ @tokens << [ :BULLET , @s [ 1 ] , *pos ]
518
+ tokenize_input ( nil )
519
+
520
+ # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
521
+ when @s . scan ( /([a-z]|\d +)\. +(?=\S )/i )
522
+ # FIXME if tab(s), the column will be wrong
523
+ # either support tabs everywhere by first expanding them to
524
+ # spaces, or assume that they will have been replaced
525
+ # before (and provide a check for that at least in debug
526
+ # mode)
527
+ list_label = @s [ 1 ]
528
+ list_type =
529
+ case list_label
530
+ when /[a-z]/ then :LALPHA
531
+ when /[A-Z]/ then :UALPHA
532
+ when /\d / then :NUMBER
533
+ else
534
+ raise ParseError , "BUG token #{ list_label } "
535
+ end
536
+ @tokens << [ list_type , list_label , *pos ]
537
+ tokenize_input ( nil )
538
+
539
+ # [text] followed by spaces or end of line => :LABEL
540
+ when @s . scan ( /\[ (.*?)\] ( +|\r ?$)/ )
541
+ @tokens << [ :LABEL , @s [ 1 ] , *pos ]
542
+ tokenize_input ( nil )
543
+
544
+ # text:: followed by spaces or end of line => :NOTE
545
+ when @s . scan ( /(.*?)::( +|\r ?$)/ )
546
+ @tokens << [ :NOTE , @s [ 1 ] , *pos ]
547
+ tokenize_input ( nil )
548
+
549
+ # >>> followed by end of line => :BLOCKQUOTE
550
+ when @s . scan ( />>> *(\w +)?\r ?$/ )
551
+ @tokens << [ :BLOCKQUOTE , @s [ 1 ] , *pos ]
552
+ newline!
553
+ tokenize_input ( nil )
554
+
555
+ # anything else: :TEXT
556
+ else
557
+ column = pos [ 0 ]
558
+ @s . scan ( /(.*?)( )?\r ?$/ )
559
+ @tokens << [ :TEXT , @s [ 1 ] , *pos ]
560
+
561
+ if @s [ 2 ]
562
+ @tokens << [ :BREAK , @s [ 2 ] , pos [ 0 ] + @s [ 1 ] . length , pos [ 1 ] ]
563
+ end
564
+ if newline!
565
+ tokenize_indented ( column )
566
+ end
567
+ end
566
568
end
567
569
568
570
self
0 commit comments