@@ -19,7 +19,10 @@ use std::mem::MaybeUninit;
19
19
use std:: ptr:: NonNull ;
20
20
21
21
pub use self :: bindings:: * ;
22
- use ruby_prism_sys:: { pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t} ;
22
+ use ruby_prism_sys:: {
23
+ pm_buffer_free, pm_buffer_init, pm_buffer_length, pm_buffer_t, pm_buffer_value, pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_options_free, pm_options_read, pm_options_t,
24
+ pm_options_version_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t, pm_serialize, pm_serialize_parse,
25
+ } ;
23
26
24
27
/// A range in the source file.
25
28
pub struct Location < ' pr > {
@@ -428,6 +431,8 @@ pub struct ParseResult<'pr> {
428
431
source : & ' pr [ u8 ] ,
429
432
parser : NonNull < pm_parser_t > ,
430
433
node : NonNull < pm_node_t > ,
434
+ options_string : Vec < u8 > ,
435
+ options : NonNull < pm_options_t > ,
431
436
}
432
437
433
438
impl < ' pr > ParseResult < ' pr > {
@@ -529,6 +534,16 @@ impl<'pr> ParseResult<'pr> {
529
534
pub fn node ( & self ) -> Node < ' _ > {
530
535
Node :: new ( self . parser , self . node . as_ptr ( ) )
531
536
}
537
+
538
+ /// Returns the serialized representation of the parse result.
539
+ #[ must_use]
540
+ pub fn serialize ( & self ) -> Vec < u8 > {
541
+ let mut buffer = Buffer :: default ( ) ;
542
+ unsafe {
543
+ pm_serialize ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) , & mut buffer. buffer ) ;
544
+ }
545
+ buffer. value ( ) . into ( )
546
+ }
532
547
}
533
548
534
549
impl < ' pr > Drop for ParseResult < ' pr > {
@@ -537,10 +552,176 @@ impl<'pr> Drop for ParseResult<'pr> {
537
552
pm_node_destroy ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) ) ;
538
553
pm_parser_free ( self . parser . as_ptr ( ) ) ;
539
554
drop ( Box :: from_raw ( self . parser . as_ptr ( ) ) ) ;
555
+
556
+ pm_options_free ( self . options . as_ptr ( ) ) ;
557
+ drop ( Box :: from_raw ( self . options . as_ptr ( ) ) ) ;
558
+ }
559
+ }
560
+ }
561
+
562
/// A scope of locals surrounding the code that is being parsed.
#[derive(Clone, Debug, Default)]
pub struct OptionsScope {
    /// Flags for the set of forwarding parameters in this scope.
    pub forwarding_flags: u8,
    /// The names of the locals in the scope.
    pub locals: Vec<String>,
}
572
+
573
+ /// The options that can be passed to the parser.
574
+ #[ allow( clippy:: struct_excessive_bools) ]
575
+ #[ derive( Debug , Clone ) ]
576
+ pub struct Options {
577
+ /** The name of the file that is currently being parsed. */
578
+ pub filepath : String ,
579
+ /**
580
+ * The line within the file that the parse starts on. This value is
581
+ * 1-indexed.
582
+ */
583
+ pub line : i32 ,
584
+ /**
585
+ * The name of the encoding that the source file is in. Note that this must
586
+ * correspond to a name that can be found with Encoding.find in Ruby.
587
+ */
588
+ pub encoding : String ,
589
+ /**
590
+ * Whether or not the frozen string literal option has been set.
591
+ * May be:
592
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
593
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
594
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
595
+ */
596
+ pub frozen_string_literal : Option < bool > ,
597
+ /** A bitset of the various options that were set on the command line. */
598
+ pub command_line : u8 ,
599
+ /**
600
+ * The version of prism that we should be parsing with. This is used to
601
+ * allow consumers to specify which behavior they want in case they need to
602
+ * parse exactly as a specific version of CRuby.
603
+ */
604
+ pub version : pm_options_version_t ,
605
+ /**
606
+ * Whether or not the encoding magic comments should be respected. This is a
607
+ * niche use-case where you want to parse a file with a specific encoding
608
+ * but ignore any encoding magic comments at the top of the file.
609
+ */
610
+ pub encoding_locked : bool ,
611
+ /**
612
+ * When the file being parsed is the main script, the shebang will be
613
+ * considered for command-line flags (or for implicit -x). The caller needs
614
+ * to pass this information to the parser so that it can behave correctly.
615
+ */
616
+ pub main_script : bool ,
617
+ /**
618
+ * When the file being parsed is considered a "partial" script, jumps will
619
+ * not be marked as errors if they are not contained within loops/blocks.
620
+ * This is used in the case that you're parsing a script that you know will
621
+ * be embedded inside another script later, but you do not have that context
622
+ * yet. For example, when parsing an ERB template that will be evaluated
623
+ * inside another script.
624
+ */
625
+ pub partial_script : bool ,
626
+ /**
627
+ * Whether or not the parser should freeze the nodes that it creates. This
628
+ * makes it possible to have a deeply frozen AST that is safe to share
629
+ * between concurrency primitives.
630
+ */
631
+ pub freeze : bool ,
632
+ /**
633
+ * The scopes surrounding the code that is being parsed. For most parses
634
+ * this will be empty, but for evals it will be the locals that are in scope
635
+ * surrounding the eval. Scopes are ordered from the outermost scope to the
636
+ * innermost one.
637
+ */
638
+ pub scopes : Vec < OptionsScope > ,
639
+ }
640
+
641
+ impl Default for Options {
642
+ fn default ( ) -> Self {
643
+ Self {
644
+ filepath : String :: new ( ) ,
645
+ line : 1 ,
646
+ encoding : String :: new ( ) ,
647
+ frozen_string_literal : None ,
648
+ command_line : 0 ,
649
+ version : pm_options_version_t:: PM_OPTIONS_VERSION_LATEST ,
650
+ encoding_locked : false ,
651
+ main_script : true ,
652
+ partial_script : false ,
653
+ freeze : false ,
654
+ scopes : Vec :: new ( ) ,
655
+ }
656
+ }
657
+ }
658
+
659
+ impl Options {
660
+ #[ allow( clippy:: cast_possible_truncation) ]
661
+ fn to_binary_string ( & self ) -> Vec < u8 > {
662
+ let mut output = Vec :: new ( ) ;
663
+
664
+ output. extend ( ( self . filepath . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
665
+ output. extend ( self . filepath . as_bytes ( ) ) ;
666
+ output. extend ( self . line . to_ne_bytes ( ) ) ;
667
+ output. extend ( ( self . encoding . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
668
+ output. extend ( self . encoding . as_bytes ( ) ) ;
669
+ output. extend ( self . frozen_string_literal . map_or_else ( || 0i8 , |frozen| if frozen { 1 } else { -1 } ) . to_ne_bytes ( ) ) ;
670
+ output. push ( self . command_line ) ;
671
+ output. extend ( ( self . version as u8 ) . to_ne_bytes ( ) ) ;
672
+ output. push ( self . encoding_locked . into ( ) ) ;
673
+ output. push ( self . main_script . into ( ) ) ;
674
+ output. push ( self . partial_script . into ( ) ) ;
675
+ output. push ( self . freeze . into ( ) ) ;
676
+ output. extend ( ( self . scopes . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
677
+ for scope in & self . scopes {
678
+ output. extend ( ( scope. locals . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
679
+ output. extend ( scope. forwarding_flags . to_ne_bytes ( ) ) ;
680
+ for local in & scope. locals {
681
+ output. extend ( ( local. len ( ) as u32 ) . to_ne_bytes ( ) ) ;
682
+ output. extend ( local. as_bytes ( ) ) ;
683
+ }
684
+ }
685
+ output
686
+ }
687
+ }
688
+
689
+ struct Buffer {
690
+ buffer : pm_buffer_t ,
691
+ }
692
+
693
+ impl Default for Buffer {
694
+ fn default ( ) -> Self {
695
+ let buffer = unsafe {
696
+ let mut uninit = MaybeUninit :: < pm_buffer_t > :: uninit ( ) ;
697
+ let initialized = pm_buffer_init ( uninit. as_mut_ptr ( ) ) ;
698
+ assert ! ( initialized) ;
699
+ uninit. assume_init ( )
700
+ } ;
701
+ Self { buffer }
702
+ }
703
+ }
704
+
705
+ impl Buffer {
706
+ fn length ( & self ) -> usize {
707
+ unsafe { pm_buffer_length ( & self . buffer ) }
708
+ }
709
+
710
+ fn value ( & self ) -> & [ u8 ] {
711
+ unsafe {
712
+ let value = pm_buffer_value ( & self . buffer ) ;
713
+ let value = value. cast :: < u8 > ( ) . cast_const ( ) ;
714
+ std:: slice:: from_raw_parts ( value, self . length ( ) )
540
715
}
541
716
}
542
717
}
543
718
719
+ impl Drop for Buffer {
720
+ fn drop ( & mut self ) {
721
+ unsafe { pm_buffer_free ( & mut self . buffer ) }
722
+ }
723
+ }
724
+
544
725
/// Parses the given source string and returns a parse result.
545
726
///
546
727
/// # Panics
@@ -549,25 +730,56 @@ impl<'pr> Drop for ParseResult<'pr> {
549
730
///
550
731
#[ must_use]
551
732
pub fn parse ( source : & [ u8 ] ) -> ParseResult < ' _ > {
733
+ parse_with_options ( source, & Options :: default ( ) )
734
+ }
735
+
736
+ /// Parses the given source string and returns a parse result.
737
+ ///
738
+ /// # Panics
739
+ ///
740
+ /// Panics if the parser fails to initialize.
741
+ ///
742
+ #[ must_use]
743
+ pub fn parse_with_options < ' pr > ( source : & ' pr [ u8 ] , options : & Options ) -> ParseResult < ' pr > {
744
+ let options_string = options. to_binary_string ( ) ;
552
745
unsafe {
553
746
let uninit = Box :: new ( MaybeUninit :: < pm_parser_t > :: uninit ( ) ) ;
554
747
let uninit = Box :: into_raw ( uninit) ;
555
748
556
- pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , std:: ptr:: null ( ) ) ;
749
+ let options = Box :: into_raw ( Box :: new ( MaybeUninit :: < pm_options_t > :: zeroed ( ) ) ) ;
750
+ pm_options_read ( ( * options) . as_mut_ptr ( ) , options_string. as_ptr ( ) . cast ( ) ) ;
751
+ let options = NonNull :: new ( ( * options) . assume_init_mut ( ) ) . unwrap ( ) ;
752
+
753
+ pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , options. as_ptr ( ) ) ;
557
754
558
755
let parser = ( * uninit) . assume_init_mut ( ) ;
559
756
let parser = NonNull :: new_unchecked ( parser) ;
560
757
561
758
let node = pm_parse ( parser. as_ptr ( ) ) ;
562
759
let node = NonNull :: new_unchecked ( node) ;
563
760
564
- ParseResult { source, parser, node }
761
+ ParseResult { source, parser, node, options_string, options }
762
+ }
763
+ }
764
+
765
+ /// Serializes the given source string and returns a parse result.
766
+ ///
767
+ /// # Panics
768
+ ///
769
+ /// Panics if the parser fails to initialize.
770
+ #[ must_use]
771
+ pub fn serialize_parse ( source : & [ u8 ] , options : & Options ) -> Vec < u8 > {
772
+ let mut buffer = Buffer :: default ( ) ;
773
+ let opts = options. to_binary_string ( ) ;
774
+ unsafe {
775
+ pm_serialize_parse ( & mut buffer. buffer , source. as_ptr ( ) , source. len ( ) , opts. as_ptr ( ) . cast ( ) ) ;
565
776
}
777
+ buffer. value ( ) . into ( )
566
778
}
567
779
568
780
#[ cfg( test) ]
569
781
mod tests {
570
- use super :: parse;
782
+ use super :: { parse, parse_with_options , serialize_parse } ;
571
783
572
784
#[ test]
573
785
fn comments_test ( ) {
@@ -1157,6 +1369,28 @@ end
1157
1369
assert ! ( ( value - 1.0 ) . abs( ) < f64 :: EPSILON ) ;
1158
1370
}
1159
1371
1372
+ #[ test]
1373
+ fn serialize_parse_test ( ) {
// Serializes `__FILE__` two ways with the same options: directly through
// `serialize_parse`, and through `parse_with_options` followed by
// `serialize`. Both byte strings must match.
1374
+ let source = r#"__FILE__"# ;
1375
+ let options = crate :: Options { filepath : "test.rb" . to_string ( ) , ..Default :: default ( ) } ;
1376
+ let bytes = serialize_parse ( source. as_ref ( ) , & options) ;
1377
+
1378
+ let result = parse_with_options ( source. as_bytes ( ) , & options) ;
1379
+
1380
+ assert_eq ! ( bytes, result. serialize( ) ) ;
1381
+
// The pretty-printed tree must report the filepath passed in the options.
1382
+ let expected = r#"@ ProgramNode (location: (1,0)-(1,8))
1383
+ +-- locals: []
1384
+ +-- statements:
1385
+ @ StatementsNode (location: (1,0)-(1,8))
1386
+ +-- body: (length: 1)
1387
+ +-- @ SourceFileNode (location: (1,0)-(1,8))
1388
+ +-- StringFlags: nil
1389
+ +-- filepath: "test.rb"
1390
+ "# ;
1391
+ assert_eq ! ( expected, result. node( ) . pretty_print( ) . as_str( ) ) ;
1392
+ }
1393
+
1160
1394
#[ test]
1161
1395
fn node_field_lifetime_test ( ) {
1162
1396
// The code below wouldn't typecheck prior to https://github.com/ruby/prism/pull/2519,
0 commit comments