Skip to content

Commit 0a0f2c2

Browse files
committed
Expose prettyprint/serialize/parse options in rust
Signed-off-by: Samuel Giddins <[email protected]>
1 parent a1e7c89 commit 0a0f2c2

File tree

3 files changed

+283
-7
lines changed

3 files changed

+283
-7
lines changed

rust/ruby-prism-sys/build/main.rs

+21-3
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ impl bindgen::callbacks::ParseCallbacks for Callbacks {
9797
/// This method only generates code in memory here--it doesn't write it to file.
9898
///
9999
fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings {
100-
bindgen::Builder::default()
100+
let mut builder = bindgen::Builder::default()
101101
.derive_default(true)
102102
.generate_block(true)
103103
.generate_comments(true)
@@ -128,6 +128,7 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings {
128128
// Enums
129129
.rustified_non_exhaustive_enum("pm_comment_type_t")
130130
.rustified_non_exhaustive_enum(r"pm_\w+_flags")
131+
.rustified_non_exhaustive_enum("pm_options_version_t")
131132
.rustified_non_exhaustive_enum("pm_node_type")
132133
.rustified_non_exhaustive_enum("pm_pack_encoding")
133134
.rustified_non_exhaustive_enum("pm_pack_endian")
@@ -138,22 +139,39 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings {
138139
.rustified_non_exhaustive_enum("pm_pack_type")
139140
.rustified_non_exhaustive_enum("pm_pack_variant")
140141
// Functions
142+
.allowlist_function("pm_buffer_free")
143+
.allowlist_function("pm_buffer_init")
144+
.allowlist_function("pm_buffer_length")
145+
.allowlist_function("pm_buffer_value")
141146
.allowlist_function("pm_list_empty_p")
142147
.allowlist_function("pm_list_free")
143148
.allowlist_function("pm_node_destroy")
149+
.allowlist_function("pm_options_free")
150+
.allowlist_function("pm_options_read")
144151
.allowlist_function("pm_pack_parse")
145152
.allowlist_function("pm_parse")
146153
.allowlist_function("pm_parser_free")
147154
.allowlist_function("pm_parser_init")
155+
.allowlist_function("pm_prettyprint")
156+
.allowlist_function("pm_serialize_parse")
157+
.allowlist_function("pm_serialize")
148158
.allowlist_function("pm_size_to_native")
159+
.allowlist_function("pm_string_ensure_owned")
149160
.allowlist_function("pm_string_free")
150161
.allowlist_function("pm_string_length")
162+
.allowlist_function("pm_string_shared_init")
151163
.allowlist_function("pm_string_source")
152164
.allowlist_function("pm_version")
153165
// Vars
154166
.allowlist_var(r"^pm_encoding\S+")
155-
.generate()
156-
.expect("Unable to generate prism bindings")
167+
.allowlist_var(r"^PM_OPTIONS_COMMAND\S+");
168+
169+
if let Ok(target) = std::env::var("TARGET") {
170+
if target.contains("wasm") {
171+
builder = builder.clang_arg("-fvisibility=default");
172+
}
173+
}
174+
builder.generate().expect("Unable to generate prism bindings")
157175
}
158176

159177
/// Write the bindings to the `$OUT_DIR/bindings.rs` file. We'll pull these into

rust/ruby-prism/build.rs

+24
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,30 @@ impl<'pr> Node<'pr> {{
616616
writeln!(file, " }}")?;
617617
writeln!(file)?;
618618

619+
writeln!(
620+
file,
621+
r#"
622+
/// Returns a pretty-printed representation of this node.
623+
#[must_use]
624+
pub fn pretty_print(&self) -> String {{
625+
let mut buffer = crate::Buffer::default();
626+
unsafe {{
627+
match *self {{
628+
"#
629+
)?;
630+
for node in &config.nodes {
631+
writeln!(file, " Self::{} {{ pointer, parser, .. }} => pm_prettyprint(&mut buffer.buffer, parser.as_ptr(), pointer.cast()),", node.name)?;
632+
}
633+
writeln!(
634+
file,
635+
r#"
636+
}}
637+
std::str::from_utf8_unchecked(buffer.value()).to_string()
638+
}}
639+
}}
640+
"#
641+
)?;
642+
619643
writeln!(file, " /// Returns the location of this node.")?;
620644
writeln!(file, " #[must_use]")?;
621645
writeln!(file, " pub fn location(&self) -> Location<'pr> {{")?;

rust/ruby-prism/src/lib.rs

+238-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ use std::mem::MaybeUninit;
1919
use std::ptr::NonNull;
2020

2121
pub use self::bindings::*;
22-
use ruby_prism_sys::{pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t};
22+
use ruby_prism_sys::{
23+
pm_buffer_free, pm_buffer_init, pm_buffer_length, pm_buffer_t, pm_buffer_value, pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_options_free, pm_options_read, pm_options_t,
24+
pm_options_version_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t, pm_serialize, pm_serialize_parse,
25+
};
2326

2427
/// A range in the source file.
2528
pub struct Location<'pr> {
@@ -428,6 +431,8 @@ pub struct ParseResult<'pr> {
428431
source: &'pr [u8],
429432
parser: NonNull<pm_parser_t>,
430433
node: NonNull<pm_node_t>,
434+
options_string: Vec<u8>,
435+
options: NonNull<pm_options_t>,
431436
}
432437

433438
impl<'pr> ParseResult<'pr> {
@@ -529,6 +534,16 @@ impl<'pr> ParseResult<'pr> {
529534
pub fn node(&self) -> Node<'_> {
530535
Node::new(self.parser, self.node.as_ptr())
531536
}
537+
538+
/// Returns the serialized representation of the parse result.
539+
#[must_use]
540+
pub fn serialize(&self) -> Vec<u8> {
541+
let mut buffer = Buffer::default();
542+
unsafe {
543+
pm_serialize(self.parser.as_ptr(), self.node.as_ptr(), &mut buffer.buffer);
544+
}
545+
buffer.value().into()
546+
}
532547
}
533548

534549
impl<'pr> Drop for ParseResult<'pr> {
@@ -537,10 +552,176 @@ impl<'pr> Drop for ParseResult<'pr> {
537552
pm_node_destroy(self.parser.as_ptr(), self.node.as_ptr());
538553
pm_parser_free(self.parser.as_ptr());
539554
drop(Box::from_raw(self.parser.as_ptr()));
555+
556+
pm_options_free(self.options.as_ptr());
557+
drop(Box::from_raw(self.options.as_ptr()));
558+
}
559+
}
560+
}
561+
562+
/**
563+
* A scope of locals surrounding the code that is being parsed.
564+
*/
565+
#[derive(Debug, Default, Clone)]
566+
pub struct OptionsScope {
567+
/** Flags for the set of forwarding parameters in this scope. */
568+
pub forwarding_flags: u8,
569+
/** The names of the locals in the scope. */
570+
pub locals: Vec<String>,
571+
}
572+
573+
/// The options that can be passed to the parser.
574+
#[allow(clippy::struct_excessive_bools)]
575+
#[derive(Debug, Clone)]
576+
pub struct Options {
577+
/** The name of the file that is currently being parsed. */
578+
pub filepath: String,
579+
/**
580+
* The line within the file that the parse starts on. This value is
581+
* 1-indexed.
582+
*/
583+
pub line: i32,
584+
/**
585+
* The name of the encoding that the source file is in. Note that this must
586+
* correspond to a name that can be found with Encoding.find in Ruby.
587+
*/
588+
pub encoding: String,
589+
/**
590+
* Whether or not the frozen string literal option has been set.
591+
* May be (corresponding to `Some(false)`, `Some(true)`, and `None` respectively):
592+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
593+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
594+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
595+
*/
596+
pub frozen_string_literal: Option<bool>,
597+
/** A bitset of the various options that were set on the command line. */
598+
pub command_line: u8,
599+
/**
600+
* The version of prism that we should be parsing with. This is used to
601+
* allow consumers to specify which behavior they want in case they need to
602+
* parse exactly as a specific version of CRuby.
603+
*/
604+
pub version: pm_options_version_t,
605+
/**
606+
* Whether or not the encoding magic comments should be respected. This is a
607+
* niche use-case where you want to parse a file with a specific encoding
608+
* but ignore any encoding magic comments at the top of the file.
609+
*/
610+
pub encoding_locked: bool,
611+
/**
612+
* When the file being parsed is the main script, the shebang will be
613+
* considered for command-line flags (or for implicit -x). The caller needs
614+
* to pass this information to the parser so that it can behave correctly.
615+
*/
616+
pub main_script: bool,
617+
/**
618+
* When the file being parsed is considered a "partial" script, jumps will
619+
* not be marked as errors if they are not contained within loops/blocks.
620+
* This is used in the case that you're parsing a script that you know will
621+
* be embedded inside another script later, but you do not have that context
622+
* yet. For example, when parsing an ERB template that will be evaluated
623+
* inside another script.
624+
*/
625+
pub partial_script: bool,
626+
/**
627+
* Whether or not the parser should freeze the nodes that it creates. This
628+
* makes it possible to have a deeply frozen AST that is safe to share
629+
* between concurrency primitives.
630+
*/
631+
pub freeze: bool,
632+
/**
633+
* The scopes surrounding the code that is being parsed. For most parses
634+
* this will be empty, but for evals it will be the locals that are in scope
635+
* surrounding the eval. Scopes are ordered from the outermost scope to the
636+
* innermost one.
637+
*/
638+
pub scopes: Vec<OptionsScope>,
639+
}
640+
641+
impl Default for Options {
642+
fn default() -> Self {
643+
Self {
644+
filepath: String::new(),
645+
line: 1,
646+
encoding: String::new(),
647+
frozen_string_literal: None,
648+
command_line: 0,
649+
version: pm_options_version_t::PM_OPTIONS_VERSION_LATEST,
650+
encoding_locked: false,
651+
main_script: true,
652+
partial_script: false,
653+
freeze: false,
654+
scopes: Vec::new(),
655+
}
656+
}
657+
}
658+
659+
impl Options {
660+
#[allow(clippy::cast_possible_truncation)]
661+
fn to_binary_string(&self) -> Vec<u8> {
662+
let mut output = Vec::new();
663+
664+
output.extend((self.filepath.len() as u32).to_ne_bytes());
665+
output.extend(self.filepath.as_bytes());
666+
output.extend(self.line.to_ne_bytes());
667+
output.extend((self.encoding.len() as u32).to_ne_bytes());
668+
output.extend(self.encoding.as_bytes());
669+
output.extend(self.frozen_string_literal.map_or_else(|| 0i8, |frozen| if frozen { 1 } else { -1 }).to_ne_bytes());
670+
output.push(self.command_line);
671+
output.extend((self.version as u8).to_ne_bytes());
672+
output.push(self.encoding_locked.into());
673+
output.push(self.main_script.into());
674+
output.push(self.partial_script.into());
675+
output.push(self.freeze.into());
676+
output.extend((self.scopes.len() as u32).to_ne_bytes());
677+
for scope in &self.scopes {
678+
output.extend((scope.locals.len() as u32).to_ne_bytes());
679+
output.extend(scope.forwarding_flags.to_ne_bytes());
680+
for local in &scope.locals {
681+
output.extend((local.len() as u32).to_ne_bytes());
682+
output.extend(local.as_bytes());
683+
}
684+
}
685+
output
686+
}
687+
}
688+
689+
struct Buffer {
690+
buffer: pm_buffer_t,
691+
}
692+
693+
impl Default for Buffer {
694+
fn default() -> Self {
695+
let buffer = unsafe {
696+
let mut uninit = MaybeUninit::<pm_buffer_t>::uninit();
697+
let initialized = pm_buffer_init(uninit.as_mut_ptr());
698+
assert!(initialized);
699+
uninit.assume_init()
700+
};
701+
Self { buffer }
702+
}
703+
}
704+
705+
impl Buffer {
706+
fn length(&self) -> usize {
707+
unsafe { pm_buffer_length(&self.buffer) }
708+
}
709+
710+
fn value(&self) -> &[u8] {
711+
unsafe {
712+
let value = pm_buffer_value(&self.buffer);
713+
let value = value.cast::<u8>().cast_const();
714+
std::slice::from_raw_parts(value, self.length())
540715
}
541716
}
542717
}
543718

719+
impl Drop for Buffer {
720+
fn drop(&mut self) {
721+
unsafe { pm_buffer_free(&mut self.buffer) }
722+
}
723+
}
724+
544725
/// Parses the given source string and returns a parse result.
545726
///
546727
/// # Panics
@@ -549,25 +730,56 @@ impl<'pr> Drop for ParseResult<'pr> {
549730
///
550731
#[must_use]
551732
pub fn parse(source: &[u8]) -> ParseResult<'_> {
733+
parse_with_options(source, &Options::default())
734+
}
735+
736+
/// Parses the given source string using the given options and returns a parse result.
737+
///
738+
/// # Panics
739+
///
740+
/// Panics if the parser fails to initialize.
741+
///
742+
#[must_use]
743+
pub fn parse_with_options<'pr>(source: &'pr [u8], options: &Options) -> ParseResult<'pr> {
744+
let options_string = options.to_binary_string();
552745
unsafe {
553746
let uninit = Box::new(MaybeUninit::<pm_parser_t>::uninit());
554747
let uninit = Box::into_raw(uninit);
555748

556-
pm_parser_init((*uninit).as_mut_ptr(), source.as_ptr(), source.len(), std::ptr::null());
749+
let options = Box::into_raw(Box::new(MaybeUninit::<pm_options_t>::zeroed()));
750+
pm_options_read((*options).as_mut_ptr(), options_string.as_ptr().cast());
751+
let options = NonNull::new((*options).assume_init_mut()).unwrap();
752+
753+
pm_parser_init((*uninit).as_mut_ptr(), source.as_ptr(), source.len(), options.as_ptr());
557754

558755
let parser = (*uninit).assume_init_mut();
559756
let parser = NonNull::new_unchecked(parser);
560757

561758
let node = pm_parse(parser.as_ptr());
562759
let node = NonNull::new_unchecked(node);
563760

564-
ParseResult { source, parser, node }
761+
ParseResult { source, parser, node, options_string, options }
762+
}
763+
}
764+
765+
/// Parses the given source string using the given options and returns the serialized parse result as bytes.
766+
///
767+
/// # Panics
768+
///
769+
/// Panics if the output buffer fails to initialize.
770+
#[must_use]
771+
pub fn serialize_parse(source: &[u8], options: &Options) -> Vec<u8> {
772+
let mut buffer = Buffer::default();
773+
let opts = options.to_binary_string();
774+
unsafe {
775+
pm_serialize_parse(&mut buffer.buffer, source.as_ptr(), source.len(), opts.as_ptr().cast());
565776
}
777+
buffer.value().into()
566778
}
567779

568780
#[cfg(test)]
569781
mod tests {
570-
use super::parse;
782+
use super::{parse, parse_with_options, serialize_parse};
571783

572784
#[test]
573785
fn comments_test() {
@@ -1157,6 +1369,28 @@ end
11571369
assert!((value - 1.0).abs() < f64::EPSILON);
11581370
}
11591371

1372+
#[test]
1373+
fn serialize_parse_test() {
1374+
let source = r#"__FILE__"#;
1375+
let options = crate::Options { filepath: "test.rb".to_string(), ..Default::default() };
1376+
let bytes = serialize_parse(source.as_ref(), &options);
1377+
1378+
let result = parse_with_options(source.as_bytes(), &options);
1379+
1380+
assert_eq!(bytes, result.serialize());
1381+
1382+
let expected = r#"@ ProgramNode (location: (1,0)-(1,8))
1383+
+-- locals: []
1384+
+-- statements:
1385+
@ StatementsNode (location: (1,0)-(1,8))
1386+
+-- body: (length: 1)
1387+
+-- @ SourceFileNode (location: (1,0)-(1,8))
1388+
+-- StringFlags: nil
1389+
+-- filepath: "test.rb"
1390+
"#;
1391+
assert_eq!(expected, result.node().pretty_print().as_str());
1392+
}
1393+
11601394
#[test]
11611395
fn node_field_lifetime_test() {
11621396
// The code below wouldn't typecheck prior to https://github.com/ruby/prism/pull/2519,

0 commit comments

Comments
 (0)