Skip to content

Commit bf6341e

Browse files
committed
Ruby: add support for extracting overlays
1 parent f038e2f commit bf6341e

File tree

5 files changed

+56
-0
lines changed

5 files changed

+56
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ruby/codeql-extractor.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ display_name: "Ruby"
33
version: 0.1.0
44
column_kind: "utf8"
55
legacy_qltest_extraction: true
6+
overlay_support_version: 20250108
67
build_modes:
78
- none
89
github_api_languages:

ruby/extractor/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@ rayon = "1.10.0"
1717
regex = "1.11.1"
1818
encoding = "0.2"
1919
lazy_static = "1.5.0"
20+
serde_json = "1.0.140"
2021

2122
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }

ruby/extractor/src/extractor.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use clap::Args;
22
use lazy_static::lazy_static;
33
use rayon::prelude::*;
4+
use serde_json;
45
use std::borrow::Cow;
6+
use std::collections::HashSet;
57
use std::fs;
68
use std::io::BufRead;
79
use std::path::{Path, PathBuf};
@@ -78,6 +80,8 @@ pub fn run(options: Options) -> std::io::Result<()> {
7880

7981
let file_list = fs::File::open(file_paths::path_from_string(&options.file_list))?;
8082

83+
let overlay_changed_files: Option<HashSet<PathBuf>> = get_overlay_changed_files();
84+
8185
let language: Language = tree_sitter_ruby::LANGUAGE.into();
8286
let erb: Language = tree_sitter_embedded_template::LANGUAGE.into();
8387
// Look up tree-sitter kind ids now, to avoid string comparisons when scanning ERB files.
@@ -94,6 +98,13 @@ pub fn run(options: Options) -> std::io::Result<()> {
9498
.try_for_each(|line| {
9599
let mut diagnostics_writer = diagnostics.logger();
96100
let path = PathBuf::from(line).canonicalize()?;
101+
match &overlay_changed_files {
102+
Some(changed_files) if !changed_files.contains(&path) => {
103+
// We are extracting an overlay and this file is not in the list of changed files, so we should skip it.
104+
return Result::Ok(());
105+
}
106+
_ => {},
107+
}
97108
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
98109
let mut source = std::fs::read(&path)?;
99110
let mut needs_conversion = false;
@@ -212,6 +223,12 @@ pub fn run(options: Options) -> std::io::Result<()> {
212223
let mut trap_writer = trap::Writer::new();
213224
extractor::populate_empty_location(&mut trap_writer);
214225
let res = write_trap(&trap_dir, path, &trap_writer, trap_compression);
226+
if let Ok(output_path) = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT") {
227+
// We're extracting an overlay base. For now, we don't have any metadata we need to store
228+
// that would get read when extracting the overlay, but the CLI expects us to write
229+
// *something*. An empty file will do.
230+
std::fs::write(output_path, b"")?;
231+
}
215232
tracing::info!("Extraction complete");
216233
res
217234
}
@@ -302,6 +319,41 @@ fn skip_space(content: &[u8], index: usize) -> usize {
302319
}
303320
index
304321
}
322+
323+
/**
324+
* If the relevant environment variable has been set by the CLI, indicating that we are extracting
325+
* an overlay, this function reads the JSON file at the path given by its value, and returns a set
326+
* of canonicalized paths of source files that have changed and should therefore be extracted.
327+
*
328+
* If the environment variable is not set (i.e. we're not extracting an overlay), or if the file
329+
* cannot be read, this function returns `None`. In that case, all files should be extracted.
330+
*/
331+
fn get_overlay_changed_files() -> Option<HashSet<PathBuf>> {
332+
let path = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_CHANGES").ok()?;
333+
let file_content = fs::read_to_string(path).ok()?;
334+
let json_value: serde_json::Value = serde_json::from_str(&file_content).ok()?;
335+
336+
// The JSON file is expected to have the following structure:
337+
// {
338+
// "changes": [
339+
// "relative/path/to/changed/file1.rb",
340+
// "relative/path/to/changed/file2.rb",
341+
// ...
342+
// ]
343+
// }
344+
json_value
345+
.get("changes")?
346+
.as_array()?
347+
.iter()
348+
.map(|change| {
349+
change
350+
.as_str()
351+
.map(|s| PathBuf::from(s).canonicalize().ok())
352+
.flatten()
353+
})
354+
.collect()
355+
}
356+
305357
fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
306358
let mut index = 0;
307359
// skip UTF-8 BOM marker if there is one

0 commit comments

Comments
 (0)