1
1
use clap:: Args ;
2
2
use lazy_static:: lazy_static;
3
3
use rayon:: prelude:: * ;
4
+ use serde_json;
4
5
use std:: borrow:: Cow ;
6
+ use std:: collections:: HashSet ;
5
7
use std:: fs;
6
8
use std:: io:: BufRead ;
7
9
use std:: path:: { Path , PathBuf } ;
@@ -78,6 +80,8 @@ pub fn run(options: Options) -> std::io::Result<()> {
78
80
79
81
let file_list = fs:: File :: open ( file_paths:: path_from_string ( & options. file_list ) ) ?;
80
82
83
+ let overlay_changed_files: Option < HashSet < PathBuf > > = get_overlay_changed_files ( ) ;
84
+
81
85
let language: Language = tree_sitter_ruby:: LANGUAGE . into ( ) ;
82
86
let erb: Language = tree_sitter_embedded_template:: LANGUAGE . into ( ) ;
83
87
// Look up tree-sitter kind ids now, to avoid string comparisons when scanning ERB files.
@@ -94,6 +98,13 @@ pub fn run(options: Options) -> std::io::Result<()> {
94
98
. try_for_each ( |line| {
95
99
let mut diagnostics_writer = diagnostics. logger ( ) ;
96
100
let path = PathBuf :: from ( line) . canonicalize ( ) ?;
101
+ match & overlay_changed_files {
102
+ Some ( changed_files) if !changed_files. contains ( & path) => {
103
+ // We are extracting an overlay and this file is not in the list of changed files, so we should skip it.
104
+ return Result :: Ok ( ( ) ) ;
105
+ }
106
+ _ => { } ,
107
+ }
97
108
let src_archive_file = file_paths:: path_for ( & src_archive_dir, & path, "" ) ;
98
109
let mut source = std:: fs:: read ( & path) ?;
99
110
let mut needs_conversion = false ;
@@ -212,6 +223,12 @@ pub fn run(options: Options) -> std::io::Result<()> {
212
223
let mut trap_writer = trap:: Writer :: new ( ) ;
213
224
extractor:: populate_empty_location ( & mut trap_writer) ;
214
225
let res = write_trap ( & trap_dir, path, & trap_writer, trap_compression) ;
226
+ if let Ok ( output_path) = std:: env:: var ( "CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT" ) {
227
+ // We're extracting an overlay base. For now, we don't have any metadata we need to store
228
+ // that would get read when extracting the overlay, but the CLI expects us to write
229
+ // *something*. An empty file will do.
230
+ std:: fs:: write ( output_path, b"" ) ?;
231
+ }
215
232
tracing:: info!( "Extraction complete" ) ;
216
233
res
217
234
}
@@ -302,6 +319,41 @@ fn skip_space(content: &[u8], index: usize) -> usize {
302
319
}
303
320
index
304
321
}
322
+
323
+ /**
324
+ * If the relevant environment variable has been set by the CLI, indicating that we are extracting
325
+ * an overlay, this function reads the JSON file at the path given by its value, and returns a set
326
+ * of canonicalized paths of source files that have changed and should therefore be extracted.
327
+ *
328
+ * If the environment variable is not set (i.e. we're not extracting an overlay), or if the file
329
+ * cannot be read, this function returns `None`. In that case, all files should be extracted.
330
+ */
331
+ fn get_overlay_changed_files ( ) -> Option < HashSet < PathBuf > > {
332
+ let path = std:: env:: var ( "CODEQL_EXTRACTOR_RUBY_OVERLAY_CHANGES" ) . ok ( ) ?;
333
+ let file_content = fs:: read_to_string ( path) . ok ( ) ?;
334
+ let json_value: serde_json:: Value = serde_json:: from_str ( & file_content) . ok ( ) ?;
335
+
336
+ // The JSON file is expected to have the following structure:
337
+ // {
338
+ // "changes": [
339
+ // "relative/path/to/changed/file1.rb",
340
+ // "relative/path/to/changed/file2.rb",
341
+ // ...
342
+ // ]
343
+ // }
344
+ json_value
345
+ . get ( "changes" ) ?
346
+ . as_array ( ) ?
347
+ . iter ( )
348
+ . map ( |change| {
349
+ change
350
+ . as_str ( )
351
+ . map ( |s| PathBuf :: from ( s) . canonicalize ( ) . ok ( ) )
352
+ . flatten ( )
353
+ } )
354
+ . collect ( )
355
+ }
356
+
305
357
fn scan_coding_comment ( content : & [ u8 ] ) -> std:: option:: Option < Cow < str > > {
306
358
let mut index = 0 ;
307
359
// skip UTF-8 BOM marker if there is one
0 commit comments