Skip to content

Commit ac8b41a

Browse files
authored
Merge pull request #19680 from github/tausbn/javascript-exclude-obviously-generated-files
JavaScript: Don't extract obviously generated files
2 parents 3fbe348 + e3d9d92 commit ac8b41a

File tree

5 files changed

+104
-4
lines changed

5 files changed

+104
-4
lines changed

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939

4040
import com.google.gson.Gson;
4141
import com.google.gson.JsonParseException;
42+
import com.semmle.js.extractor.tsconfig.TsConfigJson;
43+
import com.semmle.js.extractor.tsconfig.CompilerOptions;
4244
import com.semmle.js.dependencies.AsyncFetcher;
4345
import com.semmle.js.dependencies.DependencyResolver;
4446
import com.semmle.js.dependencies.packument.PackageJson;
@@ -745,6 +747,26 @@ private CompletableFuture<?> extractSource() throws IOException {
745747
.filter(p -> !isFileTooLarge(p))
746748
.sorted(PATH_ORDERING)
747749
.collect(Collectors.toCollection(() -> new LinkedHashSet<>()));
750+
// gather all output directories specified in tsconfig.json files
751+
final List<Path> outDirs = new ArrayList<>();
752+
for (Path cfg : tsconfigFiles) {
753+
try {
754+
String txt = new WholeIO().read(cfg);
755+
TsConfigJson root = new Gson().fromJson(txt, TsConfigJson.class);
756+
if (root != null && root.getCompilerOptions() != null) {
757+
if (root.getCompilerOptions().getOutDir() == null) {
758+
// no outDir specified, so skip this tsconfig.json
759+
continue;
760+
}
761+
Path odir = cfg.getParent().resolve(root.getCompilerOptions().getOutDir()).toAbsolutePath().normalize();
762+
outDirs.add(odir);
763+
}
764+
} catch (Exception e) {
765+
// ignore malformed tsconfig or missing fields
766+
}
767+
}
768+
// exclude files in output directories as configured in tsconfig.json
769+
filesToExtract.removeIf(f -> outDirs.stream().anyMatch(od -> f.startsWith(od)));
748770

749771
DependencyInstallationResult dependencyInstallationResult = DependencyInstallationResult.empty;
750772
if (!tsconfigFiles.isEmpty()) {
@@ -796,9 +818,19 @@ private CompletableFuture<?> extractFiles(
796818
*/
797819
private boolean isFileDerivedFromTypeScriptFile(Path path, Set<Path> extractedFiles) {
798820
String name = path.getFileName().toString();
799-
if (!name.endsWith(".js"))
821+
// only skip JS variants when a corresponding TS/TSX file was already extracted
822+
if (!(name.endsWith(".js")
823+
|| name.endsWith(".cjs")
824+
|| name.endsWith(".mjs")
825+
|| name.endsWith(".jsx")
826+
|| name.endsWith(".cjsx")
827+
|| name.endsWith(".mjsx"))) {
800828
return false;
801-
String stem = name.substring(0, name.length() - ".js".length());
829+
}
830+
// strip off extension
831+
int dot = name.lastIndexOf('.');
832+
String stem = dot != -1 ? name.substring(0, dot) : name;
833+
// if a TS/TSX file with same base name was extracted, skip this file
802834
for (String ext : FileType.TYPESCRIPT.getExtensions()) {
803835
if (extractedFiles.contains(path.getParent().resolve(stem + ext))) {
804836
return true;
@@ -1154,7 +1186,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
11541186
}
11551187

11561188
// extract TypeScript projects from 'tsconfig.json'
1157-
if (typeScriptMode == TypeScriptMode.FULL
1189+
if (typeScriptMode != TypeScriptMode.NONE
11581190
&& treatAsTSConfig(file.getFileName().toString())
11591191
&& !excludes.contains(file)
11601192
&& isFileIncluded(file)) {
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.semmle.js.extractor.tsconfig;
2+
3+
public class CompilerOptions {
4+
private String outDir;
5+
6+
public String getOutDir() {
7+
return outDir;
8+
}
9+
10+
public void setOutDir(String outDir) {
11+
this.outDir = outDir;
12+
}
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.semmle.js.extractor.tsconfig;
2+
3+
public class TsConfigJson {
4+
private CompilerOptions compilerOptions;
5+
6+
public CompilerOptions getCompilerOptions() {
7+
return compilerOptions;
8+
}
9+
10+
public void setCompilerOptions(CompilerOptions compilerOptions) {
11+
this.compilerOptions = compilerOptions;
12+
}
13+
}

javascript/extractor/test/com/semmle/js/extractor/test/AutoBuildTests.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ public void extractTypeScriptFiles(
135135
FileExtractors extractors) {
136136
for (Path f : files) {
137137
actual.add(f.toString());
138+
extractedFiles.add(f);
138139
}
139140
}
140141

@@ -175,7 +176,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
175176

176177
@Test
177178
public void basicTest() throws IOException {
178-
addFile(true, LGTM_SRC, "tst.js");
179+
addFile(false, LGTM_SRC, "tst.js");
179180
addFile(true, LGTM_SRC, "tst.ts");
180181
addFile(true, LGTM_SRC, "tst.html");
181182
addFile(true, LGTM_SRC, "tst.xsjs");
@@ -203,6 +204,43 @@ public void typescriptWrongConfig() throws IOException {
203204
runTest();
204205
}
205206

207+
@Test
208+
public void skipJsFilesDerivedFromTypeScriptFiles() throws IOException {
209+
// JavaScript files derived from TypeScript (.js, .cjs, .mjs, .jsx, .cjsx, .mjsx) should be skipped when the TypeScript source with the same base name is extracted
210+
envVars.put("LGTM_INDEX_TYPESCRIPT", "basic");
211+
// Add TypeScript sources
212+
addFile(true, LGTM_SRC, "foo.ts");
213+
addFile(true, LGTM_SRC, "bar.tsx");
214+
// Add derived JS variants (should be skipped)
215+
addFile(false, LGTM_SRC, "foo.js");
216+
addFile(false, LGTM_SRC, "bar.jsx");
217+
addFile(false, LGTM_SRC, "foo.cjs");
218+
addFile(false, LGTM_SRC, "foo.mjs");
219+
addFile(false, LGTM_SRC, "bar.cjsx");
220+
addFile(false, LGTM_SRC, "bar.mjsx");
221+
// A normal JS file without TS counterpart should be extracted
222+
addFile(true, LGTM_SRC, "normal.js");
223+
runTest();
224+
}
225+
226+
@Test
227+
public void skipFilesInTsconfigOutDir() throws IOException {
228+
envVars.put("LGTM_INDEX_TYPESCRIPT", "basic");
229+
// Files under outDir in tsconfig.json should be excluded
230+
// Create tsconfig.json with outDir set to "dist"
231+
addFile(true, LGTM_SRC, "tsconfig.json");
232+
Path config = Paths.get(LGTM_SRC.toString(), "tsconfig.json");
233+
Files.write(config,
234+
"{\"compilerOptions\":{\"outDir\":\"dist\"}}".getBytes(StandardCharsets.UTF_8));
235+
// Add files outside outDir (should be extracted)
236+
addFile(true, LGTM_SRC, "src", "app.ts");
237+
addFile(true, LGTM_SRC, "main.js");
238+
// Add files under dist/outDir (should be skipped)
239+
addFile(false, LGTM_SRC, "dist", "generated.js");
240+
addFile(false, LGTM_SRC, "dist", "sub", "x.js");
241+
runTest();
242+
}
243+
206244
@Test
207245
public void includeFile() throws IOException {
208246
envVars.put("LGTM_INDEX_INCLUDE", "tst.js");
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
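* The JavaScript extractor now skips JavaScript files (`.js`, `.cjs`, `.mjs`, `.jsx`, `.cjsx`, `.mjsx`) that appear to be generated from TypeScript, that is, when a TypeScript file with the same base name in the same directory has already been extracted. It also skips all files under the output directory given by `compilerOptions.outDir` in a `tsconfig.json` file.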

0 commit comments

Comments
 (0)