diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 22d008637c9a..f96211bd5c41 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -39,6 +39,8 @@ import com.google.gson.Gson; import com.google.gson.JsonParseException; +import com.semmle.js.extractor.tsconfig.TsConfigJson; +import com.semmle.js.extractor.tsconfig.CompilerOptions; import com.semmle.js.dependencies.AsyncFetcher; import com.semmle.js.dependencies.DependencyResolver; import com.semmle.js.dependencies.packument.PackageJson; @@ -745,6 +747,26 @@ private CompletableFuture extractSource() throws IOException { .filter(p -> !isFileTooLarge(p)) .sorted(PATH_ORDERING) .collect(Collectors.toCollection(() -> new LinkedHashSet<>())); + // gather all output directories specified in tsconfig.json files + final List outDirs = new ArrayList<>(); + for (Path cfg : tsconfigFiles) { + try { + String txt = new WholeIO().read(cfg); + TsConfigJson root = new Gson().fromJson(txt, TsConfigJson.class); + if (root != null && root.getCompilerOptions() != null) { + if (root.getCompilerOptions().getOutDir() == null) { + // no outDir specified, so skip this tsconfig.json + continue; + } + Path odir = cfg.getParent().resolve(root.getCompilerOptions().getOutDir()).toAbsolutePath().normalize(); + outDirs.add(odir); + } + } catch (Exception e) { + // ignore malformed tsconfig or missing fields + } + } + // exclude files in output directories as configured in tsconfig.json + filesToExtract.removeIf(f -> outDirs.stream().anyMatch(od -> f.startsWith(od))); DependencyInstallationResult dependencyInstallationResult = DependencyInstallationResult.empty; if (!tsconfigFiles.isEmpty()) { @@ -796,9 +818,19 @@ private CompletableFuture extractFiles( */ private boolean isFileDerivedFromTypeScriptFile(Path path, Set extractedFiles) { String name = 
path.getFileName().toString(); - if (!name.endsWith(".js")) + // only skip JS variants when a corresponding TS/TSX file was already extracted + if (!(name.endsWith(".js") + || name.endsWith(".cjs") + || name.endsWith(".mjs") + || name.endsWith(".jsx") + || name.endsWith(".cjsx") + || name.endsWith(".mjsx"))) { return false; - String stem = name.substring(0, name.length() - ".js".length()); + } + // strip off extension + int dot = name.lastIndexOf('.'); + String stem = dot != -1 ? name.substring(0, dot) : name; + // if a TS/TSX file with same base name was extracted, skip this file for (String ext : FileType.TYPESCRIPT.getExtensions()) { if (extractedFiles.contains(path.getParent().resolve(stem + ext))) { return true; @@ -1154,7 +1186,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) } // extract TypeScript projects from 'tsconfig.json' - if (typeScriptMode == TypeScriptMode.FULL + if (typeScriptMode != TypeScriptMode.NONE && treatAsTSConfig(file.getFileName().toString()) && !excludes.contains(file) && isFileIncluded(file)) { diff --git a/javascript/extractor/src/com/semmle/js/extractor/tsconfig/CompilerOptions.java b/javascript/extractor/src/com/semmle/js/extractor/tsconfig/CompilerOptions.java new file mode 100644 index 000000000000..fa7b664f2eb0 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/extractor/tsconfig/CompilerOptions.java @@ -0,0 +1,13 @@ +package com.semmle.js.extractor.tsconfig; + +public class CompilerOptions { + private String outDir; + + public String getOutDir() { + return outDir; + } + + public void setOutDir(String outDir) { + this.outDir = outDir; + } +} diff --git a/javascript/extractor/src/com/semmle/js/extractor/tsconfig/TsConfigJson.java b/javascript/extractor/src/com/semmle/js/extractor/tsconfig/TsConfigJson.java new file mode 100644 index 000000000000..9e12d5cc0aa9 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/extractor/tsconfig/TsConfigJson.java @@ -0,0 +1,13 @@ +package 
com.semmle.js.extractor.tsconfig; + +public class TsConfigJson { + private CompilerOptions compilerOptions; + + public CompilerOptions getCompilerOptions() { + return compilerOptions; + } + + public void setCompilerOptions(CompilerOptions compilerOptions) { + this.compilerOptions = compilerOptions; + } +} diff --git a/javascript/extractor/test/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/test/com/semmle/js/extractor/test/AutoBuildTests.java index 0a924d54319a..28c8e593dcd1 100644 --- a/javascript/extractor/test/com/semmle/js/extractor/test/AutoBuildTests.java +++ b/javascript/extractor/test/com/semmle/js/extractor/test/AutoBuildTests.java @@ -135,6 +135,7 @@ public void extractTypeScriptFiles( FileExtractors extractors) { for (Path f : files) { actual.add(f.toString()); + extractedFiles.add(f); } } @@ -175,7 +176,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) @Test public void basicTest() throws IOException { - addFile(true, LGTM_SRC, "tst.js"); + addFile(false, LGTM_SRC, "tst.js"); addFile(true, LGTM_SRC, "tst.ts"); addFile(true, LGTM_SRC, "tst.html"); addFile(true, LGTM_SRC, "tst.xsjs"); @@ -203,6 +204,43 @@ public void typescriptWrongConfig() throws IOException { runTest(); } + @Test + public void skipJsFilesDerivedFromTypeScriptFiles() throws IOException { + // JS-derived files (.js, .cjs, .mjs, .jsx, .cjsx, .mjsx) should be skipped when TS indexing + envVars.put("LGTM_INDEX_TYPESCRIPT", "basic"); + // Add TypeScript sources + addFile(true, LGTM_SRC, "foo.ts"); + addFile(true, LGTM_SRC, "bar.tsx"); + // Add derived JS variants (should be skipped) + addFile(false, LGTM_SRC, "foo.js"); + addFile(false, LGTM_SRC, "bar.jsx"); + addFile(false, LGTM_SRC, "foo.cjs"); + addFile(false, LGTM_SRC, "foo.mjs"); + addFile(false, LGTM_SRC, "bar.cjsx"); + addFile(false, LGTM_SRC, "bar.mjsx"); + // A normal JS file without TS counterpart should be extracted + addFile(true, LGTM_SRC, "normal.js"); + runTest(); + } + + @Test 
+ public void skipFilesInTsconfigOutDir() throws IOException { + envVars.put("LGTM_INDEX_TYPESCRIPT", "basic"); + // Files under outDir in tsconfig.json should be excluded + // Create tsconfig.json with outDir set to "dist" + addFile(true, LGTM_SRC, "tsconfig.json"); + Path config = Paths.get(LGTM_SRC.toString(), "tsconfig.json"); + Files.write(config, + "{\"compilerOptions\":{\"outDir\":\"dist\"}}".getBytes(StandardCharsets.UTF_8)); + // Add files outside outDir (should be extracted) + addFile(true, LGTM_SRC, "src", "app.ts"); + addFile(true, LGTM_SRC, "main.js"); + // Add files under dist/outDir (should be skipped) + addFile(false, LGTM_SRC, "dist", "generated.js"); + addFile(false, LGTM_SRC, "dist", "sub", "x.js"); + runTest(); + } + @Test public void includeFile() throws IOException { envVars.put("LGTM_INDEX_INCLUDE", "tst.js"); diff --git a/javascript/ql/lib/change-notes/2025-06-05-skip-obviously-generated-files.md b/javascript/ql/lib/change-notes/2025-06-05-skip-obviously-generated-files.md new file mode 100644 index 000000000000..16d81cb4cc30 --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-06-05-skip-obviously-generated-files.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The JavaScript extractor now skips generated JavaScript files if the original TypeScript files are already present. It also skips any files in the output directory specified by the `outDir` option in the `compilerOptions` section of a `tsconfig.json` file.