diff --git a/api/shadow.api b/api/shadow.api index 218f8cb7e..0d90a6c24 100644 --- a/api/shadow.api +++ b/api/shadow.api @@ -329,6 +329,15 @@ public class com/github/jengelman/gradle/plugins/shadow/transformers/ComponentsX public final class com/github/jengelman/gradle/plugins/shadow/transformers/ComponentsXmlResourceTransformer$Companion { } +public class com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformer : com/github/jengelman/gradle/plugins/shadow/transformers/PatternFilterableResourceTransformer { + public fun (Lorg/gradle/api/model/ObjectFactory;)V + public fun (Lorg/gradle/api/model/ObjectFactory;Lorg/gradle/api/tasks/util/PatternSet;)V + public fun canTransformResource (Lorg/gradle/api/file/FileTreeElement;)Z + public final fun getObjectFactory ()Lorg/gradle/api/model/ObjectFactory; + public fun hasTransformedResource ()Z + public fun modifyOutputStream (Lorg/apache/tools/zip/ZipOutputStream;Z)V +} + public class com/github/jengelman/gradle/plugins/shadow/transformers/DontIncludeResourceTransformer : com/github/jengelman/gradle/plugins/shadow/transformers/ResourceTransformer { public fun (Lorg/gradle/api/model/ObjectFactory;)V public fun canTransformResource (Lorg/gradle/api/file/FileTreeElement;)Z diff --git a/build.gradle.kts b/build.gradle.kts index a558b798c..151dc231e 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -119,6 +119,7 @@ dependencies { compileOnly(libs.kotlin.gradlePlugin) compileOnly(libs.kotlin.reflect) api(libs.apache.ant) // Types from Ant are exposed in the public API. 
+ implementation(libs.apache.commonsCodec) implementation(libs.apache.commonsIo) implementation(libs.apache.log4j) implementation(libs.asm) diff --git a/docs/changes/README.md b/docs/changes/README.md index 65c431963..098586a10 100644 --- a/docs/changes/README.md +++ b/docs/changes/README.md @@ -21,6 +21,7 @@ enableKotlinModuleRemapping = false } ``` +- Add `DeduplicatingResourceTransformer` to deduplicate on path _and_ content. ([#1859](https://github.com/GradleUp/shadow/pull/1859)) ### Changed diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 66d678c43..f5e69498a 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -7,6 +7,7 @@ pluginPublish = "2.0.0" [libraries] apache-ant = "org.apache.ant:ant:1.10.15" +apache-commonsCodec = "commons-codec:commons-codec:1.20.0" apache-commonsIo = "commons-io:commons-io:2.21.0" apache-log4j = "org.apache.logging.log4j:log4j-core:2.25.2" apache-maven-modelBuilder = "org.apache.maven:maven-model:3.9.11" diff --git a/src/functionalTest/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/TransformersTest.kt b/src/functionalTest/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/TransformersTest.kt index 9da1f572b..b36d8ecb1 100644 --- a/src/functionalTest/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/TransformersTest.kt +++ b/src/functionalTest/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/TransformersTest.kt @@ -2,13 +2,17 @@ package com.github.jengelman.gradle.plugins.shadow.transformers import assertk.all import assertk.assertThat +import assertk.assertions.contains +import assertk.assertions.containsExactlyInAnyOrder import assertk.assertions.isEqualTo import assertk.assertions.isNotEqualTo import assertk.assertions.isNotNull import com.github.jengelman.gradle.plugins.shadow.internal.mainClassAttributeKey import com.github.jengelman.gradle.plugins.shadow.testkit.containsAtLeast +import 
com.github.jengelman.gradle.plugins.shadow.testkit.containsExactlyInAnyOrder import com.github.jengelman.gradle.plugins.shadow.testkit.containsOnly import com.github.jengelman.gradle.plugins.shadow.testkit.getContent +import com.github.jengelman.gradle.plugins.shadow.testkit.getContents import com.github.jengelman.gradle.plugins.shadow.testkit.getStream import com.github.jengelman.gradle.plugins.shadow.testkit.invariantEolString import com.github.jengelman.gradle.plugins.shadow.testkit.requireResourceAsPath @@ -20,11 +24,73 @@ import kotlin.io.path.readText import kotlin.io.path.writeText import kotlin.reflect.KClass import org.apache.logging.log4j.core.config.plugins.processor.PluginProcessor.PLUGIN_CACHE_FILE +import org.gradle.testkit.runner.TaskOutcome.FAILED import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource +import org.junit.jupiter.params.provider.ValueSource class TransformersTest : BaseTransformerTest() { + + @ParameterizedTest + @ValueSource(booleans = [false, true]) + fun deduplicatingResourceTransformer(excludeAll: Boolean) { + val one = buildJarOne { + insert("multiple-contents", "content") + insert("single-source", "content") + insert("same-content-twice", "content") + insert("differing-content-2", "content") + } + val two = buildJarTwo { + insert("multiple-contents", "content-is-different") + insert("same-content-twice", "content") + insert("differing-content-2", "content-is-different") + } + + projectScript.appendText( + transform( + dependenciesBlock = implementationFiles(one, two), + transformerBlock = """ + exclude('multiple-contents') + ${if (excludeAll) "exclude('differing-content-2')" else ""} + """.trimIndent(), + ), + ) + + if (excludeAll) { + runWithSuccess(shadowJarPath) + assertThat(outputShadowedJar).useAll { + containsExactlyInAnyOrder( + // twice: + "multiple-contents", + "multiple-contents", + "single-source", + "same-content-twice", + // twice: + 
"differing-content-2", + "differing-content-2", + "META-INF/", + "META-INF/MANIFEST.MF", + ) + getContents("multiple-contents").containsExactlyInAnyOrder("content", "content-is-different") + getContent("single-source").isEqualTo("content") + getContent("same-content-twice").isEqualTo("content") + getContents("differing-content-2").containsExactlyInAnyOrder("content", "content-is-different") + } + } else { + val buildResult = runWithFailure(shadowJarPath) + assertThat(buildResult).taskOutcomeEquals(shadowJarPath, FAILED) + assertThat(buildResult.output).contains( + // Keep this list approach for Unix/Windows test compatibility. + "Execution failed for task ':shadowJar'.", + "> Found 1 path duplicate(s) with different content in the shadowed JAR:", + " * differing-content-2", + "differing-content-2 (SHA256: ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73)", + "differing-content-2 (SHA256: aa845861bbd4578700e10487d85b25ead8723ee98fbf143df7b7e0bf1cb3385d)", + ) + } + } + @Test fun manifestRetained() { writeClass() diff --git a/src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformer.kt b/src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformer.kt new file mode 100644 index 000000000..3c5498a84 --- /dev/null +++ b/src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformer.kt @@ -0,0 +1,129 @@ +package com.github.jengelman.gradle.plugins.shadow.transformers + +import com.github.jengelman.gradle.plugins.shadow.tasks.FindResourceInClasspath +import java.io.File +import javax.inject.Inject +import org.apache.commons.codec.digest.DigestUtils +import org.apache.tools.zip.ZipOutputStream +import org.gradle.api.GradleException +import org.gradle.api.file.FileTreeElement +import org.gradle.api.model.ObjectFactory +import org.gradle.api.tasks.Internal +import org.gradle.api.tasks.util.PatternSet + +/** + * Transformer to 
include files with identical content only once in the shadowed JAR. + * + * Multiple files with the same path but different content lead to an error. + * + * Some scenarios for duplicate resources in a shadow jar: + * + * - Duplicate `.class` files + * Having duplicate `.class` files with different content is a situation indicating that the resulting jar is + * built with _incompatible_ classes, likely leading to issues during runtime. + * This situation can happen when one dependency is (also) included in an uber jar. + * + * - Duplicate `META-INF/maven/{groupId}/{artifactId}/pom.properties`/`pom.xml` files. + * Some dependencies contain shaded variants of other dependencies. + * Tools that inspect jar files to extract the included dependencies, for example, for license auditing + * or to collect information about all included dependencies, may rely on these files. + * Hence, it is desirable to retain the duplicate `pom.properties`/`pom.xml` resources. + * + * [DeduplicatingResourceTransformer] checks all entries in the resulting jar. + * It is generally not recommended to use any of the [include] configuration functions. + * + * There are reasons to retain duplicate resources with different contents in the resulting jar. + * This can be achieved with the [exclude] configuration functions. + * + * To exclude a path or pattern from being deduplicated, for example, legitimate + * `META-INF/maven/{groupId}/{artifactId}/pom.properties`/`pom.xml` files, configure the transformer with an exclusion + * like the following: + * + * ```kotlin + * tasks.shadowJar { + * transform(DeduplicatingResourceTransformer::class.java) { + * // Keep pom.* files from different Guava versions in the jar. + * exclude("META-INF/maven/com.google.guava/guava/pom.*") + * // Duplicates with different content for all other resource paths will raise an error. + * } + * } + * ``` + * + * *Tip*: the [FindResourceInClasspath] convenience task can be used to find resources in a Gradle + * classpath/configuration. 
+ * + * *Warning* Do **not** combine [PreserveFirstFoundResourceTransformer] with this transformer, + * as they handle duplicates differently and combining them would lead to redundant or unexpected behavior. + */ +@CacheableTransformer +public open class DeduplicatingResourceTransformer( + final override val objectFactory: ObjectFactory, + patternSet: PatternSet, +) : PatternFilterableResourceTransformer(patternSet) { + @get:Internal + internal val sources: MutableMap = mutableMapOf() + + @Inject + public constructor(objectFactory: ObjectFactory) : this(objectFactory, PatternSet()) + + override fun canTransformResource(element: FileTreeElement): Boolean { + val file = element.file + val hash = file.sha256Hex() + + val pathInfos = sources.computeIfAbsent(element.path) { + PathInfos(patternSpec.isSatisfiedBy(element)) + } + val retainInOutput = pathInfos.addFile(hash, file) + + return !retainInOutput + } + + override fun hasTransformedResource(): Boolean = true + + override fun modifyOutputStream(os: ZipOutputStream, preserveFileTimestamps: Boolean) { + val duplicatePaths = duplicateContentViolations() + + if (duplicatePaths.isNotEmpty()) { + val message = buildString { + append("Found ${duplicatePaths.size} path duplicate(s) with different content in the shadowed JAR:\n") + duplicatePaths.forEach { (path, infos) -> + append(" * $path\n") + infos.filesPerHash.forEach { (hash, files) -> + files.forEach { file -> + append(" * ${file.path} (SHA256: $hash)\n") + } + } + } + } + throw GradleException(message) + } + } + + internal fun duplicateContentViolations(): Map = sources.filter { (_, pathInfos) -> + pathInfos.failOnDuplicateContent && pathInfos.uniqueContentCount() > 1 + } + + internal data class PathInfos(val failOnDuplicateContent: Boolean) { + val filesPerHash: MutableMap> = mutableMapOf() + + fun uniqueContentCount() = filesPerHash.size + + fun addFile(hash: String, file: File): Boolean { + val new = hash !in filesPerHash + filesPerHash.getOrPut(hash) { 
mutableListOf() }.add(file) + return new + } + } + + internal companion object { + fun File.sha256Hex(): String { + try { + return inputStream().use { + DigestUtils.sha256Hex(it) + } + } catch (e: Exception) { + throw RuntimeException("Failed to read data or calculate hash for $this", e) + } + } + } +} diff --git a/src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/PreserveFirstFoundResourceTransformer.kt b/src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/PreserveFirstFoundResourceTransformer.kt index e0aaaa825..a8b6527f8 100644 --- a/src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/PreserveFirstFoundResourceTransformer.kt +++ b/src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/PreserveFirstFoundResourceTransformer.kt @@ -19,6 +19,13 @@ import org.gradle.api.tasks.util.PatternSet * want to ensure that only the first found resource is included in the final JAR. If there are multiple resources with * the same path in a project and its dependencies, the first one found should be the project's. * + * This transformer deduplicates included resources based on the path name. + * See [DeduplicatingResourceTransformer] for a transformer that deduplicates based on the paths and contents of + * the resources. + * + * *Warning* Do **not** combine [DeduplicatingResourceTransformer] with this transformer, + * as they handle duplicates differently and combining them would lead to redundant or unexpected behavior. 
+ * * @see [DuplicatesStrategy] * @see [ShadowJar.getDuplicatesStrategy] */ diff --git a/src/test/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformerTest.kt b/src/test/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformerTest.kt new file mode 100644 index 000000000..eba93f87b --- /dev/null +++ b/src/test/kotlin/com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformerTest.kt @@ -0,0 +1,109 @@ +package com.github.jengelman.gradle.plugins.shadow.transformers + +import assertk.assertThat +import assertk.assertions.containsExactlyInAnyOrder +import assertk.assertions.containsOnly +import assertk.assertions.isEqualTo +import assertk.assertions.isFalse +import assertk.assertions.isTrue +import com.github.jengelman.gradle.plugins.shadow.transformers.DeduplicatingResourceTransformer.Companion.sha256Hex +import java.io.File +import java.nio.file.Path +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.io.TempDir +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.ValueSource + +class DeduplicatingResourceTransformerTest : BaseTransformerTest() { + + @TempDir + lateinit var tempDir: Path + + private lateinit var file1: File + private lateinit var file2: File + private lateinit var file3: File + + private var hash1 = "" + private var hash3 = "" + + @BeforeEach + fun setupFiles() { + val content1 = "content1" + val content2 = "content2" + + file1 = tempDir.resolve("file1").toFile().apply { + writeText(content1) + } + file2 = tempDir.resolve("file2").toFile().apply { + writeText(content1) + } + file3 = tempDir.resolve("file3").toFile().apply { + writeText(content2) + } + + hash1 = file1.sha256Hex() + hash3 = file3.sha256Hex() + } + + @ParameterizedTest + @ValueSource(booleans = [false, true]) + fun duplicateContent(exclusionCheck: Boolean) = with(transformer) { + if (!exclusionCheck) { + 
exclude("multiple-contents") + } + + // new path, new file content --> retain resource + assertThat(canTransformResource("multiple-contents", file1)).isFalse() + // same path, same file content --> skip resource + assertThat(canTransformResource("multiple-contents", file2)).isTrue() + // same path, different file content --> retain resource (even if it's a duplicate) + assertThat(canTransformResource("multiple-contents", file3)).isFalse() + + assertThat(canTransformResource("single-source", file1)).isFalse() + + assertThat(canTransformResource("same-content-twice", file1)).isFalse() + assertThat(canTransformResource("same-content-twice", file2)).isTrue() + + assertThat(canTransformResource("differing-content-2", file1)).isFalse() + assertThat(canTransformResource("differing-content-2", file3)).isFalse() + + assertThat(sources.keys).containsExactlyInAnyOrder( + "multiple-contents", + "single-source", + "same-content-twice", + "differing-content-2", + ) + + val pathInfosMultipleContents = sources.getValue("multiple-contents") + assertThat(pathInfosMultipleContents.failOnDuplicateContent).isEqualTo(exclusionCheck) + assertThat(pathInfosMultipleContents.uniqueContentCount()).isEqualTo(2) + assertThat(pathInfosMultipleContents.filesPerHash).containsOnly( + hash1 to listOf(file1, file2), + hash3 to listOf(file3), + ) + + val pathInfosSingleSource = sources.getValue("single-source") + assertThat(pathInfosSingleSource.failOnDuplicateContent).isTrue() + assertThat(pathInfosSingleSource.uniqueContentCount()).isEqualTo(1) + assertThat(pathInfosSingleSource.filesPerHash).containsOnly(hash1 to listOf(file1)) + + val pathInfosSameContentTwice = sources.getValue("same-content-twice") + assertThat(pathInfosSameContentTwice.failOnDuplicateContent).isTrue() + assertThat(pathInfosSameContentTwice.uniqueContentCount()).isEqualTo(1) + assertThat(pathInfosSameContentTwice.filesPerHash).containsOnly(hash1 to listOf(file1, file2)) + + val pathInfosDifferingContent2 = 
sources.getValue("differing-content-2") + assertThat(pathInfosDifferingContent2.failOnDuplicateContent).isTrue() + assertThat(pathInfosDifferingContent2.uniqueContentCount()).isEqualTo(2) + assertThat(pathInfosDifferingContent2.filesPerHash).containsOnly(hash1 to listOf(file1), hash3 to listOf(file3)) + + if (exclusionCheck) { + assertThat(duplicateContentViolations()).containsOnly( + "multiple-contents" to pathInfosMultipleContents, + "differing-content-2" to pathInfosDifferingContent2, + ) + } else { + assertThat(duplicateContentViolations()).containsOnly("differing-content-2" to pathInfosDifferingContent2) + } + } +} diff --git a/src/testKit/kotlin/com/github/jengelman/gradle/plugins/shadow/testkit/JarPath.kt b/src/testKit/kotlin/com/github/jengelman/gradle/plugins/shadow/testkit/JarPath.kt index 41b41878f..41426f009 100644 --- a/src/testKit/kotlin/com/github/jengelman/gradle/plugins/shadow/testkit/JarPath.kt +++ b/src/testKit/kotlin/com/github/jengelman/gradle/plugins/shadow/testkit/JarPath.kt @@ -2,12 +2,16 @@ package com.github.jengelman.gradle.plugins.shadow.testkit import assertk.Assert import assertk.assertions.containsAtLeast +import assertk.assertions.containsExactlyInAnyOrder import assertk.assertions.containsNone import assertk.assertions.containsOnly import java.io.InputStream import java.nio.file.Path import java.util.jar.JarFile +import java.util.jar.JarInputStream import java.util.zip.ZipFile +import java.util.zip.ZipInputStream +import kotlin.io.path.inputStream /** * A wrapper for [JarFile] that also implements [Path]. @@ -44,6 +48,25 @@ fun ZipFile.getStream(entryName: String): InputStream { fun Assert.getContent(entryName: String) = transform { it.getContent(entryName) } +/** + * Scans the jar file for all entries that match the specified [entryName]. + * Unlike [getContent] or [getStream], which return only one of the matching entries + * (which one is undefined), this function returns all matching entries. 
+ */ +fun Assert.getContents(entryName: String) = transform { actual -> + JarInputStream(actual.path.inputStream()).use { jarInput -> + val contents = mutableListOf() + while (true) { + val entry = jarInput.nextEntry ?: break + if (entry.name == entryName) { + contents.add(jarInput.readAllBytes().toString(Charsets.UTF_8)) + } + jarInput.closeEntry() + } + contents + } +} + fun Assert.getMainAttr(name: String) = transform { it.getMainAttr(name) } /** @@ -64,6 +87,22 @@ fun Assert.containsNone(vararg entries: String) = toEntries().containsN */ fun Assert.containsOnly(vararg entries: String) = toEntries().containsOnly(*entries) +/** + * Ensures the JAR contains exactly the specified entries, including duplicates, in any order. + * Used alone, without [containsAtLeast] or [containsNone]. + */ +fun Assert.containsExactlyInAnyOrder(vararg entries: String) = transform { actual -> + ZipInputStream(actual.path.inputStream()).use { jarInput -> + val allEntries = mutableListOf() + while (true) { + val entry = jarInput.nextEntry ?: break + allEntries.add(entry.name) + jarInput.closeEntry() + } + allEntries + } +}.containsExactlyInAnyOrder(*entries) + private fun Assert.toEntries() = transform { actual -> actual.entries().toList().map { it.name } }