Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
451e92f
Add transformer to deduplicate identical files based content
snazy Nov 15, 2025
a1f55de
review
snazy Nov 18, 2025
eac0124
fix JarPath.getCOntents
snazy Nov 18, 2025
596c529
cleanup
snazy Nov 18, 2025
10fbe9a
simpler hashing + use hex-string
snazy Nov 19, 2025
8705d2b
use the right hex thingy
snazy Nov 19, 2025
55a6dfb
fix test + make clean it's SHA256
snazy Nov 19, 2025
07d4269
Update src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/tra…
snazy Nov 19, 2025
eef142a
Update src/functionalTest/kotlin/com/github/jengelman/gradle/plugins/…
snazy Nov 19, 2025
c9b0bcb
Update src/testKit/kotlin/com/github/jengelman/gradle/plugins/shadow/…
snazy Nov 19, 2025
99ddeda
Update src/testKit/kotlin/com/github/jengelman/gradle/plugins/shadow/…
snazy Nov 19, 2025
fc59d69
add .reset()
snazy Nov 19, 2025
71bbe80
rmi comment
snazy Nov 20, 2025
ef2b6b4
use DigestUtils
snazy Nov 20, 2025
b538fff
review
snazy Nov 20, 2025
53c80ff
Reduce `MessageDigest`
Goooler Nov 21, 2025
5afe7cf
Update `DeduplicatingResourceTransformerTest`
Goooler Nov 21, 2025
9896c55
Update `deduplicatingResourceTransformer`
Goooler Nov 21, 2025
cafc7a6
Update src/testKit/kotlin/com/github/jengelman/gradle/plugins/shadow/…
Goooler Nov 21, 2025
d913899
Update src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/tra…
Goooler Nov 21, 2025
e672648
Update src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/tra…
Goooler Nov 21, 2025
8c1b1c6
flatten
snazy Nov 21, 2025
23ee4ef
use ZIS to get META-INF/MANIFEST.MF and keep functional parity with c…
snazy Nov 21, 2025
d4fd099
close stream
snazy Nov 21, 2025
aceaf89
review
snazy Nov 21, 2025
24e660c
Cleanups
Goooler Dec 5, 2025
76f0768
Simplify the message
Goooler Dec 5, 2025
28d38be
Update src/testKit/kotlin/com/github/jengelman/gradle/plugins/shadow/…
Goooler Dec 5, 2025
471737e
Update src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/tra…
Goooler Dec 5, 2025
cc653e8
Update src/main/kotlin/com/github/jengelman/gradle/plugins/shadow/tra…
Goooler Dec 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions api/shadow.api
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,15 @@ public class com/github/jengelman/gradle/plugins/shadow/transformers/ComponentsX
public final class com/github/jengelman/gradle/plugins/shadow/transformers/ComponentsXmlResourceTransformer$Companion {
}

public class com/github/jengelman/gradle/plugins/shadow/transformers/DeduplicatingResourceTransformer : com/github/jengelman/gradle/plugins/shadow/transformers/PatternFilterableResourceTransformer {
public fun <init> (Lorg/gradle/api/model/ObjectFactory;)V
public fun <init> (Lorg/gradle/api/model/ObjectFactory;Lorg/gradle/api/tasks/util/PatternSet;)V
public fun canTransformResource (Lorg/gradle/api/file/FileTreeElement;)Z
public final fun getObjectFactory ()Lorg/gradle/api/model/ObjectFactory;
public fun hasTransformedResource ()Z
public fun modifyOutputStream (Lorg/apache/tools/zip/ZipOutputStream;Z)V
}

public class com/github/jengelman/gradle/plugins/shadow/transformers/DontIncludeResourceTransformer : com/github/jengelman/gradle/plugins/shadow/transformers/ResourceTransformer {
public fun <init> (Lorg/gradle/api/model/ObjectFactory;)V
public fun canTransformResource (Lorg/gradle/api/file/FileTreeElement;)Z
Expand Down
1 change: 1 addition & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ dependencies {
compileOnly(libs.kotlin.gradlePlugin)
compileOnly(libs.kotlin.reflect)
api(libs.apache.ant) // Types from Ant are exposed in the public API.
implementation(libs.apache.commonsCodec)
implementation(libs.apache.commonsIo)
implementation(libs.apache.log4j)
implementation(libs.asm)
Expand Down
1 change: 1 addition & 0 deletions docs/changes/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
enableKotlinModuleRemapping = false
}
```
- Add `DeduplicatingResourceTransformer` to deduplicate on path _and_ content. ([#1859](https://github.com/GradleUp/shadow/pull/1859))

### Changed

Expand Down
1 change: 1 addition & 0 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pluginPublish = "2.0.0"

[libraries]
apache-ant = "org.apache.ant:ant:1.10.15"
apache-commonsCodec = "commons-codec:commons-codec:1.20.0"
apache-commonsIo = "commons-io:commons-io:2.21.0"
apache-log4j = "org.apache.logging.log4j:log4j-core:2.25.2"
apache-maven-modelBuilder = "org.apache.maven:maven-model:3.9.11"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@ package com.github.jengelman.gradle.plugins.shadow.transformers

import assertk.all
import assertk.assertThat
import assertk.assertions.contains
import assertk.assertions.containsExactlyInAnyOrder
import assertk.assertions.isEqualTo
import assertk.assertions.isNotEqualTo
import assertk.assertions.isNotNull
import com.github.jengelman.gradle.plugins.shadow.internal.mainClassAttributeKey
import com.github.jengelman.gradle.plugins.shadow.testkit.containsAtLeast
import com.github.jengelman.gradle.plugins.shadow.testkit.containsExactlyInAnyOrder
import com.github.jengelman.gradle.plugins.shadow.testkit.containsOnly
import com.github.jengelman.gradle.plugins.shadow.testkit.getContent
import com.github.jengelman.gradle.plugins.shadow.testkit.getContents
import com.github.jengelman.gradle.plugins.shadow.testkit.getStream
import com.github.jengelman.gradle.plugins.shadow.testkit.invariantEolString
import com.github.jengelman.gradle.plugins.shadow.testkit.requireResourceAsPath
Expand All @@ -20,11 +24,73 @@ import kotlin.io.path.readText
import kotlin.io.path.writeText
import kotlin.reflect.KClass
import org.apache.logging.log4j.core.config.plugins.processor.PluginProcessor.PLUGIN_CACHE_FILE
import org.gradle.testkit.runner.TaskOutcome.FAILED
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import org.junit.jupiter.params.provider.ValueSource

class TransformersTest : BaseTransformerTest() {

/**
 * Shadows two jars with overlapping resource paths through [DeduplicatingResourceTransformer].
 *
 * `multiple-contents` is always excluded from the transformer. `differing-content-2` is excluded
 * only when [excludeAll] is `true`:
 * - `excludeAll = true`: the build succeeds and both variants of each differing path are in the jar.
 * - `excludeAll = false`: the `shadowJar` task fails and reports `differing-content-2` with both hashes.
 */
@ParameterizedTest
@ValueSource(booleans = [false, true])
fun deduplicatingResourceTransformer(excludeAll: Boolean) {
  val jarOne = buildJarOne {
    insert("multiple-contents", "content")
    insert("single-source", "content")
    insert("same-content-twice", "content")
    insert("differing-content-2", "content")
  }
  val jarTwo = buildJarTwo {
    insert("multiple-contents", "content-is-different")
    insert("same-content-twice", "content")
    insert("differing-content-2", "content-is-different")
  }

  // Second exclusion is only present in the "everything excluded" variant of the test.
  val optionalExclusion = if (excludeAll) "exclude('differing-content-2')" else ""
  val script = transform<DeduplicatingResourceTransformer>(
    dependenciesBlock = implementationFiles(jarOne, jarTwo),
    transformerBlock = """
      exclude('multiple-contents')
      $optionalExclusion
    """.trimIndent(),
  )
  projectScript.appendText(script)

  if (!excludeAll) {
    // `differing-content-2` is still checked, so the task must fail and name both variants.
    val failedBuild = runWithFailure(shadowJarPath)
    assertThat(failedBuild).taskOutcomeEquals(shadowJarPath, FAILED)
    assertThat(failedBuild.output).contains(
      // Keep this list approach for Unix/Windows test compatibility.
      "Execution failed for task ':shadowJar'.",
      "> Found 1 path duplicate(s) with different content in the shadowed JAR:",
      " * differing-content-2",
      "differing-content-2 (SHA256: ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73)",
      "differing-content-2 (SHA256: aa845861bbd4578700e10487d85b25ead8723ee98fbf143df7b7e0bf1cb3385d)",
    )
    return
  }

  // Every differing path is excluded: the build passes and both variants are kept.
  runWithSuccess(shadowJarPath)
  assertThat(outputShadowedJar).useAll {
    containsExactlyInAnyOrder(
      // present twice (different contents, excluded from the check):
      "multiple-contents",
      "multiple-contents",
      "single-source",
      // identical content in both jars, so only one copy remains:
      "same-content-twice",
      // present twice (different contents, excluded from the check):
      "differing-content-2",
      "differing-content-2",
      "META-INF/",
      "META-INF/MANIFEST.MF",
    )
    getContents("multiple-contents").containsExactlyInAnyOrder("content", "content-is-different")
    getContent("single-source").isEqualTo("content")
    getContent("same-content-twice").isEqualTo("content")
    getContents("differing-content-2").containsExactlyInAnyOrder("content", "content-is-different")
  }
}

@Test
fun manifestRetained() {
writeClass()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package com.github.jengelman.gradle.plugins.shadow.transformers

import com.github.jengelman.gradle.plugins.shadow.tasks.FindResourceInClasspath
import java.io.File
import javax.inject.Inject
import org.apache.commons.codec.digest.DigestUtils
import org.apache.tools.zip.ZipOutputStream
import org.gradle.api.GradleException
import org.gradle.api.file.FileTreeElement
import org.gradle.api.model.ObjectFactory
import org.gradle.api.tasks.Internal
import org.gradle.api.tasks.util.PatternSet

/**
 * Transformer that keeps files with identical path and content only once in the shadowed JAR.
 *
 * Files that share a path but differ in content make the build fail, unless the path is excluded.
 *
 * Typical situations that produce duplicate resources in a shadow jar:
 *
 * - Duplicate `.class` files.
 *   Duplicate `.class` files with different content indicate that the resulting jar is built with
 *   _incompatible_ classes, which is likely to cause problems at runtime.
 *   This can happen when a dependency is (also) bundled inside an uber jar.
 *
 * - Duplicate `META-INF/<group-id>/<artifact-id>/pom.properties`/`xml` files.
 *   Some dependencies contain shaded variants of other dependencies.
 *   Tools that inspect jar files to find the included dependencies (license auditing, dependency
 *   inventories, ...) may rely on these files, so it is desirable to retain every
 *   `pom.properties`/`xml` variant.
 *
 * [DeduplicatingResourceTransformer] inspects all entries of the resulting jar.
 * Using any of the [include] configuration functions is generally not recommended.
 *
 * When duplicates with different contents must legitimately stay in the jar, use the [exclude]
 * configuration functions. An excluded path is still deduplicated when the content is identical,
 * but differing contents no longer raise an error. For example, to keep legit
 * `META-INF/<group-id>/<artifact-id>/pom.properties`/`xml` files:
 *
 * ```kotlin
 * tasks.shadowJar {
 *   transform(DeduplicatingResourceTransformer::class.java) {
 *     // Keep pom.* files from different Guava versions in the jar.
 *     exclude("META-INF/maven/com.google.guava/guava/pom.*")
 *     // Duplicates with different content for all other resource paths will raise an error.
 *   }
 * }
 * ```
 *
 * *Tip*: the [FindResourceInClasspath] convenience task can be used to find resources in a Gradle
 * classpath/configuration.
 *
 * *Warning* Do **not** combine [PreserveFirstFoundResourceTransformer] with this transformer,
 * as they handle duplicates differently and combining them would lead to redundant or unexpected behavior.
 */
@CacheableTransformer
public open class DeduplicatingResourceTransformer(
  final override val objectFactory: ObjectFactory,
  patternSet: PatternSet,
) : PatternFilterableResourceTransformer(patternSet) {
  /** Everything recorded so far, keyed by the resource path inside the jar. */
  @get:Internal
  internal val sources: MutableMap<String, PathInfos> = mutableMapOf()

  @Inject
  public constructor(objectFactory: ObjectFactory) : this(objectFactory, PatternSet())

  /**
   * Records [element] under its path and content hash.
   *
   * @return `false` for the first file of every distinct (path, content) pair, so that it is retained
   * in the output; `true` for any further file with the same path and the same content.
   */
  override fun canTransformResource(element: FileTreeElement): Boolean {
    val sourceFile = element.file
    // Hash before touching `sources`, so a read failure does not register the path.
    val contentHash = sourceFile.sha256Hex()

    val infosForPath = sources.getOrPut(element.path) {
      // The pattern decides, once per path, whether differing contents are an error.
      PathInfos(failOnDuplicateContent = patternSpec.isSatisfiedBy(element))
    }
    val firstWithThisContent = infosForPath.addFile(contentHash, sourceFile)
    return !firstWithThisContent
  }

  /**
   * Always reports `true`.
   * NOTE(review): presumably this guarantees [modifyOutputStream] runs so the final check happens;
   * confirm against the caller.
   */
  override fun hasTransformedResource(): Boolean = true

  /**
   * Writes nothing to [os]; fails the build when [duplicateContentViolations] is not empty.
   *
   * @throws GradleException listing every offending path with the files and SHA256 hashes found for it.
   */
  override fun modifyOutputStream(os: ZipOutputStream, preserveFileTimestamps: Boolean) {
    val violations = duplicateContentViolations()
    if (violations.isEmpty()) return

    val report = StringBuilder()
    report.append("Found ${violations.size} path duplicate(s) with different content in the shadowed JAR:\n")
    for ((path, infos) in violations) {
      report.append(" * $path\n")
      for ((hash, files) in infos.filesPerHash) {
        for (file in files) {
          report.append(" * ${file.path} (SHA256: $hash)\n")
        }
      }
    }
    throw GradleException(report.toString())
  }

  /** Paths that are subject to the check and were seen with more than one distinct content. */
  internal fun duplicateContentViolations(): Map<String, PathInfos> = sources.filterValues { infos ->
    infos.failOnDuplicateContent && infos.uniqueContentCount() > 1
  }

  /**
   * Bookkeeping for a single resource path.
   *
   * Note: only [failOnDuplicateContent] is a constructor property, so the generated
   * `equals`/`hashCode`/`toString` of this data class do not take [filesPerHash] into account.
   *
   * @property failOnDuplicateContent whether more than one distinct content for this path is an error.
   */
  internal data class PathInfos(val failOnDuplicateContent: Boolean) {
    /** Source files grouped by the SHA-256 hex digest of their content. */
    val filesPerHash: MutableMap<String, MutableList<File>> = mutableMapOf()

    /** Number of distinct contents recorded for this path. */
    fun uniqueContentCount(): Int = filesPerHash.size

    /**
     * Registers [file] under [hash].
     *
     * @return `true` if [hash] was not known for this path before, `false` otherwise.
     */
    fun addFile(hash: String, file: File): Boolean {
      val sameContent = filesPerHash[hash]
      if (sameContent != null) {
        sameContent.add(file)
        return false
      }
      filesPerHash[hash] = mutableListOf(file)
      return true
    }
  }

  internal companion object {
    /**
     * Returns the SHA-256 digest of this file's content as a hex string.
     *
     * @throws RuntimeException wrapping any exception raised while reading or hashing the file.
     */
    fun File.sha256Hex(): String = try {
      inputStream().use { stream -> DigestUtils.sha256Hex(stream) }
    } catch (e: Exception) {
      throw RuntimeException("Failed to read data or calculate hash for $this", e)
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ import org.gradle.api.tasks.util.PatternSet
* want to ensure that only the first found resource is included in the final JAR. If there are multiple resources with
* the same path in a project and its dependencies, the first one found should be the project's.
*
* This transformer deduplicates included resources based on the path name.
* See [DeduplicatingResourceTransformer] for a transformer that deduplicates based on the paths and contents of
* the resources.
*
* *Warning* Do **not** combine [DeduplicatingResourceTransformer] with this transformer,
* as they handle duplicates differently and combining them would lead to redundant or unexpected behavior.
*
* @see [DuplicatesStrategy]
* @see [ShadowJar.getDuplicatesStrategy]
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package com.github.jengelman.gradle.plugins.shadow.transformers

import assertk.assertThat
import assertk.assertions.containsExactlyInAnyOrder
import assertk.assertions.containsOnly
import assertk.assertions.isEqualTo
import assertk.assertions.isFalse
import assertk.assertions.isTrue
import com.github.jengelman.gradle.plugins.shadow.transformers.DeduplicatingResourceTransformer.Companion.sha256Hex
import java.io.File
import java.nio.file.Path
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.io.TempDir
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.ValueSource

/**
 * Unit tests for the per-path bookkeeping of [DeduplicatingResourceTransformer].
 *
 * Fixture: `file1` and `file2` hold the same content, `file3` holds a different one.
 */
class DeduplicatingResourceTransformerTest : BaseTransformerTest<DeduplicatingResourceTransformer>() {

  @TempDir
  lateinit var tempDir: Path

  private lateinit var file1: File
  private lateinit var file2: File
  private lateinit var file3: File

  // Hash of file1 (and file2, same content) and of file3.
  private var hash1 = ""
  private var hash3 = ""

  @BeforeEach
  fun setupFiles() {
    file1 = createFile("file1", "content1")
    file2 = createFile("file2", "content1")
    file3 = createFile("file3", "content2")

    hash1 = file1.sha256Hex()
    hash3 = file3.sha256Hex()
  }

  /** Creates a file called [name] with [content] inside [tempDir]. */
  private fun createFile(name: String, content: String): File =
    tempDir.resolve(name).toFile().also { it.writeText(content) }

  /**
   * Feeds a fixed sequence of (path, file) pairs to the transformer and checks the verdicts and the
   * recorded state. When [exclusionCheck] is `false`, `multiple-contents` is excluded, so it must not
   * be reported as a violation.
   */
  @ParameterizedTest
  @ValueSource(booleans = [false, true])
  fun duplicateContent(exclusionCheck: Boolean) {
    with(transformer) {
      if (!exclusionCheck) {
        exclude("multiple-contents")
      }

      // (path, file, expected result of canTransformResource); the order of the rows matters.
      val steps = listOf(
        // new path, new file content --> retain resource
        Triple("multiple-contents", file1, false),
        // same path, same file content --> skip resource
        Triple("multiple-contents", file2, true),
        // same path, different file content --> retain resource (even if it's a duplicate)
        Triple("multiple-contents", file3, false),
        Triple("single-source", file1, false),
        Triple("same-content-twice", file1, false),
        Triple("same-content-twice", file2, true),
        Triple("differing-content-2", file1, false),
        Triple("differing-content-2", file3, false),
      )
      for ((path, file, expectedVerdict) in steps) {
        assertThat(canTransformResource(path, file)).isEqualTo(expectedVerdict)
      }

      assertThat(sources.keys).containsExactlyInAnyOrder(
        "multiple-contents",
        "single-source",
        "same-content-twice",
        "differing-content-2",
      )

      val multipleContents = sources.getValue("multiple-contents")
      assertThat(multipleContents.failOnDuplicateContent).isEqualTo(exclusionCheck)
      assertThat(multipleContents.uniqueContentCount()).isEqualTo(2)
      assertThat(multipleContents.filesPerHash).containsOnly(
        hash1 to listOf(file1, file2),
        hash3 to listOf(file3),
      )

      val singleSource = sources.getValue("single-source")
      assertThat(singleSource.failOnDuplicateContent).isTrue()
      assertThat(singleSource.uniqueContentCount()).isEqualTo(1)
      assertThat(singleSource.filesPerHash).containsOnly(hash1 to listOf(file1))

      val sameContentTwice = sources.getValue("same-content-twice")
      assertThat(sameContentTwice.failOnDuplicateContent).isTrue()
      assertThat(sameContentTwice.uniqueContentCount()).isEqualTo(1)
      assertThat(sameContentTwice.filesPerHash).containsOnly(hash1 to listOf(file1, file2))

      val differingContent2 = sources.getValue("differing-content-2")
      assertThat(differingContent2.failOnDuplicateContent).isTrue()
      assertThat(differingContent2.uniqueContentCount()).isEqualTo(2)
      assertThat(differingContent2.filesPerHash).containsOnly(
        hash1 to listOf(file1),
        hash3 to listOf(file3),
      )

      if (exclusionCheck) {
        // Nothing is excluded: both paths with differing contents are violations.
        assertThat(duplicateContentViolations()).containsOnly(
          "multiple-contents" to multipleContents,
          "differing-content-2" to differingContent2,
        )
      } else {
        // `multiple-contents` is excluded, only the other path is reported.
        assertThat(duplicateContentViolations()).containsOnly("differing-content-2" to differingContent2)
      }
    }
  }
}
Loading