Skip to content

Commit

Permalink
* Add assemblyUnzipDirectory to AssemblyOption to use a different dir…
Browse files Browse the repository at this point in the history
…ectory for unzipping jars

* Add assemblyCacheDependency task to unzip jar dependencies as a separate task
  • Loading branch information
er1c committed Oct 21, 2021
1 parent b542e09 commit f9e57eb
Show file tree
Hide file tree
Showing 12 changed files with 630 additions and 96 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,25 @@ lazy val app = (project in file("app"))
)
```


### Unzip Caching

When assembling an über artifact that has many library dependencies, the unzip process can be very IO intensive. The unzipped directories are well suited for CI systems to persist between job runs.

```scala
lazy val app = (project in file("app"))
.settings(
assemblyUnzipDirectory := Some(localCacheDirectory.value / "sbt-assembly" / "dependencies"),
assemblyCacheUnzip := true, // this is the default setting
assemblyCacheUseHardLinks := true, // experimental: hard-links files from assemblyUnzipDirectory into assemblyDirectory to avoid additional copy IO (both directories must be on the same filesystem)
// more settings here ...
)
```

To populate the assemblyUnzipDirectory without a full assembly:

sbt assemblyCacheDependency

Other Things
------------

Expand Down
357 changes: 284 additions & 73 deletions src/main/scala/sbtassembly/Assembly.scala

Large diffs are not rendered by default.

35 changes: 19 additions & 16 deletions src/main/scala/sbtassembly/AssemblyKeys.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,26 @@ import Keys._
import com.eed3si9n.jarjarabrams

trait AssemblyKeys {
lazy val assembly = taskKey[File]("Builds a deployable über JAR")
lazy val assembleArtifact = settingKey[Boolean]("Enables (true) or disables (false) assembling an artifact")
lazy val assemblyOption = taskKey[AssemblyOption]("Configuration for making a deployable über JAR")
lazy val assembledMappings = taskKey[Seq[MappingSet]]("Keeps track of jar origins for each source")
lazy val assembly = taskKey[File]("Builds a deployable über JAR")
lazy val assembleArtifact = settingKey[Boolean]("Enables (true) or disables (false) assembling an artifact")
lazy val assemblyOption = taskKey[AssemblyOption]("Configuration for making a deployable über JAR")
lazy val assembledMappings = taskKey[Seq[MappingSet]]("Keeps track of jar origins for each source")
// Task key for populating the unzip cache without building the full assembly.
// The description names the option as it is actually exposed: assemblyUnzipDirectory.
lazy val assemblyCacheDependency = taskKey[Boolean]("Caches the unzipped products of the dependency JAR files. Requires assemblyCacheUnzip (true) and AssemblyOption.assemblyUnzipDirectory to be provided.")

lazy val assemblyPackageScala = taskKey[File]("Produces the Scala artifact")
lazy val assemblyPackageDependency = taskKey[File]("Produces the dependency artifact")
lazy val assemblyJarName = taskKey[String]("name of the über jar")
lazy val assemblyDefaultJarName = taskKey[String]("default name of the über jar")
lazy val assemblyOutputPath = taskKey[File]("output path of the über jar")
lazy val assemblyExcludedJars = taskKey[Classpath]("list of excluded jars")
lazy val assemblyMergeStrategy = settingKey[String => MergeStrategy]("mapping from archive member path to merge strategy")
lazy val assemblyShadeRules = settingKey[Seq[jarjarabrams.ShadeRule]]("shading rules backed by jarjar")
lazy val assemblyAppendContentHash = settingKey[Boolean]("Appends SHA-1 fingerprint to the assembly file name")
lazy val assemblyMaxHashLength = settingKey[Int]("Length of SHA-1 fingerprint used for the assembly file name")
lazy val assemblyCacheUnzip = settingKey[Boolean]("Enables (true) or disables (false) cacheing the unzipped products of the dependency JAR files")
lazy val assemblyCacheOutput = settingKey[Boolean]("Enables (true) or disables (false) cacheing the output if the content has not changed")
lazy val assemblyUnzipDirectory = settingKey[Option[File]]("Specify a directory to unzip the products of dependency JAR files (e.g. assemblyUnzipDirectory := Some(localCacheDirectory.value / \"sbt-assembly\" / \"dependencies\"). Default None (uses default assembly directory).")
lazy val assemblyPackageScala = taskKey[File]("Produces the Scala artifact")
lazy val assemblyPackageDependency = taskKey[File]("Produces the dependency artifact")
lazy val assemblyJarName = taskKey[String]("name of the über jar")
lazy val assemblyDefaultJarName = taskKey[String]("default name of the über jar")
lazy val assemblyOutputPath = taskKey[File]("output path of the über jar")
lazy val assemblyExcludedJars = taskKey[Classpath]("list of excluded jars")
lazy val assemblyMergeStrategy = settingKey[String => MergeStrategy]("mapping from archive member path to merge strategy")
lazy val assemblyShadeRules = settingKey[Seq[jarjarabrams.ShadeRule]]("shading rules backed by jarjar")
lazy val assemblyAppendContentHash = settingKey[Boolean]("Appends SHA-1 fingerprint to the assembly file name")
lazy val assemblyMaxHashLength = settingKey[Int]("Length of SHA-1 fingerprint used for the assembly file name")
lazy val assemblyCacheUnzip = settingKey[Boolean]("Enables (true) or disables (false) cacheing the unzipped products of the dependency JAR files")
lazy val assemblyCacheOutput = settingKey[Boolean]("Enables (true) or disables (false) cacheing the output if the content has not changed")
lazy val assemblyCacheUseHardLinks = settingKey[Boolean]("Experimental. Enables (true) or disables (false) using Files.createLink from the unzipped dependency cache to the assembly directory. Requires both paths to be on the same physical filesystem. Default false.")
lazy val assemblyPrependShellScript = settingKey[Option[Seq[String]]]("A launch script to prepend to the über JAR")
}

Expand Down
25 changes: 18 additions & 7 deletions src/main/scala/sbtassembly/AssemblyPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ object AssemblyPlugin extends sbt.AutoPlugin {
import autoImport.{ Assembly => _, baseAssemblySettings => _, _ }

override lazy val globalSettings: Seq[Def.Setting[_]] = Seq(
assemblyUnzipDirectory := None,
assemblyMergeStrategy := MergeStrategy.defaultMergeStrategy,
assemblyShadeRules := Nil,
assemblyExcludedJars := Nil,
Expand All @@ -32,6 +33,7 @@ object AssemblyPlugin extends sbt.AutoPlugin {
assemblyAppendContentHash := false,
assemblyCacheUnzip := true,
assemblyCacheOutput := true,
assemblyCacheUseHardLinks := false,
assemblyPrependShellScript := None
)

Expand All @@ -50,12 +52,13 @@ object AssemblyPlugin extends sbt.AutoPlugin {
)

lazy val baseAssemblySettings: Seq[sbt.Def.Setting[_]] = (Seq(
assembly := Assembly.assemblyTask(assembly).value,
assembledMappings in assembly := Assembly.assembledMappingsTask(assembly).value,
assemblyPackageScala := Assembly.assemblyTask(assemblyPackageScala).value,
assembledMappings in assemblyPackageScala := Assembly.assembledMappingsTask(assemblyPackageScala).value,
assemblyPackageDependency := Assembly.assemblyTask(assemblyPackageDependency).value,
assembledMappings in assemblyPackageDependency := Assembly.assembledMappingsTask(assemblyPackageDependency).value,
assembly := Assembly.assemblyTask(assembly).value,
assembledMappings in assembly := Assembly.assembledMappingsTask(assembly).value,
assemblyPackageScala := Assembly.assemblyTask(assemblyPackageScala).value,
assembledMappings in assemblyPackageScala := Assembly.assembledMappingsTask(assemblyPackageScala).value,
assemblyPackageDependency := Assembly.assemblyTask(assemblyPackageDependency).value,
assembledMappings in assemblyPackageDependency := Assembly.assembledMappingsTask(assemblyPackageDependency).value,
assemblyCacheDependency := Assembly.assemblyCacheDependencyTask(assemblyPackageDependency).value,

// test
test in assembly := { () },
Expand Down Expand Up @@ -94,6 +97,7 @@ object AssemblyPlugin extends sbt.AutoPlugin {
) ++ inTask(assembly)(assemblyOptionSettings)
++ inTask(assemblyPackageScala)(assemblyOptionSettings)
++ inTask(assemblyPackageDependency)(assemblyOptionSettings)
++ inTask(assemblyCacheDependency)(assemblyOptionSettings)
++ Seq(
assemblyOption in assemblyPackageScala ~= {
_.withIncludeBin(false)
Expand All @@ -104,14 +108,20 @@ object AssemblyPlugin extends sbt.AutoPlugin {
_.withIncludeBin(false)
.withIncludeScala(true)
.withIncludeDependency(true)
}
},
assemblyOption in assemblyCacheDependency ~= {
_.withIncludeBin(false)
.withIncludeScala(true)
.withIncludeDependency(true)
},
))

def assemblyOptionSettings: Seq[Setting[_]] = Seq(
assemblyOption := {
val s = streams.value
AssemblyOption()
.withAssemblyDirectory(s.cacheDirectory / "assembly")
.withAssemblyUnzipDirectory(assemblyUnzipDirectory.value)
.withIncludeBin((assembleArtifact in packageBin).value)
.withIncludeScala((assembleArtifact in assemblyPackageScala).value)
.withIncludeDependency((assembleArtifact in assemblyPackageDependency).value)
Expand All @@ -120,6 +130,7 @@ object AssemblyPlugin extends sbt.AutoPlugin {
.withExcludedFiles(Assembly.defaultExcludedFiles)
.withCacheOutput(assemblyCacheOutput.value)
.withCacheUnzip(assemblyCacheUnzip.value)
.withCacheUseHardLinks(assemblyCacheUseHardLinks.value)
.withAppendContentHash(assemblyAppendContentHash.value)
.withPrependShellScript(assemblyPrependShellScript.value)
.withMaxHashLength(assemblyMaxHashLength.?.value)
Expand Down
123 changes: 123 additions & 0 deletions src/main/scala/sbtassembly/AssemblyUtils.scala
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
package sbtassembly

import sbt._

import java.io.{File, InputStream}
import java.util.zip.ZipInputStream
import scala.collection.mutable.HashSet
import ErrorHandling.translate
import PluginCompat._
import Using._

import java.nio.file.{FileSystemException, Files}
import scala.Function.tupled

private[sbtassembly] object AssemblyUtils {
private val PathRE = "([^/]+)/(.*)".r

Expand Down Expand Up @@ -106,4 +110,123 @@ private[sbtassembly] object AssemblyUtils {
}
loop(rootDir, "", Nil).toVector
}


/** Probes whether a hard link can be created from a file in `sourceDir` to a path in `destDir`.
  *
  * A temporary file is created in `sourceDir` and linked into `destDir` under the same name;
  * the link is removed again when it could be created.
  *
  * Any failure to create the link (cross-device link, file system without hard-link support,
  * name collision, ...) is reported as `false`. The error text for a cross-device link differs
  * between platforms, so the probe does not depend on a particular message.
  *
  * @param sourceDir existing directory that would hold the link sources
  * @param destDir   existing directory that would hold the links
  * @return `true` if the probe link was created, `false` otherwise
  */
def isHardLinkSupported(sourceDir: File, destDir: File): Boolean = {
  assert(sourceDir.isDirectory)
  assert(destDir.isDirectory)

  withTemporaryFileInDirectory("sbt-assembly", "file", sourceDir) { sourceFile =>
    val destFile = destDir / sourceFile.getName
    val linked =
      try {
        Files.createLink(destFile.toPath, sourceFile.toPath)
        true
      } catch {
        // FileSystemException (including the cross-device case) is an IOException;
        // UnsupportedOperationException is thrown when the file system has no hard links.
        case _: java.io.IOException | _: UnsupportedOperationException => false
      }
    // Only remove what this probe created; a pre-existing file at destFile is left alone.
    if (linked) IO.delete(destFile)
    linked
  }
}

/** Runs `action` against a freshly created temporary file inside `dir`.
  *
  * The file is created with `File.createTempFile(prefix, postfix, dir)` and is deleted
  * once `action` has finished, whether it returned normally or threw.
  *
  * @param prefix  name prefix handed to `File.createTempFile`
  * @param postfix name suffix handed to `File.createTempFile`
  * @param dir     directory in which the file is created; asserted to be a directory
  * @param action  function applied to the temporary file
  * @return the value produced by `action`
  */
def withTemporaryFileInDirectory[T](prefix: String, postfix: String, dir: File)(
    action: File => T
): T = {
  assert(dir.isDirectory)
  val tempFile = File.createTempFile(prefix, postfix, dir)
  try action(tempFile)
  finally {
    tempFile.delete()
    ()
  }
}

// region copyDirectory

/** This is an experimental port of https://github.com/sbt/io/pull/326 */

/** Copies everything under `source` to the corresponding location under `target`.
  *
  * All paths found below `source` are rebased onto `target` and handed to `copy`
  * together with the given flags.
  *
  * @param source               root of the tree to copy
  * @param target               root under which the copies are placed
  * @param overwrite            passed through to `copy`
  * @param preserveLastModified passed through to `copy`
  * @param preserveExecutable   passed through to `copy`
  * @param hardLink             passed through to `copy`
  */
def copyDirectory(
    source: File,
    target: File,
    overwrite: Boolean = false,
    preserveLastModified: Boolean = false,
    preserveExecutable: Boolean = true,
    hardLink: Boolean = false
): Unit = {
  val rebased = PathFinder(source).allPaths.pair(Path.rebase(source, target))
  copy(
    sources = rebased,
    overwrite = overwrite,
    preserveLastModified = preserveLastModified,
    preserveExecutable = preserveExecutable,
    hardLink = hardLink
  )
  ()
}

/** Copies each `(from, to)` pair via `copyImpl` and returns the set of target files.
  *
  * @param sources              pairs of source file and target file
  * @param overwrite            forwarded to `copyImpl`
  * @param preserveLastModified forwarded to `copyImpl`
  * @param preserveExecutable   forwarded to `copyImpl`
  * @param hardLink             forwarded to `copyImpl`
  * @return the targets of all pairs
  */
def copy(
    sources: Traversable[(File, File)],
    overwrite: Boolean,
    preserveLastModified: Boolean,
    preserveExecutable: Boolean,
    hardLink: Boolean
): Set[File] = {
  val targets = sources.map { case (from, to) =>
    copyImpl(overwrite, preserveLastModified, preserveExecutable, hardLink)(from, to)
  }
  targets.toSet
}

/** Copies a single path if the target needs refreshing, and returns the target.
  *
  * The target is (re)written when `overwrite` is set, when it does not exist, or when
  * the source's modification time is newer than the target's. A source directory only
  * results in the target directory being created; a source file is copied with
  * `copyFile` after its parent directory has been created.
  */
private def copyImpl(
    overwrite: Boolean,
    preserveLastModified: Boolean,
    preserveExecutable: Boolean,
    hardLink: Boolean
)(from: File, to: File): File = {
  val needsCopy =
    overwrite || !to.exists || IO.getModifiedTimeOrZero(from) > IO.getModifiedTimeOrZero(to)

  if (needsCopy) {
    if (from.isDirectory) IO.createDirectory(to)
    else {
      IO.createDirectory(to.getParentFile)
      copyFile(from, to, preserveLastModified, preserveExecutable, hardLink)
    }
  }
  to
}

/** Copies `sourceFile` to `targetFile`, or hard-links the two when `hardLink` is set.
  *
  * With `hardLink`, an existing target is deleted and replaced by a link to the source;
  * `preserveLastModified` and `preserveExecutable` are not applied in that mode.
  * Otherwise the content is transferred channel-to-channel in bounded chunks, after which
  * the last-modified time and executable flag are optionally copied over.
  *
  * @param sourceFile           existing regular file to copy
  * @param targetFile           destination path
  * @param preserveLastModified copy the last-modified time to the target (copy mode only)
  * @param preserveExecutable   copy the executable flag to the target (copy mode only)
  * @param hardLink             create a hard link instead of copying bytes
  * @throws IllegalArgumentException if `sourceFile` does not exist or is a directory
  * @throws RuntimeException         if fewer bytes than the source size could be copied
  */
def copyFile(
    sourceFile: File,
    targetFile: File,
    preserveLastModified: Boolean,
    preserveExecutable: Boolean,
    hardLink: Boolean
): Unit = {
  // NOTE: when modifying this code, test with larger values of CopySpec.MaxFileSizeBits than default

  require(sourceFile.exists, s"Source file '${sourceFile.getAbsolutePath}' does not exist.")
  require(
    !sourceFile.isDirectory,
    s"Source file '${sourceFile.getAbsolutePath}' is a directory."
  )
  if (hardLink) {
    // Files.createLink does not replace an existing file, so clear the target first.
    if (targetFile.exists) targetFile.delete()
    Files.createLink(targetFile.toPath, sourceFile.toPath)
    ()
  } else {
    fileInputChannel(sourceFile) { in =>
      fileOutputChannel(targetFile) { out =>
        // maximum bytes per transfer according to http://dzone.com/snippets/java-filecopy-using-nio
        val max = (64L * 1024 * 1024) - (32 * 1024)
        val total = in.size
        @scala.annotation.tailrec
        def loop(offset: Long): Long =
          if (offset >= total) offset
          else {
            val transferred = out.transferFrom(in, offset, max)
            // transferFrom may legitimately return 0 (e.g. the source shrank while copying).
            // Stop instead of spinning forever; the size check below reports the short copy.
            if (transferred <= 0) offset else loop(offset + transferred)
          }
        val copied = loop(0)
        if (copied != in.size)
          sys.error(
            "Could not copy '" + sourceFile + "' to '" + targetFile + "' (" + copied + "/" + in.size + " bytes copied)"
          )
      }
    }
    if (preserveLastModified) {
      IO.copyLastModified(sourceFile, targetFile)
      ()
    }
    if (preserveExecutable) {
      IO.copyExecutable(sourceFile, targetFile)
      ()
    }
  }
}

// endregion
}
2 changes: 2 additions & 0 deletions src/sbt-test/caching/caching/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ lazy val root = (project in file(".")).
scalaVersion := "2.11.12",
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.1" % "test",
libraryDependencies += "ch.qos.logback" % "logback-classic" % "0.9.29" % "runtime",
logLevel := sbt.Level.Info,
logBuffered := false,
assembly / assemblyOption ~= {
_.withCacheOutput(true)
.withCacheUnzip(true)
Expand Down
2 changes: 2 additions & 0 deletions src/sbt-test/caching/caching/project/plugins.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
|Specify this property using the scriptedLaunchOpts -D.""".stripMargin)
else addSbtPlugin("com.eed3si9n" % "sbt-assembly" % pluginVersion)
}

addSbtPlugin("io.github.er1c" % "sbt-scriptedutils" % "0.1.0")
16 changes: 16 additions & 0 deletions src/sbt-test/caching/caching/test
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# check if the file gets created
> clean
> assembly
# Ensure all warnings have time to be printed
$ sleep 500
> checkLogContains Unzipping slf4j-api-1.6.1.jar into output cache
> checkLogContains Unzipping logback-classic-0.9.29.jar into output cache
> checkLogContains Unzipping logback-core-0.9.29.jar into output cache
> checkLogContains Unzipping scala-library-2.11.12.jar into output cache
$ exists target/scala-2.11/foo.jar

# run to cache the hash, then check it's consistent
Expand Down Expand Up @@ -28,3 +34,13 @@ $ delete src/main/resources/foo.txt
> genresource2
> assembly
> check

> clearLog
> assemblyCacheDependency
# Ensure all warnings have time to be printed
$ sleep 1000
> checkLogContains AssemblyOption.assemblyUnzipDirectory must be be supplied. Skipping cache unzip task
> checkLogNotContains Unzipping slf4j-api-1.6.1.jar into unzip cache
> checkLogNotContains Unzipping logback-classic-0.9.29.jar into unzip cache
> checkLogNotContains Unzipping logback-core-0.9.29.jar into unzip cache
> checkLogNotContains Unzipping scala-library-2.11.12.jar into unzip cache
67 changes: 67 additions & 0 deletions src/sbt-test/caching/unzip/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import org.apache.logging.log4j.Level
import org.apache.logging.log4j.core.{LogEvent => Log4JLogEvent, _}
import org.apache.logging.log4j.core.Filter.Result
import org.apache.logging.log4j.core.appender.AbstractAppender
import org.apache.logging.log4j.core.filter.LevelRangeFilter
import org.apache.logging.log4j.core.layout.PatternLayout

// Temporary directory used as assemblyUnzipDirectory by this build; deleted by the `cleanunzip` task.
lazy val tempUnzipDir = IO.createTemporaryDirectory

// Scripted-test project that assembles with a dedicated unzip directory and hard links enabled,
// and defines helper tasks (`checkunzip`, `cleanunzip`) for inspecting and removing that directory.
lazy val root = (project in file(".")).
settings(
version := "0.1",
scalaVersion := "2.11.12",
libraryDependencies += "commons-io" % "commons-io" % "2.4",
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.1" % "test",
libraryDependencies += "ch.qos.logback" % "logback-classic" % "0.9.29" % "runtime",
// Rename org.apache.commons.io.** to shadeio.** in the commons-io library and in project classes.
assembly / assemblyShadeRules := Seq(
ShadeRule
.rename("org.apache.commons.io.**" -> "shadeio.@1")
.inLibrary("commons-io" % "commons-io" % "2.4")
.inProject
),
// Unzip dependency jars into the temporary directory and turn on the hard-link option.
assemblyUnzipDirectory := Some(tempUnzipDir),
assemblyCacheUseHardLinks := true,
logLevel := sbt.Level.Info,
logBuffered := false,
assembly / assemblyJarName := "foo.jar",
// Checks that every path in the unzip directory still exists and that each unzipped commons-io
// file has a counterpart under the shaded path in the assembly directory with different content.
TaskKey[Unit]("checkunzip") := {
val opt = (assembly / assemblyOption).value
val assemblyDir = opt.assemblyDirectory.get
val assemblyUnzipDir = opt.assemblyUnzipDirectory.get
val preShadePath = "org.apache.commons.io".replace('.', java.io.File.separatorChar)
val postShadePath = "shadeio"

// Pair each path under the unzip directory with the same relative path under the assembly directory.
val sources = PathFinder(assemblyUnzipDir).allPaths pair Path.rebase(assemblyUnzipDir, assemblyDir)
// Keep only regular files whose absolute path contains the pre-shade package directory.
val ioSources = sources.filter{ case (unzip, _) => unzip.getAbsolutePath.contains(preShadePath) && unzip.isFile }

assert(ioSources.nonEmpty)
sources.map{ _._1 }.foreach{ f => assert(f.exists) }

ioSources.foreach { case (unzipFile, origOutFile) =>
// Expected output location: the rebased path with the package directory swapped for the shaded one.
val outputFile = new java.io.File(
origOutFile
.getAbsolutePath
.toString
.replace(preShadePath, postShadePath)
)

assert(unzipFile.exists)
assert(outputFile.exists)
// Contents must differ. NOTE(review): presumably this guards against shading rewriting the
// hard-linked file in the unzip cache — confirm against Assembly.scala.
assert(getHashString(unzipFile) != getHashString(outputFile))
}
()
},
// Deletes the temporary unzip directory.
TaskKey[Unit]("cleanunzip") := {
IO.delete(tempUnzipDir)
}
)

/** Returns the SHA-1 digest of the file's bytes as a lowercase hexadecimal string. */
def getHashString(file: java.io.File): String = {
  val sha1 = java.security.MessageDigest.getInstance("SHA-1")
  val digestBytes = sha1.digest(IO.readBytes(file))
  val hexPairs = digestBytes.map(b => "%02x".format(b))
  hexPairs.mkString
}
Loading

0 comments on commit f9e57eb

Please sign in to comment.