Skip to content

Commit

Permalink
Add assemblyCacheDependency task
Browse files Browse the repository at this point in the history
  • Loading branch information
er1c committed Sep 30, 2021
1 parent ca46624 commit a8226a4
Show file tree
Hide file tree
Showing 9 changed files with 255 additions and 126 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,25 @@ lazy val app = (project in file("app"))
)
```


### Unzip Caching

When assembling an über artifact that has many library dependencies, the unzip process can be very IO-intensive. The unzipped directories are well suited to being persisted by CI systems between job runs.

```scala
lazy val app = (project in file("app"))
.settings(
assemblyUnzipDirectory := Some(localCacheDirectory.value / "sbt-assembly" / "dependencies"),
assemblyCacheUnzip := true, // this is the default setting
assemblyCacheUseHardLinks := true, // experimental: hard-links files from assemblyUnzipDirectory into assemblyDirectory instead of copying them, avoiding additional copy IO
// more settings here ...
)
```

To populate the assemblyUnzipDirectory without a full assembly:

sbt assemblyCacheDependency

Other Things
------------

Expand Down
274 changes: 177 additions & 97 deletions src/main/scala/sbtassembly/Assembly.scala
Original file line number Diff line number Diff line change
Expand Up @@ -223,43 +223,19 @@ object Assembly {
): Vector[MappingSet] = {
val assemblyDir = ao.assemblyDirectory.get
val assemblyUnzipDir = ao.assemblyUnzipDirectory.getOrElse(assemblyDir)
val isSameDir: Boolean = assemblyDir == assemblyUnzipDir

if (!ao.cacheUnzip) ao.assemblyUnzipDirectory.foreach{ IO.delete }

if (!assemblyDir.exists) IO.createDirectory(assemblyDir)
if (!assemblyUnzipDir.exists) IO.createDirectory(assemblyUnzipDir)

val useHardLinks: Boolean = ao.useHardLinks && !isSameDir && {
val isHardLinkSupported = AssemblyUtils.isHardLinkSupported(sourceDir = assemblyUnzipDir, destDir = assemblyDir)
if (!isHardLinkSupported) log.warn(s"useHardLinks is enabled, but file system doesn't support hardlinks between from $assemblyUnzipDir to $assemblyDir")
isHardLinkSupported
}

val shadeRules = ao.shadeRules

val (libs: Vector[Attributed[File]], dirs: Vector[Attributed[File]]) =
classpath.toVector.sortBy(_.data.getCanonicalPath).partition(c => ClasspathUtilities.isArchive(c.data))

val depLibs: Set[File] = dependencies.map(_.data).toSet.filter(ClasspathUtilities.isArchive)
val excludedJars: Seq[File] = ao.excludedJars map {_.data}

val scalaLibraries: Vector[String] = {
val scalaVersionParts = VersionNumber(ao.scalaVersion)
val isScala213AndLater = scalaVersionParts.numbers.length>=2 && scalaVersionParts._1.get>=2 && scalaVersionParts._2.get>=13
if (isScala213AndLater) scala213AndLaterLibraries else scalaPre213Libraries
}
val (libsFiltered: Vector[Attributed[File]], dirs: Vector[Attributed[File]]) = getFilteredLibsAndDirs(
classpath = classpath,
dependencies = dependencies,
assemblyOption = ao
)

val libsFiltered: Vector[Attributed[File]] = libs flatMap {
case jar if excludedJars contains jar.data.asFile => None
case jar if isScalaLibraryFile(scalaLibraries, jar.data.asFile) =>
if (ao.includeScala) Some(jar) else None
case jar if depLibs contains jar.data.asFile =>
if (ao.includeDependency) Some(jar) else None
case jar =>
if (ao.includeBin) Some(jar) else None
}
val dirRules: Seq[ShadeRule] = shadeRules.filter(_.isApplicableToCompiling)
val dirRules: Seq[ShadeRule] = ao.shadeRules.filter(_.isApplicableToCompiling)
val dirsFiltered: ParVector[File] =
dirs.par flatMap {
case dir =>
Expand All @@ -272,7 +248,7 @@ object Assembly {
if (dest.exists) {
IO.delete(dest)
}
dest.mkdir()
IO.createDirectory(dest)
IO.copyDirectory(dir.data, dest)
if (dirRules.nonEmpty) {
val mappings = ((dest ** (-DirectoryFilter)).get pair relativeTo(dest)) map {
Expand All @@ -283,72 +259,12 @@ object Assembly {
dest
}

val jarDirs: ParVector[(File, File)] =
for(jar <- libsFiltered.par) yield {
val jarName = jar.data.asFile.getName
val jarRules = shadeRules
.filter(r => r.isApplicableToAll ||
jar.metadata.get(moduleID.key)
.map(m => ModuleCoordinate(m.organization, m.name, m.revision))
.exists(r.isApplicableTo))
val hash = sha1name(jar.data) + "_" + sha1content(jar.data) + "_" + sha1rules(jarRules)

val jarNameFinalPath = assemblyDir / (hash + ".jarName")
val jarNameCachePath = assemblyUnzipDir / (hash + ".jarName")
val jarCacheDir = assemblyUnzipDir / hash
val jarDest = assemblyDir / hash
// If the jar name path does not exist, or is not for this jar, unzip the jar
if (!jarNameFinalPath.exists || IO.read(jarNameFinalPath) != jar.data.getCanonicalPath )
{
log.info("Including: %s".format(jarName))

// Copy/Link from cache location if cache exists and is current
if (ao.cacheUnzip &&
jarNameCachePath.exists && IO.read(jarNameCachePath) == jar.data.getCanonicalPath &&
!jarNameFinalPath.exists
) {
if (useHardLinks) log.info("Creating hardlinks from unzip cache: %s".format(jarName))
else log.info("Copying from unzip cache: %s".format(jarName))
AssemblyUtils.copyDirectory(jarCacheDir, jarDest, hardLink = useHardLinks)
IO.delete(jarNameFinalPath) // write after merge/shade rules applied
// Unzip into cache dir and copy over
} else if (ao.cacheUnzip && jarNameFinalPath != jarNameCachePath) {
IO.delete(jarCacheDir)
jarCacheDir.mkdir()

log.info("Unzipping into unzip cache: %s".format(jarName))
AssemblyUtils.unzip(jar.data, jarCacheDir, log)

if (useHardLinks) log.info("Creating hardlinks from unzip cache: %s".format(jarName))
else log.info("Copying from unzip cache: %s".format(jarName))
AssemblyUtils.copyDirectory(jarCacheDir, jarDest, hardLink = useHardLinks)
// Don't use cache dir, just unzip to output cache
} else {
IO.delete(jarDest)
jarDest.mkdir()
log.info("Unzipping into output cache: %s".format(jarName))
AssemblyUtils.unzip(jar.data, jarDest, log)
}

IO.delete(ao.excludedFiles(Seq(jarDest)))
if (jarRules.nonEmpty) {
val mappings = ((jarDest ** (-DirectoryFilter)).get pair relativeTo(jarDest)) map {
case (k, v) => k.toPath -> v
}
Shader.shadeDirectory(dirRules, jarDest.toPath, mappings, ao.level == Level.Debug)
}

// Write the jarNamePath at the end to minimise the chance of having a
// corrupt cache if the user aborts the build midway through
if (jarNameFinalPath != jarNameCachePath && !jarNameCachePath.exists)
IO.write(jarNameCachePath, jar.data.getCanonicalPath, IO.utf8, false)

IO.write(jarNameFinalPath, jar.data.getCanonicalPath, IO.utf8, false)
} else {
log.info("Including from output cache: %s".format(jarName))
}
(jarDest, jar.data)
}
val jarDirs: ParVector[(File, File)] = processDependencyJars(
libsFiltered,
ao,
isCacheOnly = false,
log
)

log.info("Calculate mappings...")
val base: Vector[File] = dirsFiltered.seq ++ (jarDirs map { _._1 })
Expand All @@ -358,6 +274,26 @@ object Assembly {
retval
}

/** Populates the unzip cache with the dependency jars selected by `assemblyOption`,
  * without producing an assembly.
  *
  * Fails via `sys.error` when `assemblyOption.cacheUnzip` is false or when
  * `assemblyOption.assemblyUnzipDirectory` is empty.
  *
  * @param classpath      full classpath to pick jars from
  * @param dependencies   classpath identifying which jars count as dependencies
  * @param assemblyOption cache settings and jar inclusion/exclusion options
  * @param log            logger passed on to the unzip step
  * @return always `true` once every selected jar has been processed
  */
def assemblyCacheDependency(
  classpath: Classpath,
  dependencies: Classpath,
  assemblyOption: AssemblyOption,
  log: Logger
): Boolean = {
  if (!assemblyOption.cacheUnzip) sys.error("AssemblyOption.cacheUnzip must be true")
  if (assemblyOption.assemblyUnzipDirectory.isEmpty) sys.error("AssemblyOption.assemblyUnzipDirectory must be supplied")

  // Only the jars are needed here; the directory half of the classpath is ignored.
  val (libsFiltered: Vector[Attributed[File]], _) = getFilteredLibsAndDirs(
    classpath = classpath,
    dependencies = dependencies,
    assemblyOption = assemblyOption
  )

  // Called for its side effect of filling the unzip directory; the returned pairs are discarded.
  processDependencyJars(libsFiltered, assemblyOption, isCacheOnly = true, log)

  true
}

def assemblyTask(key: TaskKey[File]): Initialize[Task[File]] = Def.task {
// Run tests if enabled before assembly task
val _ = (test in key).value
Expand All @@ -383,6 +319,21 @@ object Assembly {
)
}

/** Task that fills the unzip cache for the dependency jars without running a full assembly.
  *
  * Logs a warning and yields `false` when `cacheUnzip` is disabled or no
  * `assemblyUnzipDirectory` is configured in the assembly option; otherwise delegates to
  * [[assemblyCacheDependency]] and yields its result.
  *
  * @param key task key whose scope supplies `streams` and `assemblyOption`
  */
def assemblyCacheDependencyTask(key: TaskKey[File]): Initialize[Task[Boolean]] = Def.task {
  val s = (streams in key).value
  val ao = (assemblyOption in key).value
  // Classpaths are read from the `assembly` scope rather than from `key`.
  val cp = (fullClasspath in assembly).value
  val deps = (externalDependencyClasspath in assembly).value

  if (!ao.cacheUnzip || ao.assemblyUnzipDirectory.isEmpty) {
    if (!ao.cacheUnzip) s.log.warn("AssemblyOption.cacheUnzip must be true. Skipping unzip task.")
    if (ao.assemblyUnzipDirectory.isEmpty) s.log.warn("AssemblyOption.assemblyUnzipDirectory must be supplied. Skipping cache unzip task.")
    false
  } else {
    assemblyCacheDependency(classpath = cp, dependencies = deps, ao, s.log)
  }
}

def isSystemJunkFile(fileName: String): Boolean =
fileName.toLowerCase match {
case ".ds_store" | "thumbs.db" => true
Expand Down Expand Up @@ -414,6 +365,135 @@ object Assembly {
/** Returns true when the file's name begins with any of the given Scala library name prefixes. */
def isScalaLibraryFile(scalaLibraries: Vector[String], file: File): Boolean = {
  val fileName = file.getName
  scalaLibraries.exists(prefix => fileName.startsWith(prefix))
}

/** Unzips every jar in `libsFiltered` into a directory named after a hash of the jar's
  * name, content and applicable shade rules, and returns one (directory, jar) pair per jar.
  *
  * Directories involved:
  *  - the unzip cache: `assemblyOption.assemblyUnzipDirectory`, falling back to
  *    `assemblyOption.assemblyDirectory` when it is not set;
  *  - the output directory: `assemblyOption.assemblyDirectory`, used only when
  *    `isCacheOnly` is false.
  *
  * A `<hash>.jarName` marker file holding the jar's canonical path is written next to each
  * unzipped directory. A jar is processed again only when the marker in the target directory
  * is missing or names a different jar.
  *
  * @param libsFiltered   jars to process (iterated in parallel via `.par`)
  * @param assemblyOption source of the directories, cache flags, shade rules and excluded files
  * @param isCacheOnly    when true only the unzip cache is populated: nothing is copied to the
  *                       output directory, and excluded-file deletion and shading are skipped
  * @param log            receives progress and warning messages
  * @return for each jar, the directory holding its unzipped content and the jar file itself
  */
private[sbtassembly] def processDependencyJars(
  libsFiltered: Vector[Attributed[File]],
  assemblyOption: AssemblyOption,
  isCacheOnly: Boolean,
  log: Logger
): ParVector[(File, File)] = {

  // `.get` fails if no assembly directory is configured, as in the original implementation.
  val defaultAssemblyDir = assemblyOption.assemblyDirectory.get
  val assemblyUnzipDir: File = assemblyOption.assemblyUnzipDirectory.getOrElse(defaultAssemblyDir)
  // No output directory exists in cache-only mode; all work happens in the unzip directory.
  val assemblyDir: Option[File] = if (isCacheOnly) None else Some(defaultAssemblyDir)
  val isSameDir: Boolean = assemblyDir.exists{ _ == assemblyUnzipDir }

  if (!assemblyUnzipDir.exists) IO.createDirectory(assemblyUnzipDir)
  assemblyDir.foreach { dir => if (!dir.exists) IO.createDirectory(dir) }

  val unzippingIntoMessage: String = if (isCacheOnly && !isSameDir) "unzip cache" else "output cache"

  // Hard links only make sense when copying from a distinct cache directory into the output
  // directory, and only when the file system allows linking between the two.
  val useHardLinks: Boolean = assemblyOption.cacheUseHardLinks && (assemblyDir match {
    case None =>
      false
    case Some(_) if isSameDir =>
      log.warn(s"cacheUseHardLinks is enabled, but assemblyUnzipDirectory is the same as assemblyDirectory ($assemblyUnzipDir)")
      false
    case Some(outputDir) =>
      val isHardLinkSupported = AssemblyUtils.isHardLinkSupported(sourceDir = assemblyUnzipDir, destDir = outputDir)
      if (!isHardLinkSupported) log.warn(s"cacheUseHardLinks is enabled, but file system doesn't support hardlinks between from $assemblyUnzipDir to $outputDir")
      isHardLinkSupported
  })

  // Directory that receives the final unzipped content: the output directory when there is
  // one, otherwise the unzip cache itself.
  val outputBaseDir: File = assemblyDir.getOrElse(assemblyUnzipDir)

  for(jar <- libsFiltered.par) yield {
    val jarName = jar.data.asFile.getName
    // Shade rules that apply to every jar, or specifically to this jar's module coordinates.
    val jarRules = assemblyOption.shadeRules
      .filter(r => r.isApplicableToAll ||
        jar.metadata.get(moduleID.key)
          .map(m => ModuleCoordinate(m.organization, m.name, m.revision))
          .exists(r.isApplicableTo))
    val hash = sha1name(jar.data) + "_" + sha1content(jar.data) + "_" + sha1rules(jarRules)

    val jarNameFinalPath = outputBaseDir / (hash + ".jarName")
    val jarNameCachePath = assemblyUnzipDir / (hash + ".jarName")
    val jarCacheDir = assemblyUnzipDir / hash
    val jarOutputDir = outputBaseDir / hash
    // If the jar name path does not exist, or is not for this jar, unzip the jar
    if (!jarNameFinalPath.exists || IO.read(jarNameFinalPath) != jar.data.getCanonicalPath )
    {
      log.info("Including: %s".format(jarName))

      // Copy/Link from cache location if cache exists and is current
      if (assemblyOption.cacheUnzip &&
        jarNameCachePath.exists && IO.read(jarNameCachePath) == jar.data.getCanonicalPath &&
        !jarNameFinalPath.exists
      ) {
        if (useHardLinks) log.info("Creating hardlinks from unzip cache: %s".format(jarName))
        else log.info("Copying from unzip cache: %s".format(jarName))
        AssemblyUtils.copyDirectory(jarCacheDir, jarOutputDir, hardLink = useHardLinks)
        IO.delete(jarNameFinalPath) // write after merge/shade rules applied
      // Unzip into cache dir and copy over
      } else if (assemblyOption.cacheUnzip && jarNameFinalPath != jarNameCachePath) {
        IO.delete(jarCacheDir)
        IO.createDirectory(jarCacheDir)

        log.info("Unzipping into unzip cache: %s".format(jarName))
        AssemblyUtils.unzip(jar.data, jarCacheDir, log)

        if (useHardLinks) log.info("Creating hardlinks from unzip cache: %s".format(jarName))
        else log.info("Copying from unzip cache: %s".format(jarName))
        AssemblyUtils.copyDirectory(jarCacheDir, jarOutputDir, hardLink = useHardLinks)
      // Don't use cache dir, just unzip to output cache
      } else {
        IO.delete(jarOutputDir)
        IO.createDirectory(jarOutputDir)
        log.info("Unzipping into %s: %s".format(unzippingIntoMessage, jarName))
        AssemblyUtils.unzip(jar.data, jarOutputDir, log)
      }

      // Excluded-file removal and shading are applied to the output copy only, never in
      // cache-only mode.
      if (!isCacheOnly) {
        IO.delete(assemblyOption.excludedFiles(Seq(jarOutputDir)))
        if (jarRules.nonEmpty) {
          val mappings = ((jarOutputDir ** (-DirectoryFilter)).get pair relativeTo(jarOutputDir)) map {
            case (k, v) => k.toPath -> v
          }
          // NOTE(review): the guard and the hash use `jarRules`, but the rules passed to the
          // shader are the compile-applicable `dirRules` -- confirm whether `jarRules` was intended.
          val dirRules: Seq[ShadeRule] = assemblyOption.shadeRules.filter(_.isApplicableToCompiling)
          Shader.shadeDirectory(dirRules, jarOutputDir.toPath, mappings, assemblyOption.level == Level.Debug)
        }
      }

      // Write the jarNamePath at the end to minimise the chance of having a
      // corrupt cache if the user aborts the build midway through
      if (jarNameFinalPath != jarNameCachePath && !jarNameCachePath.exists)
        IO.write(jarNameCachePath, jar.data.getCanonicalPath, IO.utf8, false)

      IO.write(jarNameFinalPath, jar.data.getCanonicalPath, IO.utf8, false)
    } else {
      if (isCacheOnly) log.info("Unzip cache is up to date for: %s".format(jarName))
      else log.info("Including from output cache: %s".format(jarName))
    }
    (jarOutputDir, jar.data)
  }
}

/** Splits the classpath into archive entries (jars) and non-archive entries, then drops the
  * jars that the assembly options exclude.
  *
  * A jar is kept according to the first rule that matches it:
  *  1. listed in `assemblyOption.excludedJars`: dropped;
  *  2. name starts with a known Scala library prefix: kept iff `includeScala`;
  *  3. present among the archive entries of `dependencies`: kept iff `includeDependency`;
  *  4. anything else: kept iff `includeBin`.
  *
  * @param classpath      full classpath; sorted by canonical path before being partitioned
  * @param dependencies   classpath whose archive entries are treated as dependency jars
  * @param assemblyOption supplies `excludedJars`, `scalaVersion` and the include flags
  * @return (jars kept for the assembly, non-archive classpath entries, unfiltered)
  */
private[sbtassembly] def getFilteredLibsAndDirs(
  classpath: Classpath,
  dependencies: Classpath,
  assemblyOption: AssemblyOption
): (Vector[Attributed[File]], Vector[Attributed[File]]) = {
  val (libs: Vector[Attributed[File]], dirs: Vector[Attributed[File]]) =
    classpath.toVector.sortBy(_.data.getCanonicalPath).partition(c => ClasspathUtilities.isArchive(c.data))

  val depLibs: Set[File] = dependencies.map(_.data).toSet.filter(ClasspathUtilities.isArchive)
  val excludedJars: Seq[File] = assemblyOption.excludedJars map {_.data}

  val scalaLibraries: Vector[String] = {
    // Compare (major, minor) as a pair: any major version above 2 is also "2.13 or later".
    // Testing minor >= 13 on its own would misclassify versions such as 3.0.0.
    val isScala213AndLater = VersionNumber(assemblyOption.scalaVersion).numbers match {
      case Seq(major, minor, _*) => major > 2 || (major == 2 && minor >= 13)
      case _                     => false
    }
    if (isScala213AndLater) scala213AndLaterLibraries else scalaPre213Libraries
  }

  val libsFiltered: Vector[Attributed[File]] = libs filter { jar =>
    val jarFile = jar.data.asFile
    if (excludedJars contains jarFile) false
    else if (isScalaLibraryFile(scalaLibraries, jarFile)) assemblyOption.includeScala
    else if (depLibs contains jarFile) assemblyOption.includeDependency
    else assemblyOption.includeBin
  }

  (libsFiltered, dirs)
}

/** Returns a new SHA-1 `MessageDigest` instance on every call. */
private[sbtassembly] def sha1: MessageDigest =
  MessageDigest.getInstance("SHA-1")
private[sbtassembly] def sha1content(f: File): String = {
Using.fileInputStream(f) { in =>
Expand Down
3 changes: 3 additions & 0 deletions src/main/scala/sbtassembly/AssemblyKeys.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ trait AssemblyKeys {
lazy val assembleArtifact = settingKey[Boolean]("Enables (true) or disables (false) assembling an artifact")
lazy val assemblyOption = taskKey[AssemblyOption]("Configuration for making a deployable über JAR")
lazy val assembledMappings = taskKey[Seq[MappingSet]]("Keeps track of jar origins for each source")
// Task key for pre-populating the unzip cache; its description names the two settings it depends on.
lazy val assemblyCacheDependency = taskKey[Boolean]("Caches the unzipped products of the dependency JAR files. Requires assemblyCacheUnzip (true) and assemblyUnzipDirectory to be provided.")

// Optional directory for unzipped dependency JAR contents; per its description, None means the default assembly directory is used.
lazy val assemblyUnzipDirectory = settingKey[Option[File]]("Specify a directory to unzip the products of dependency JAR files (e.g. assemblyUnzipDirectory := Some(localCacheDirectory.value / \"sbt-assembly\" / \"dependencies\")). Default None (uses default assembly directory).")
lazy val assemblyPackageScala = taskKey[File]("Produces the Scala artifact")
lazy val assemblyPackageDependency = taskKey[File]("Produces the dependency artifact")
lazy val assemblyJarName = taskKey[String]("name of the über jar")
Expand All @@ -22,6 +24,7 @@ trait AssemblyKeys {
lazy val assemblyMaxHashLength = settingKey[Int]("Length of SHA-1 fingerprint used for the assembly file name")
lazy val assemblyCacheUnzip = settingKey[Boolean]("Enables (true) or disables (false) cacheing the unzipped products of the dependency JAR files")
lazy val assemblyCacheOutput = settingKey[Boolean]("Enables (true) or disables (false) cacheing the output if the content has not changed")
lazy val assemblyCacheUseHardLinks = settingKey[Boolean]("Experimental. Enables (true) or disables (false) using Files.createLink from the unzipped dependency cache to the assembly directory. Requires both paths to be on the same physical filesystem. Default false.")
lazy val assemblyPrependShellScript = settingKey[Option[Seq[String]]]("A launch script to prepend to the über JAR")
}

Expand Down
Loading

0 comments on commit a8226a4

Please sign in to comment.