Skip to content

Commit 34a88a1

Browse files
authored
Merge pull request #64 from lucidsoftware/fix-instance-cache
Fix AnnexScalaInstance jar cache misses due to ordering and path comparison problems
2 parents 547a889 + 5b052ec commit 34a88a1

File tree

2 files changed

+43
-10
lines changed

2 files changed

+43
-10
lines changed

src/main/scala/higherkindness/rules_scala/workers/common/AnnexScalaInstance.scala

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ import java.net.URLClassLoader
77
import java.nio.file.{Files, Path, Paths}
88
import java.util.Properties
99
import java.util.concurrent.ConcurrentHashMap
10-
import scala.collection.immutable.Map
10+
import scala.collection.immutable.TreeMap
1111

1212
object AnnexScalaInstance {
1313
// See the comment on getAnnexScalaInstance as to why this is necessary
14-
private val instanceCache: ConcurrentHashMap[String, AnnexScalaInstance] =
15-
new ConcurrentHashMap[String, AnnexScalaInstance]()
14+
private val instanceCache: ConcurrentHashMap[Set[Path], AnnexScalaInstance] =
15+
new ConcurrentHashMap[Set[Path], AnnexScalaInstance]()
1616

1717
/**
1818
* We only need to care about minimizing the number of AnnexScalaInstances we create if things are being run as a
@@ -48,7 +48,7 @@ object AnnexScalaInstance {
4848
* from a prior compilation in combination with a new ScalaInstance for the current compilation. Or there's some issue
4949
* with Scala's classpath caching.
5050
*
51-
* If all three caches are enabled (this cache + Zinc claspath + Scala compiler), then the non-determinism seems go
51+
* If all three caches are enabled (this cache + Zinc claspath + Scala compiler), then the non-determinism seems to go
5252
* away.
5353
*
5454
* If this cache is not used, but the Zinc classpath + Scala compiler caches are used, then non-determinsm shows up in
@@ -67,11 +67,22 @@ object AnnexScalaInstance {
6767
* any different from leaving Zinc's cache enabled.
6868
*/
6969
private def getAnnexScalaInstance(allJars: Array[File], workDir: Path): AnnexScalaInstance = {
70-
// We need to remove the sandbox prefix from the paths in order to compare them.
70+
// We want to compare short paths to avoid the Bazel sandbox prefix and arch/config specific parts of the path
71+
// We use a set because we don't care about ordering for comparison purposes
7172
val mapBuilder = Map.newBuilder[Path, Path]
73+
val keyBuilder = Set.newBuilder[Path]
74+
val absoluteWorkDir = workDir.toAbsolutePath().normalize()
7275
allJars.foreach { jar =>
73-
val comparableJarPath = jar.toPath().toAbsolutePath().normalize()
74-
mapBuilder.addOne(jar.toPath -> workDir.toAbsolutePath().normalize().relativize(comparableJarPath))
76+
val absoluteJarPath = jar.toPath().toAbsolutePath().normalize()
77+
78+
// Remove the arch/config specific parts of the path.
79+
val comparablePath = FileUtil.bazelShortPath(
80+
// Remove the sandbox prefix from the path
81+
absoluteWorkDir.relativize(absoluteJarPath),
82+
replaceExternal = false,
83+
)
84+
mapBuilder.addOne(jar.toPath -> comparablePath)
85+
keyBuilder.addOne(comparablePath)
7586
}
7687
val workRequestJarToWorkerJar = mapBuilder.result()
7788

@@ -84,10 +95,10 @@ object AnnexScalaInstance {
8495
// but that would require hashing the all the classpath jars on every compilation request. I
8596
// imagine that would cause a performance hit.
8697
//
87-
// I also imagine it is extremeley rare to be mutating the contents of compiler classpath jars
98+
// I also imagine it is extremely rare to be mutating the contents of compiler classpath jars
8899
// while keeping the names the same, e.g., generating new scala library jar for scala 2.13.14.
89100
// As a result I'm leaving this string based for now.
90-
val key = workRequestJarToWorkerJar.values.mkString(":")
101+
val key = keyBuilder.result()
91102

92103
Option(instanceCache.get(key)).getOrElse {
93104
// Copy all the jars to the worker's directory because in a sandboxed world the

src/main/scala/higherkindness/rules_scala/workers/common/FileUtil.scala

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ package workers.common
44
import scala.annotation.tailrec
55
import java.io.{File, IOException}
66
import java.nio.channels.FileChannel
7-
import java.nio.file.{FileAlreadyExistsException, FileVisitResult, Files, Path, SimpleFileVisitor, StandardCopyOption, StandardOpenOption}
7+
import java.nio.file.{FileAlreadyExistsException, FileVisitResult, Files, Path, Paths, SimpleFileVisitor, StandardCopyOption, StandardOpenOption}
88
import java.nio.file.attribute.BasicFileAttributes
99
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}
1010

@@ -59,6 +59,28 @@ object FileUtil {
5959
path.getFileName().toString().stripPrefix("header_").stripPrefix("processed_")
6060
}
6161

62+
/**
63+
* Given a Bazel path to the bazel output directory, return a path excluding the bazel configuration specific parts of
64+
* the path. This is analogous to Bazel's File.short_path function. Fair warning: this function is super good enough,
65+
* but is likely not perfect.
66+
*/
67+
def bazelShortPath(path: Path, replaceExternal: Boolean = true): Path = {
68+
val nameCount = path.getNameCount()
69+
70+
val shortPath = if (path.startsWith("bazel-out") && nameCount >= 4) {
71+
path.subpath(3, nameCount)
72+
} else {
73+
path
74+
}
75+
76+
// Handle difference between Bazel's external directory being referred to as .. in the short_path
77+
if (replaceExternal && shortPath.subpath(0, 1) == Paths.get("external")) {
78+
Paths.get("..").resolve(shortPath.subpath(1, shortPath.getNameCount() - 1))
79+
} else {
80+
shortPath
81+
}
82+
}
83+
6284
def copy(source: Path, target: Path) = Files.walkFileTree(source, new CopyFileVisitor(source, target))
6385

6486
def delete(path: Path) = Files.walkFileTree(path, new DeleteFileVisitor)

0 commit comments

Comments
 (0)