@@ -18,7 +18,7 @@ package kafka.log.remote
18
18
19
19
import com .github .benmanes .caffeine .cache .{Cache , Caffeine , RemovalCause }
20
20
import kafka .log .UnifiedLog
21
- import kafka .log .remote .RemoteIndexCache .{DirName , remoteLogIndexCacheCleanerThread }
21
+ import kafka .log .remote .RemoteIndexCache .{DirName , offsetFromRemoteIndexFileName , RemoteLogIndexCacheCleanerThread , remoteLogSegmentIdFromRemoteIndexFileName , remoteOffsetIndexFile , remoteTimeIndexFile , remoteTransactionIndexFile }
22
22
import kafka .utils .CoreUtils .{inReadLock , inWriteLock }
23
23
import kafka .utils .{CoreUtils , Logging , threadsafe }
24
24
import org .apache .kafka .common .Uuid
@@ -38,7 +38,60 @@ import java.util.concurrent.locks.ReentrantReadWriteLock
38
38
object RemoteIndexCache {
39
39
val DirName = " remote-log-index-cache"
40
40
val TmpFileSuffix = " .tmp"
41
- val remoteLogIndexCacheCleanerThread = " remote-log-index-cleaner"
41
+ val RemoteLogIndexCacheCleanerThread = " remote-log-index-cleaner"
42
+
43
+ def remoteLogSegmentIdFromRemoteIndexFileName (fileName : String ): Uuid = {
44
+ val underscoreIndex = fileName.indexOf(" _" )
45
+ val dotIndex = fileName.indexOf(" ." )
46
+ Uuid .fromString(fileName.substring(underscoreIndex + 1 , dotIndex))
47
+ }
48
+
49
+ def offsetFromRemoteIndexFileName (fileName : String ): Long = {
50
+ fileName.substring(0 , fileName.indexOf(" _" )).toLong
51
+ }
52
+
53
+ /**
54
+ * Generates prefix for file name for the on-disk representation of remote indexes.
55
+ *
56
+ * Example of file name prefix is 45_dsdsd where 45 represents the base offset for the segment and
57
+ * sdsdsd represents the unique [[RemoteLogSegmentId ]]
58
+ *
59
+ * @param remoteLogSegmentMetadata remote segment for the remote indexes
60
+ * @return string which should be used as prefix for on-disk representation of remote indexes
61
+ */
62
+ private def generateFileNamePrefixForIndex (remoteLogSegmentMetadata : RemoteLogSegmentMetadata ): String = {
63
+ val startOffset = remoteLogSegmentMetadata.startOffset
64
+ val segmentId = remoteLogSegmentMetadata.remoteLogSegmentId().id
65
+ // uuid.toString uses URL encoding which is safe for filenames and URLs.
66
+ s " ${startOffset.toString}_ ${segmentId.toString}"
67
+ }
68
+
69
+ def remoteOffsetIndexFile (dir : File , remoteLogSegmentMetadata : RemoteLogSegmentMetadata ): File = {
70
+ new File (dir, remoteOffsetIndexFileName(remoteLogSegmentMetadata))
71
+ }
72
+
73
+ def remoteOffsetIndexFileName (remoteLogSegmentMetadata : RemoteLogSegmentMetadata ): String = {
74
+ val prefix = generateFileNamePrefixForIndex(remoteLogSegmentMetadata)
75
+ prefix + UnifiedLog .IndexFileSuffix
76
+ }
77
+
78
+ def remoteTimeIndexFile (dir : File , remoteLogSegmentMetadata : RemoteLogSegmentMetadata ): File = {
79
+ new File (dir, remoteTimeIndexFileName(remoteLogSegmentMetadata))
80
+ }
81
+
82
+ def remoteTimeIndexFileName (remoteLogSegmentMetadata : RemoteLogSegmentMetadata ): String = {
83
+ val prefix = generateFileNamePrefixForIndex(remoteLogSegmentMetadata)
84
+ prefix + UnifiedLog .TimeIndexFileSuffix
85
+ }
86
+
87
+ def remoteTransactionIndexFile (dir : File , remoteLogSegmentMetadata : RemoteLogSegmentMetadata ): File = {
88
+ new File (dir, remoteTransactionIndexFileName(remoteLogSegmentMetadata))
89
+ }
90
+
91
+ def remoteTransactionIndexFileName (remoteLogSegmentMetadata : RemoteLogSegmentMetadata ): String = {
92
+ val prefix = generateFileNamePrefixForIndex(remoteLogSegmentMetadata)
93
+ prefix + UnifiedLog .TxnIndexFileSuffix
94
+ }
42
95
}
43
96
44
97
@ threadsafe
@@ -104,12 +157,19 @@ class Entry(val offsetIndex: OffsetIndex, val timeIndex: TimeIndex, val txnIndex
104
157
inWriteLock(lock) {
105
158
// close is no-op if entry is already marked for cleanup. Mmap resources are released during cleanup.
106
159
if (! markedForCleanup) {
107
- Utils .closeQuietly(offsetIndex, " Closing the offset index. " )
108
- Utils .closeQuietly(timeIndex, " Closing the time index. " )
109
- Utils .closeQuietly(txnIndex, " Closing the transaction index. " )
160
+ Utils .closeQuietly(offsetIndex, " offset index" )
161
+ Utils .closeQuietly(timeIndex, " time index" )
162
+ Utils .closeQuietly(txnIndex, " transaction index" )
110
163
}
111
164
}
112
165
}
166
+
167
+ override def toString : String = {
168
+ s " RemoteIndexCacheEntry( " +
169
+ s " timeIndex= ${timeIndex.file.getName}, " +
170
+ s " txnIndex= ${txnIndex.file.getName}, " +
171
+ s " offsetIndex= ${offsetIndex.file.getName}) "
172
+ }
113
173
}
114
174
115
175
/**
@@ -168,11 +228,11 @@ class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageM
168
228
.maximumSize(maxSize)
169
229
// removeListener is invoked when either the entry is invalidated (means manual removal by the caller) or
170
230
// evicted (means removal due to the policy)
171
- .removalListener((_ : Uuid , entry : Entry , _ : RemovalCause ) => {
231
+ .removalListener((key : Uuid , entry : Entry , _ : RemovalCause ) => {
172
232
// Mark the entries for cleanup and add them to the queue to be garbage collected later by the background thread.
173
233
entry.markForCleanup()
174
234
if (! expiredIndexes.offer(entry)) {
175
- error(s " Error while inserting entry $entry into the cleaner queue " )
235
+ error(s " Error while inserting entry $entry for key $key into the cleaner queue because queue is full. " )
176
236
}
177
237
})
178
238
.build[Uuid , Entry ]()
@@ -198,25 +258,22 @@ class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageM
198
258
})
199
259
200
260
Files .list(cacheDir.toPath).forEach((path: Path ) => {
201
- val pathStr = path.getFileName.toString
202
- val name = pathStr.substring(0 , pathStr.lastIndexOf(" _" ) + 1 )
203
-
204
- // Create entries for each path if all the index files exist.
205
- val firstIndex = name.indexOf('_' )
206
- val offset = name.substring(0 , firstIndex).toInt
207
- val uuid = Uuid .fromString(name.substring(firstIndex + 1 , name.lastIndexOf('_' )))
208
-
261
+ val indexFileName = path.getFileName.toString
262
+ val uuid = remoteLogSegmentIdFromRemoteIndexFileName(indexFileName)
209
263
// It is safe to update the internalCache non-atomically here since this function is always called by a single
210
264
// thread only.
211
265
if (! internalCache.asMap().containsKey(uuid)) {
212
- val offsetIndexFile = new File (cacheDir, name + UnifiedLog .IndexFileSuffix )
213
- val timestampIndexFile = new File (cacheDir, name + UnifiedLog .TimeIndexFileSuffix )
214
- val txnIndexFile = new File (cacheDir, name + UnifiedLog .TxnIndexFileSuffix )
266
+ val fileNameWithoutDotExtensions = indexFileName.substring(0 , indexFileName.indexOf(" ." ))
267
+ val offsetIndexFile = new File (cacheDir, fileNameWithoutDotExtensions + UnifiedLog .IndexFileSuffix )
268
+ val timestampIndexFile = new File (cacheDir, fileNameWithoutDotExtensions + UnifiedLog .TimeIndexFileSuffix )
269
+ val txnIndexFile = new File (cacheDir, fileNameWithoutDotExtensions + UnifiedLog .TxnIndexFileSuffix )
215
270
271
+ // Create entries for each path if all the index files exist.
216
272
if (Files .exists(offsetIndexFile.toPath) &&
217
273
Files .exists(timestampIndexFile.toPath) &&
218
274
Files .exists(txnIndexFile.toPath)) {
219
275
276
+ val offset = offsetFromRemoteIndexFileName(indexFileName)
220
277
val offsetIndex = new OffsetIndex (offsetIndexFile, offset, Int .MaxValue , false )
221
278
offsetIndex.sanityCheck()
222
279
@@ -240,26 +297,29 @@ class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageM
240
297
init()
241
298
242
299
// Start cleaner thread that will clean the expired entries
243
- private [remote] var cleanerThread : ShutdownableThread = new ShutdownableThread (remoteLogIndexCacheCleanerThread ) {
300
+ private [remote] var cleanerThread : ShutdownableThread = new ShutdownableThread (RemoteLogIndexCacheCleanerThread ) {
244
301
setDaemon(true )
245
302
246
303
override def doWork (): Unit = {
304
+ var expiredEntryOpt : Option [Entry ] = None
247
305
try {
248
- while ( ! isRemoteIndexCacheClosed.get ()) {
249
- val entry = expiredIndexes.take()
250
- debug(s " Cleaning up index entry $entry " )
251
- entry .cleanup()
252
- }
306
+ expiredEntryOpt = Some (expiredIndexes.take ())
307
+ expiredEntryOpt.foreach( expiredEntry => {
308
+ log. debug(s " Cleaning up index entry $expiredEntry " )
309
+ expiredEntry .cleanup()
310
+ })
253
311
} catch {
254
- case ex : InterruptedException =>
312
+ case ie : InterruptedException =>
255
313
// cleaner thread should only be interrupted when cache is being closed, else it's an error
256
314
if (! isRemoteIndexCacheClosed.get()) {
257
- error(" Cleaner thread received interruption but remote index cache is not closed" , ex)
258
- throw ex
315
+ log.error(" Cleaner thread received interruption but remote index cache is not closed" , ie)
316
+ // propagate the InterruptedException outside to correctly close the thread.
317
+ throw ie
259
318
} else {
260
- debug(" Cleaner thread was interrupted on cache shutdown" )
319
+ log. debug(" Cleaner thread was interrupted on cache shutdown" )
261
320
}
262
- case ex : Exception => error(" Error occurred while fetching/cleaning up expired entry" , ex)
321
+ // do not exit for exceptions other than InterruptedException
322
+ case ex : Throwable => log.error(s " Error occurred while cleaning up expired entry $expiredEntryOpt" , ex)
263
323
}
264
324
}
265
325
}
@@ -273,16 +333,20 @@ class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageM
273
333
}
274
334
275
335
inReadLock(lock) {
336
+ // while this thread was waiting for lock, another thread may have changed the value of isRemoteIndexCacheClosed.
337
+ // check for index close again
338
+ if (isRemoteIndexCacheClosed.get()) {
339
+ throw new IllegalStateException (s " Unable to fetch index for " +
340
+ s " segment id= ${remoteLogSegmentMetadata.remoteLogSegmentId().id()}. Index instance is already closed. " )
341
+ }
342
+
276
343
val cacheKey = remoteLogSegmentMetadata.remoteLogSegmentId().id()
277
- internalCache.get(cacheKey, (uuid : Uuid ) => {
278
- def loadIndexFile [T ](fileName : String ,
279
- suffix : String ,
344
+ internalCache.get(cacheKey, (_ : Uuid ) => {
345
+ def loadIndexFile [T ](indexFile : File ,
280
346
fetchRemoteIndex : RemoteLogSegmentMetadata => InputStream ,
281
347
readIndex : File => T ): T = {
282
- val indexFile = new File (cacheDir, fileName + suffix)
283
-
284
348
def fetchAndCreateIndex (): T = {
285
- val tmpIndexFile = new File (cacheDir, fileName + suffix + RemoteIndexCache .TmpFileSuffix )
349
+ val tmpIndexFile = new File (cacheDir, indexFile.getName + RemoteIndexCache .TmpFileSuffix )
286
350
287
351
val inputStream = fetchRemoteIndex(remoteLogSegmentMetadata)
288
352
try {
@@ -311,26 +375,26 @@ class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageM
311
375
}
312
376
313
377
val startOffset = remoteLogSegmentMetadata.startOffset()
314
- // uuid.toString uses URL encoding which is safe for filenames and URLs.
315
- val fileName = startOffset.toString + " _" + uuid.toString + " _"
316
-
317
- val offsetIndex : OffsetIndex = loadIndexFile(fileName, UnifiedLog .IndexFileSuffix ,
378
+ val offsetIndexFile = remoteOffsetIndexFile(cacheDir, remoteLogSegmentMetadata)
379
+ val offsetIndex : OffsetIndex = loadIndexFile(offsetIndexFile,
318
380
rlsMetadata => remoteStorageManager.fetchIndex(rlsMetadata, IndexType .OFFSET ),
319
381
file => {
320
382
val index = new OffsetIndex (file, startOffset, Int .MaxValue , false )
321
383
index.sanityCheck()
322
384
index
323
385
})
324
386
325
- val timeIndex : TimeIndex = loadIndexFile(fileName, UnifiedLog .TimeIndexFileSuffix ,
387
+ val timeIndexFile = remoteTimeIndexFile(cacheDir, remoteLogSegmentMetadata)
388
+ val timeIndex : TimeIndex = loadIndexFile(timeIndexFile,
326
389
rlsMetadata => remoteStorageManager.fetchIndex(rlsMetadata, IndexType .TIMESTAMP ),
327
390
file => {
328
391
val index = new TimeIndex (file, startOffset, Int .MaxValue , false )
329
392
index.sanityCheck()
330
393
index
331
394
})
332
395
333
- val txnIndex : TransactionIndex = loadIndexFile(fileName, UnifiedLog .TxnIndexFileSuffix ,
396
+ val txnIndexFile = remoteTransactionIndexFile(cacheDir, remoteLogSegmentMetadata)
397
+ val txnIndex : TransactionIndex = loadIndexFile(txnIndexFile,
334
398
rlsMetadata => remoteStorageManager.fetchIndex(rlsMetadata, IndexType .TRANSACTION ),
335
399
file => {
336
400
val index = new TransactionIndex (startOffset, file)
@@ -367,7 +431,7 @@ class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageM
367
431
info(s " Close initiated for RemoteIndexCache. Cache stats= ${internalCache.stats}. " +
368
432
s " Cache entries pending delete= ${expiredIndexes.size()}" )
369
433
// Initiate shutdown for cleaning thread
370
- val shutdownRequired = cleanerThread.initiateShutdown()
434
+ val shutdownRequired = cleanerThread.initiateShutdown
371
435
// Close all the opened indexes to force unload mmap memory. This does not delete the index files from disk.
372
436
internalCache.asMap().forEach((_, entry) => entry.close())
373
437
// wait for cleaner thread to shutdown
@@ -379,4 +443,4 @@ class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageM
379
443
}
380
444
}
381
445
}
382
- }
446
+ }
0 commit comments