@@ -423,31 +423,38 @@ public void close()
423423 }
424424
425425 /**
426- * Creates a monotonically increasing dictionary ID by combining timestamp and dictionary ID.
426+ * Custom epoch for dictionary ID timestamps: 1760889600000L ms since the Unix epoch, i.e. 2025-10-19T16:00:00Z
427+ * NOTE(review): Instant.parse("2025-10-20T00:00:00Z").toEpochMilli() is 1760918400000L (8 hours later); confirm which instant is intended.
428+ * This allows using signed 32-bit seconds for ±68 years (~1957 to ~2093)
429+ */
430+ public static final long CUSTOM_EPOCH_MILLIS = 1760889600000L ;
431+
432+ /**
433+ * Creates a monotonically increasing dictionary ID by combining timestamp and Zstd dictionary ID.
434+ * This is a public API to support external dictionary imports.
427435 * <p>
428436 * The resulting dictionary ID has the following structure:
429- * - Upper 32 bits: timestamp in minutes (signed int)
430- * - Lower 32 bits: Zstd dictionary ID (unsigned int, passed as long due to Java limitations )
437+ * - Upper 32 bits (first 4 bytes) : timestamp in seconds since custom epoch (signed int)
438+ * - Lower 32 bits (last 4 bytes) : Zstd dictionary ID (unsigned int, passed as long)
431439 * <p>
432- * This ensures dictionary IDs are monotonically increasing over time, which helps to identify
433- * the latest dictionary.
440+ * Custom epoch: {@link #CUSTOM_EPOCH_MILLIS} (1760889600000L ms since the Unix epoch, i.e. 2025-10-19T16:00:00Z)
441+ * <p>
442+ * Using signed 32-bit seconds allows representing ±2,147,483,648 seconds (±68.1 years),
443+ * giving a valid range from ~1957 to ~2093, which is sufficient for the software's lifespan.
434444 * <p>
435- * The implementation assumes that dictionary training frequency is significantly larger than
436- * every minute, which a healthy system should do. In the scenario when multiple dictionaries
437- * are trained in the same minute (only possible using manual training), there should not be
438- * correctness concerns since the dictionary is attached to the SSTables, but leads to performance
439- * hit from having too many dictionary. Therefore, such scenario should be avoided at the best.
445+ * This ensures dictionary IDs are monotonically increasing over time, helping to identify
446+ * the latest dictionary.
440447 *
441- * @param currentTimeMillis the current time in milliseconds
442- * @param dictId dictionary ID (unsigned 32-bit value represented as long)
448+ * @param currentTimeMillis the current time in milliseconds since Unix epoch
449+ * @param dictId Zstd dictionary ID (unsigned 32-bit value represented as long)
443450 * @return combined dictionary ID that is monotonically increasing over time
444451 */
445- static long makeDictionaryId (long currentTimeMillis , long dictId )
452+ public static long makeDictionaryId (long currentTimeMillis , long dictId )
446453 {
447- // timestamp in minutes since Unix epoch. Good until year 6053
448- long timestampMinutes = currentTimeMillis / 1000 / 60 ;
449- // Convert timestamp to long and shift to upper 32 bits
450- long combined = timestampMinutes << 32 ;
454+ // timestamp in seconds since custom epoch
455+ long timestampSeconds = ( currentTimeMillis - CUSTOM_EPOCH_MILLIS ) / 1000 ;
456+ // Shift timestamp to upper 32 bits
457+ long combined = timestampSeconds << 32 ;
451458
452459 // Add the unsigned int (already as long) to lower 32 bits
453460 combined |= (dictId & 0xFFFFFFFFL );
0 commit comments