Skip to content

Commit 95c575a

Browse files
authored
Added a fallback mechanism for the reading metadata (#1588)
* Force consistent handling of BIGINT as Long across JDBC metadata and MariaDB driver, improve fallback mechanisms for unsupported JDBC metadata methods, and update relevant tests. * Commented out BIGINT type mapping for investigation and adjusted minor code details * Linter
1 parent 0f9062b commit 95c575a

File tree

4 files changed

+129
-34
lines changed

4 files changed

+129
-34
lines changed

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt

Lines changed: 83 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,10 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
380380
tableColumnMetadata.jdbcType == Types.NUMERIC &&
381381
tableColumnMetadata.javaClassName == "java.lang.Double" -> Double::class
382382

383+
// Intended behavior (currently disabled, see TODO below): force BIGINT to always be Long, regardless of javaClassName
384+
// Some JDBC drivers (e.g., MariaDB) may report Integer for small BIGINT values
385+
// TODO: tableColumnMetadata.jdbcType == Types.BIGINT -> Long::class
386+
383387
else -> jdbcTypeToKTypeMapping[tableColumnMetadata.jdbcType] ?: String::class
384388
}
385389

@@ -402,14 +406,22 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
402406
/**
403407
* Retrieves column metadata from a JDBC ResultSet.
404408
*
405-
* By default, this method reads column metadata from [ResultSetMetaData],
406-
* which is fast and supported by most JDBC drivers.
407-
* If the driver does not provide sufficient information (e.g., `isNullable` unknown),
408-
* it falls back to using [DatabaseMetaData.getColumns] for affected columns.
409+
* This method reads column metadata from [ResultSetMetaData] with graceful fallbacks
410+
* for JDBC drivers that throw [java.sql.SQLFeatureNotSupportedException] for certain methods
411+
* (e.g., Apache Hive).
412+
*
413+
* Fallback behavior for unsupported methods:
414+
* - `getColumnName()` → `getColumnLabel()` → `"columnN"` (where N is the 1-based column index)
415+
* - `getTableName()` → extracted from the column name if it contains '.' → `null`
416+
* - `isNullable()` → [DatabaseMetaData.getColumns] → `true` (assume nullable)
417+
* - `getColumnTypeName()` → `"OTHER"`
418+
* - `getColumnType()` → [java.sql.Types.OTHER]
419+
* - `getColumnDisplaySize()` → `0`
420+
* - `getColumnClassName()` → `"java.lang.Object"`
409421
*
410422
* Override this method in subclasses to provide database-specific behavior
411423
* (for example, to disable fallback for databases like Teradata or Oracle
412-
* where `DatabaseMetaData.getColumns` is known to be slow).
424+
* where [DatabaseMetaData.getColumns] is known to be slow).
413425
*
414426
* @param resultSet The [ResultSet] containing query results.
415427
* @return A list of [TableColumnMetadata] objects.
@@ -418,16 +430,44 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
418430
val rsMetaData = resultSet.metaData
419431
val connection = resultSet.statement.connection
420432
val dbMetaData = connection.metaData
421-
val catalog = connection.catalog.takeUnless { it.isNullOrBlank() }
422-
val schema = connection.schema.takeUnless { it.isNullOrBlank() }
433+
434+
// Some JDBC drivers (e.g., Hive) throw SQLFeatureNotSupportedException for getCatalog()/getSchema(); fall back to null
435+
val catalog = try {
436+
connection.catalog.takeUnless { it.isNullOrBlank() }
437+
} catch (_: Exception) {
438+
null
439+
}
440+
441+
val schema = try {
442+
connection.schema.takeUnless { it.isNullOrBlank() }
443+
} catch (_: Exception) {
444+
null
445+
}
423446

424447
val columnCount = rsMetaData.columnCount
425448
val columns = mutableListOf<TableColumnMetadata>()
426449
val nameCounter = mutableMapOf<String, Int>()
427450

428451
for (index in 1..columnCount) {
429-
val columnName = rsMetaData.getColumnName(index)
430-
val tableName = rsMetaData.getTableName(index)
452+
// Try to getColumnName, fallback to getColumnLabel, then generate name
453+
val columnName = try {
454+
rsMetaData.getColumnName(index)
455+
} catch (_: Exception) {
456+
try {
457+
rsMetaData.getColumnLabel(index)
458+
} catch (_: Exception) {
459+
"column$index"
460+
}
461+
}
462+
463+
// Some JDBC drivers (e.g., Apache Hive) throw SQLFeatureNotSupportedException for getTableName()
464+
val tableName = try {
465+
rsMetaData.getTableName(index).takeUnless { it.isBlank() }
466+
} catch (_: Exception) {
467+
// Fallback: try to extract table name from column name if it contains '.'
468+
val dotIndex = columnName.lastIndexOf('.')
469+
if (dotIndex > 0) columnName.take(dotIndex) else null
470+
}
431471

432472
// Try to detect nullability from ResultSetMetaData
433473
val isNullable = try {
@@ -436,25 +476,48 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
436476

437477
ResultSetMetaData.columnNullable -> true
438478

439-
ResultSetMetaData.columnNullableUnknown -> {
440-
// Unknown nullability: assume it nullable, may trigger fallback
441-
true
442-
}
479+
// Unknown nullability: assume it is nullable; may trigger fallback
480+
ResultSetMetaData.columnNullableUnknown -> true
443481

444482
else -> true
445483
}
446484
} catch (_: Exception) {
447485
// Some drivers may throw for unsupported features
448-
// In that case, fallback to DatabaseMetaData
449-
dbMetaData.getColumns(catalog, schema, tableName, columnName).use { cols ->
450-
if (cols.next()) !cols.getString("IS_NULLABLE").equals("NO", ignoreCase = true) else true
486+
// Try fallback to DatabaseMetaData, with additional safety
487+
try {
488+
dbMetaData.getColumns(catalog, schema, tableName, columnName).use { cols ->
489+
if (cols.next()) !cols.getString("IS_NULLABLE").equals("NO", ignoreCase = true) else true
490+
}
491+
} catch (_: Exception) {
492+
// Fallback failed, assume nullable as the safest default
493+
true
451494
}
452495
}
453496

454-
val columnType = rsMetaData.getColumnTypeName(index)
455-
val jdbcType = rsMetaData.getColumnType(index)
456-
val displaySize = rsMetaData.getColumnDisplaySize(index)
457-
val javaClassName = rsMetaData.getColumnClassName(index)
497+
// Fall back to safe defaults when the driver throws for these metadata methods
498+
val columnType = try {
499+
rsMetaData.getColumnTypeName(index)
500+
} catch (_: Exception) {
501+
"OTHER"
502+
}
503+
504+
val jdbcType = try {
505+
rsMetaData.getColumnType(index)
506+
} catch (_: Exception) {
507+
Types.OTHER
508+
}
509+
510+
val displaySize = try {
511+
rsMetaData.getColumnDisplaySize(index)
512+
} catch (_: Exception) {
513+
0
514+
}
515+
516+
val javaClassName = try {
517+
rsMetaData.getColumnClassName(index)
518+
} catch (_: Exception) {
519+
"java.lang.Object"
520+
}
458521

459522
val uniqueName = manageColumnNameDuplication(nameCounter, columnName)
460523

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/MariaDb.kt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,22 @@ public object MariaDb : DbType("mariadb") {
1818
get() = "org.mariadb.jdbc.Driver"
1919

2020
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
21+
// Force BIGINT to always be Long, regardless of javaClassName
22+
// MariaDB JDBC driver may report Integer for small BIGINT values
23+
// TODO: investigate the corner case
24+
25+
// if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) {
26+
// val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable)
27+
// return ColumnSchema.Value(kType)
28+
// }
29+
30+
if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" ||
31+
tableColumnMetadata.sqlTypeName == "INT UNSIGNED"
32+
) {
33+
val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable)
34+
return ColumnSchema.Value(kType)
35+
}
36+
2137
if (tableColumnMetadata.sqlTypeName == "SMALLINT" && tableColumnMetadata.javaClassName == "java.lang.Short") {
2238
val kType = Short::class.createType(nullable = tableColumnMetadata.isNullable)
2339
return ColumnSchema.Value(kType)
@@ -35,6 +51,19 @@ public object MariaDb : DbType("mariadb") {
3551
)
3652

3753
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
54+
// Force BIGINT to always be Long, regardless of javaClassName
55+
// MariaDB JDBC driver may report Integer for small BIGINT values
56+
// TODO: investigate the corner case
57+
// if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) {
58+
// return Long::class.createType(nullable = tableColumnMetadata.isNullable)
59+
// }
60+
61+
if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" ||
62+
tableColumnMetadata.sqlTypeName == "INT UNSIGNED"
63+
) {
64+
return Long::class.createType(nullable = tableColumnMetadata.isNullable)
65+
}
66+
3867
if (tableColumnMetadata.sqlTypeName == "SMALLINT" && tableColumnMetadata.javaClassName == "java.lang.Short") {
3968
return Short::class.createType(nullable = tableColumnMetadata.isNullable)
4069
}

dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/commonTestScenarios.kt

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,48 +13,51 @@ import java.sql.Connection
1313
import java.sql.ResultSet
1414
import kotlin.reflect.typeOf
1515

16+
private const val TEST_TABLE_NAME = "testtable123"
17+
1618
internal fun inferNullability(connection: Connection) {
19+
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $TEST_TABLE_NAME") }
20+
1721
// prepare tables and data
1822
@Language("SQL")
1923
val createTestTable1Query = """
20-
CREATE TABLE TestTable1 (
24+
CREATE TABLE $TEST_TABLE_NAME (
2125
id INT PRIMARY KEY,
2226
name VARCHAR(50),
2327
surname VARCHAR(50),
2428
age INT NOT NULL
2529
)
2630
"""
2731

28-
connection.createStatement().execute(createTestTable1Query)
32+
connection.createStatement().use { st -> st.execute(createTestTable1Query) }
2933

3034
connection.createStatement()
31-
.execute("INSERT INTO TestTable1 (id, name, surname, age) VALUES (1, 'John', 'Crawford', 40)")
35+
.execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (1, 'John', 'Crawford', 40)")
3236
connection.createStatement()
33-
.execute("INSERT INTO TestTable1 (id, name, surname, age) VALUES (2, 'Alice', 'Smith', 25)")
37+
.execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (2, 'Alice', 'Smith', 25)")
3438
connection.createStatement()
35-
.execute("INSERT INTO TestTable1 (id, name, surname, age) VALUES (3, 'Bob', 'Johnson', 47)")
39+
.execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (3, 'Bob', 'Johnson', 47)")
3640
connection.createStatement()
37-
.execute("INSERT INTO TestTable1 (id, name, surname, age) VALUES (4, 'Sam', NULL, 15)")
41+
.execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (4, 'Sam', NULL, 15)")
3842

3943
// start testing `readSqlTable` method
4044

4145
// with default inferNullability: Boolean = true
42-
val tableName = "TestTable1"
43-
val df = DataFrame.readSqlTable(connection, tableName)
46+
val df = DataFrame.readSqlTable(connection, TEST_TABLE_NAME)
4447
df.schema().columns["id"]!!.type shouldBe typeOf<Int>()
4548
df.schema().columns["name"]!!.type shouldBe typeOf<String>()
4649
df.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
4750
df.schema().columns["age"]!!.type shouldBe typeOf<Int>()
4851

49-
val dataSchema = DataFrameSchema.readSqlTable(connection, tableName)
52+
val dataSchema = DataFrameSchema.readSqlTable(connection, TEST_TABLE_NAME)
5053
dataSchema.columns.size shouldBe 4
5154
dataSchema.columns["id"]!!.type shouldBe typeOf<Int>()
5255
dataSchema.columns["name"]!!.type shouldBe typeOf<String?>()
5356
dataSchema.columns["surname"]!!.type shouldBe typeOf<String?>()
5457
dataSchema.columns["age"]!!.type shouldBe typeOf<Int>()
5558

5659
// with inferNullability: Boolean = false
57-
val df1 = DataFrame.readSqlTable(connection, tableName, inferNullability = false)
60+
val df1 = DataFrame.readSqlTable(connection, TEST_TABLE_NAME, inferNullability = false)
5861
df1.schema().columns["id"]!!.type shouldBe typeOf<Int>()
5962

6063
// this column changed its type because it doesn't contain nulls
@@ -70,7 +73,7 @@ internal fun inferNullability(connection: Connection) {
7073
@Language("SQL")
7174
val sqlQuery =
7275
"""
73-
SELECT name, surname, age FROM TestTable1
76+
SELECT name, surname, age FROM $TEST_TABLE_NAME
7477
""".trimIndent()
7578

7679
val df2 = DataFrame.readSqlQuery(connection, sqlQuery)
@@ -97,7 +100,7 @@ internal fun inferNullability(connection: Connection) {
97100

98101
connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE).use { st ->
99102
@Language("SQL")
100-
val selectStatement = "SELECT * FROM TestTable1"
103+
val selectStatement = "SELECT * FROM $TEST_TABLE_NAME"
101104

102105
st.executeQuery(selectStatement).use { rs ->
103106
// with default inferNullability: Boolean = true
@@ -130,7 +133,7 @@ internal fun inferNullability(connection: Connection) {
130133
}
131134
// end testing `readResultSet` method
132135

133-
connection.createStatement().execute("DROP TABLE TestTable1")
136+
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $TEST_TABLE_NAME") }
134137
}
135138

136139
/**

dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ class MariadbTest {
338338
val result = df1.filter { it[Table1MariaDb::id] == 1 }
339339
result[0][26] shouldBe "textValue1"
340340
val byteArray = "tinyblobValue".toByteArray()
341-
(result[0][22] as Blob).getBytes(1, byteArray.size) contentEquals byteArray
341+
result[0][22] shouldBe byteArray
342342

343343
val schema = DataFrameSchema.readSqlTable(connection, "table1")
344344
schema.columns["id"]!!.type shouldBe typeOf<Int>()

0 commit comments

Comments
 (0)