Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ val modulesUsingJava11 = with(projects) {
dataframeJupyter,
dataframeGeo,
examples.ideaExamples.titanic,
examples.ideaExamples.unsupportedDataSources,
tests,
plugins.dataframeGradlePlugin,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ dependencies {
implementation(libs.exposed.json)
implementation(libs.exposed.money)

// Hibernate + H2 + HikariCP (for Hibernate example)
implementation(libs.hibernate.core)
implementation(libs.hibernate.hikaricp)
implementation(libs.hikaricp)

implementation(libs.h2db)
implementation(libs.sl4jsimple)

// (kotlin) spark support
implementation(libs.kotlin.spark)
compileOnly(libs.spark)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package org.jetbrains.kotlinx.dataframe.examples.hibernate

import jakarta.persistence.Column
import jakarta.persistence.Entity
import jakarta.persistence.GeneratedValue
import jakarta.persistence.GenerationType
import jakarta.persistence.Id
import jakarta.persistence.Table
import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema

/**
 * JPA entity mapped to the `Albums` table.
 *
 * All constructor parameters have defaults, so instances can be created by naming only
 * the fields that matter (see the usages that pass just `title` and `artistId`).
 */
@Entity
@Table(name = "Albums")
class AlbumsEntity(
    // Primary key column `AlbumId`; defaults to null and is generated with the IDENTITY strategy.
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "AlbumId")
    var albumId: Int? = null,

    // Required album title, limited to 160 characters by the column definition.
    @Column(name = "Title", length = 160, nullable = false)
    var title: String = "",

    // Required artist id stored as a plain integer column; no JPA relationship is declared here.
    @Column(name = "ArtistId", nullable = false)
    var artistId: Int = 0,
)

/**
 * JPA entity mapped to the `Artists` table.
 *
 * All constructor parameters have defaults, so an artist can be created from its `name` alone.
 */
@Entity
@Table(name = "Artists")
class ArtistsEntity(
    // Primary key column `ArtistId`; defaults to null and is generated with the IDENTITY strategy.
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "ArtistId")
    var artistId: Int? = null,

    // Required artist name, limited to 120 characters by the column definition.
    @Column(name = "Name", length = 120, nullable = false)
    var name: String = "",
)

/**
 * JPA entity mapped to the `Customers` table.
 *
 * `FirstName`, `LastName` and `Email` are declared `nullable = false`; every other non-id column
 * is optional and modelled as a nullable Kotlin property defaulting to `null`.
 */
@Entity
@Table(name = "Customers")
class CustomersEntity(
    // Primary key column `CustomerId`; defaults to null and is generated with the IDENTITY strategy.
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "CustomerId")
    var customerId: Int? = null,

    // Required, up to 40 characters.
    @Column(name = "FirstName", length = 40, nullable = false)
    var firstName: String = "",

    // Required, up to 20 characters.
    @Column(name = "LastName", length = 20, nullable = false)
    var lastName: String = "",

    // Optional, up to 80 characters.
    @Column(name = "Company", length = 80)
    var company: String? = null,

    // Optional, up to 70 characters.
    @Column(name = "Address", length = 70)
    var address: String? = null,

    // Optional, up to 40 characters.
    @Column(name = "City", length = 40)
    var city: String? = null,

    // Optional, up to 40 characters.
    @Column(name = "State", length = 40)
    var state: String? = null,

    // Optional, up to 40 characters.
    @Column(name = "Country", length = 40)
    var country: String? = null,

    // Optional, up to 10 characters.
    @Column(name = "PostalCode", length = 10)
    var postalCode: String? = null,

    // Optional, up to 24 characters.
    @Column(name = "Phone", length = 24)
    var phone: String? = null,

    // Optional, up to 24 characters.
    @Column(name = "Fax", length = 24)
    var fax: String? = null,

    // Required, up to 60 characters.
    @Column(name = "Email", length = 60, nullable = false)
    var email: String = "",

    // Optional integer column `SupportRepId`; no JPA relationship is declared for it here.
    @Column(name = "SupportRepId")
    var supportRepId: Int? = null,
)

/**
 * DataFrame schema for customer rows, giving typed accessors similar to the Exposed example.
 *
 * Each property is bound through [ColumnName] to a capitalised column name (`Address`, `City`, ...).
 * Unlike the entity's id, [customerId] is non-nullable here.
 */
@DataSchema
data class DfCustomers(
    @ColumnName("Address") val address: String?,
    @ColumnName("City") val city: String?,
    @ColumnName("Company") val company: String?,
    @ColumnName("Country") val country: String?,
    @ColumnName("CustomerId") val customerId: Int,
    @ColumnName("Email") val email: String,
    @ColumnName("Fax") val fax: String?,
    @ColumnName("FirstName") val firstName: String,
    @ColumnName("LastName") val lastName: String,
    @ColumnName("Phone") val phone: String?,
    @ColumnName("PostalCode") val postalCode: String?,
    @ColumnName("State") val state: String?,
    @ColumnName("SupportRepId") val supportRepId: Int?,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
package org.jetbrains.kotlinx.dataframe.examples.hibernate

import jakarta.persistence.Tuple
import jakarta.persistence.criteria.CriteriaBuilder
import jakarta.persistence.criteria.CriteriaDelete
import jakarta.persistence.criteria.CriteriaQuery
import jakarta.persistence.criteria.Expression
import jakarta.persistence.criteria.Root
import org.hibernate.FlushMode
import org.hibernate.SessionFactory
import org.hibernate.cfg.Configuration
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.api.asSequence
import org.jetbrains.kotlinx.dataframe.api.count
import org.jetbrains.kotlinx.dataframe.api.describe
import org.jetbrains.kotlinx.dataframe.api.groupBy
import org.jetbrains.kotlinx.dataframe.api.print
import org.jetbrains.kotlinx.dataframe.api.sortByDesc
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.size

/**
 * Example showing Kotlin DataFrame with Hibernate ORM + H2 in-memory DB.
 * Mirrors logic from the Exposed example: load data, convert to DataFrame, group/describe, write back.
 *
 * The session factory is closed when the example finishes, including when one of the steps throws.
 */
fun main() {
    // `use` guarantees the factory is closed even if a step below fails.
    buildSessionFactory().use { sessionFactory: SessionFactory ->
        sessionFactory.insertSampleData()

        val df = sessionFactory.loadCustomersAsDataFrame()

        // Pure Hibernate + Criteria API approach for counting customers per country
        println("=== Hibernate + Criteria API Approach ===")
        sessionFactory.countCustomersPerCountryWithHibernate()

        println("\n=== DataFrame Approach ===")
        df.analyzeAndPrintResults()

        sessionFactory.replaceCustomersFromDataFrame(df)
    }
}

/**
 * Inserts the demo data in a single transaction: two artists, three albums referencing them,
 * and three customers that are analysed later.
 *
 * @throws IllegalStateException if an artist id is still missing after the flush.
 */
private fun SessionFactory.insertSampleData() {
    withTransaction { session ->
        // a few artists and albums (minimal, not used further; just demo schema)
        val acdc = ArtistsEntity(name = "AC/DC")
        val queen = ArtistsEntity(name = "Queen")
        session.persist(acdc)
        session.persist(queen)
        // Flush before reading the generated artist ids used by the albums below.
        session.flush()

        val acdcId = checkNotNull(acdc.artistId) { "Artist 'AC/DC' has no generated id after flush" }
        val queenId = checkNotNull(queen.artistId) { "Artist 'Queen' has no generated id after flush" }

        listOf(
            AlbumsEntity(title = "High Voltage", artistId = acdcId),
            AlbumsEntity(title = "Back in Black", artistId = acdcId),
            AlbumsEntity(title = "A Night at the Opera", artistId = queenId),
        ).forEach { album -> session.persist(album) }

        // customers we'll analyze using DataFrame
        listOf(
            CustomersEntity(
                firstName = "John",
                lastName = "Doe",
                email = "[email protected]",
                country = "USA",
            ),
            CustomersEntity(
                firstName = "Jane",
                lastName = "Smith",
                email = "[email protected]",
                country = "USA",
            ),
            CustomersEntity(
                firstName = "Alice",
                lastName = "Wang",
                email = "[email protected]",
                country = "Canada",
            ),
        ).forEach { customer -> session.persist(customer) }
    }
}

/**
 * Reads every [CustomersEntity] through a Criteria query inside a read-only transaction
 * and converts the result into a typed [DataFrame].
 *
 * An entity without an id is represented with `customerId = -1`.
 */
private fun SessionFactory.loadCustomersAsDataFrame(): DataFrame<DfCustomers> =
    withReadOnlyTransaction { session ->
        val builder: CriteriaBuilder = session.criteriaBuilder
        val selectAll: CriteriaQuery<CustomersEntity> = builder.createQuery(CustomersEntity::class.java)
        val customers: Root<CustomersEntity> = selectAll.from(CustomersEntity::class.java)
        selectAll.select(customers)

        val entities = session.createQuery(selectAll).resultList

        // Copy each entity into the schema class so the DataFrame gets typed columns.
        val rows = entities.map { entity ->
            DfCustomers(
                address = entity.address,
                city = entity.city,
                company = entity.company,
                country = entity.country,
                customerId = entity.customerId ?: -1,
                email = entity.email,
                fax = entity.fax,
                firstName = entity.firstName,
                lastName = entity.lastName,
                phone = entity.phone,
                postalCode = entity.postalCode,
                state = entity.state,
                supportRepId = entity.supportRepId,
            )
        }
        rows.toDataFrame()
    }

/**
 * DTO used for aggregation projection.
 *
 * @property country the grouped country; nullable because the customer's country column is optional,
 *   so a group of customers without a country must be representable.
 * @property customerCount number of customers in the group.
 */
private data class CountryCountDto(
    val country: String?,
    val customerCount: Long,
)

/**
 * **Hibernate + Criteria API:**
 * - ✅ Database-level aggregation (efficient)
 * - ✅ Type-safe queries
 * - ❌ Verbose syntax
 * - ❌ Limited to SQL-like operations
 *
 * Counts customers per country with a grouped Criteria query, ordered by descending count,
 * and prints one line per country.
 */
private fun SessionFactory.countCustomersPerCountryWithHibernate() {
    withReadOnlyTransaction { session ->
        val cb = session.criteriaBuilder
        val cq: CriteriaQuery<CountryCountDto> = cb.createQuery(CountryCountDto::class.java)
        val root: Root<CustomersEntity> = cq.from(CustomersEntity::class.java)

        val countryPath = root.get<String>("country")
        // `customerId` is declared as Int on the entity, so the path is typed accordingly.
        val idPath = root.get<Int>("customerId")

        // count(...) itself yields a Long, matching CountryCountDto.customerCount.
        val countExpr = cb.count(idPath)

        cq.select(
            cb.construct(
                CountryCountDto::class.java,
                countryPath, // country
                countExpr, // customerCount
            ),
        )
        cq.groupBy(countryPath)
        cq.orderBy(cb.desc(countExpr))

        val results = session.createQuery(cq).resultList
        results.forEach { dto ->
            println("${dto.country}: ${dto.customerCount} customers")
        }
    }
}

/**
 * **DataFrame approach:**
 * - ✅ Rich analytical operations
 * - ✅ Fluent, readable API
 * - ✅ Flexible data transformations
 * - ❌ In-memory processing (less efficient for large datasets)
 *
 * Prints the frame size, the customers-per-country table (largest group first),
 * and the general statistics of the frame.
 */
private fun DataFrame<DfCustomers>.analyzeAndPrintResults() {
    val dimensions = size()
    println(dimensions)

    // same operation as Exposed example: customers per country
    val customersPerCountry = groupBy { country }.count()
    val largestFirst = customersPerCountry.sortByDesc { "count"<Int>() }
    largestFirst.print(columnTypes = true, borders = true)

    // general statistics
    val statistics = describe()
    statistics.print(columnTypes = true, borders = true)
}

/**
 * Replaces the contents of the customers table with the rows of [df].
 *
 * The delete and the re-insert run in ONE transaction, so a failure while inserting rolls the
 * delete back as well instead of leaving the table empty.
 *
 * @param df rows to write back; their `customerId` values are not reused (see [toCustomersEntity]).
 */
private fun SessionFactory.replaceCustomersFromDataFrame(df: DataFrame<DfCustomers>) {
    withTransaction { session ->
        // Bulk-delete every existing customer row.
        val criteriaBuilder: CriteriaBuilder = session.criteriaBuilder
        val criteriaDelete: CriteriaDelete<CustomersEntity> =
            criteriaBuilder.createCriteriaDelete(CustomersEntity::class.java)
        criteriaDelete.from(CustomersEntity::class.java)
        session.createMutationQuery(criteriaDelete).executeUpdate()

        // Re-insert the DataFrame rows as fresh entities within the same transaction.
        df.asSequence().forEach { row ->
            session.persist(row.toCustomersEntity())
        }
    }
}

/**
 * Builds a new, not yet persisted [CustomersEntity] from this row.
 * The row's `customerId` is intentionally dropped so the database assigns a new id.
 */
private fun DataRow<DfCustomers>.toCustomersEntity(): CustomersEntity =
    CustomersEntity(
        customerId = null, // let DB generate
        firstName = firstName,
        lastName = lastName,
        company = company,
        address = address,
        city = city,
        state = state,
        country = country,
        postalCode = postalCode,
        phone = phone,
        fax = fax,
        email = email,
        supportRepId = supportRepId,
    )

/** Opens a session, passes it to [block], and closes the session afterwards; returns [block]'s result. */
private inline fun <T> SessionFactory.withSession(block: (session: org.hibernate.Session) -> T): T =
    openSession().use { session -> block(session) }

/**
 * Runs [block] inside a new session and transaction.
 *
 * Commits when [block] completes. If [block] or the commit throws an [Exception], the transaction
 * is rolled back when it is still active and the original exception is rethrown. A failure of the
 * rollback itself is attached as a suppressed exception instead of hiding the original cause.
 */
private inline fun SessionFactory.withTransaction(block: (session: org.hibernate.Session) -> Unit) {
    withSession { session ->
        val transaction = session.beginTransaction()
        try {
            block(session)
            transaction.commit()
        } catch (e: Exception) {
            // Only roll back a transaction that is still active (a failed commit may have ended it).
            if (transaction.isActive) {
                try {
                    transaction.rollback()
                } catch (rollbackFailure: Exception) {
                    e.addSuppressed(rollbackFailure)
                }
            }
            throw e
        }
    }
}

/**
 * Read-only transaction helper for SELECT queries to minimize overhead.
 *
 * Opens a session, begins a transaction, marks the session as default read-only and switches
 * flushing to [FlushMode.MANUAL], then returns the result of [block] after committing.
 *
 * If [block] or the commit throws an [Exception], the transaction is rolled back when it is still
 * active and the original exception is rethrown. A failure of the rollback itself is attached as a
 * suppressed exception instead of hiding the original cause.
 */
private inline fun <T> SessionFactory.withReadOnlyTransaction(block: (session: org.hibernate.Session) -> T): T {
    return withSession { session ->
        val transaction = session.beginTransaction()
        // Minimize overhead for read operations
        session.isDefaultReadOnly = true
        session.hibernateFlushMode = FlushMode.MANUAL
        try {
            val result = block(session)
            transaction.commit()
            result
        } catch (e: Exception) {
            // Only roll back a transaction that is still active (a failed commit may have ended it).
            if (transaction.isActive) {
                try {
                    transaction.rollback()
                } catch (rollbackFailure: Exception) {
                    e.addSuppressed(rollbackFailure)
                }
            }
            throw e
        }
    }
}


/** Creates the [SessionFactory] from the configuration in resources/hibernate/hibernate.cfg.xml. */
private fun buildSessionFactory(): SessionFactory =
    Configuration()
        .configure("hibernate/hibernate.cfg.xml")
        .buildSessionFactory()
Loading