-
Notifications
You must be signed in to change notification settings - Fork 49
Open
Description
In the latest version of the package, `dplyr::slice_sample()` now fails when `n` is one million or more. I think this would likely be fixed by using DuckDB's native `SAMPLE` clause (https://duckdb.org/docs/stable/sql/samples) for the translation.
Current version
library(duckdb)
#> Loading required package: DBI
packageVersion("duckdb")
#> [1] '1.5.0'
con <- dbConnect(duckdb::duckdb(), ":memory:")
person_df <- data.frame(
person_id = 1:200000
)
dbWriteTable(con, "person", person_df)
dplyr::tbl(con, "person") |>
dplyr::slice_sample(n = 1000000) |>
dplyr::show_query()
#> <SQL>
#> SELECT person_id
#> FROM (
#> SELECT person.*, ROW_NUMBER() OVER (ORDER BY RANDOM()) AS col01
#> FROM person
#> ) q01
#> WHERE (col01 <= 1000000)
dplyr::tbl(con, "person") |>
dplyr::slice_sample(n = 1000000)
#> Error in `collect()`:
#> ! Failed to collect lazy table.
#> Caused by error in `duckdb_result()`:
#> ! Invalid Error: Invalid Input Error: Invalid input for arg_min/arg_max: n value must be < 1000000
#> ℹ Context: rapi_execute
#> ℹ Error type: INVALID
Created on 2026-03-24 with reprex v2.1.1
Older version
library(duckdb)
#> Loading required package: DBI
packageVersion("duckdb")
#> [1] '1.4.1'
con <- dbConnect(duckdb::duckdb(), ":memory:")
person_df <- data.frame(
person_id = 1:200000
)
dbWriteTable(con, "person", person_df)
dplyr::tbl(con, "person") |>
dplyr::slice_sample(n = 1000000) |>
dplyr::show_query()
#> <SQL>
#> SELECT person_id
#> FROM (
#> SELECT person.*, ROW_NUMBER() OVER (ORDER BY RANDOM()) AS col01
#> FROM person
#> ) q01
#> WHERE (col01 <= 1000000)
dplyr::tbl(con, "person") |>
dplyr::slice_sample(n = 1000000)
#> # Source: SQL [?? x 1]
#> # Database: DuckDB 1.4.1 [root@Darwin 25.3.0:R 4.5.2/:memory:]
#> person_id
#> <int>
#> 1 177113
#> 2 30560
#> 3 116833
#> 4 109803
#> 5 2931
#> 6 80664
#> 7 60003
#> 8 27179
#> 9 42264
#> 10 18916
#> # ℹ more rows
Created on 2026-03-24 with reprex v2.1.1
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels