Skip to content

Commit d827438

Browse files
authored
feat: add checkSortedness option to joinAsOf (#374)
Context: pola-rs/polars#21724. Adds a `checkSortedness` option to `joinAsof` so that the sortedness warning can be suppressed when the `by` option is provided, since Polars cannot verify sortedness in that case. The option defaults to `true`, which preserves the existing behavior.
1 parent 07fe862 commit d827438

File tree

4 files changed

+39
-2
lines changed

4 files changed

+39
-2
lines changed

__tests__/dataframe.test.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,6 +1933,24 @@ describe("join", () => {
19331933
expect(out.shape).toEqual({ height: 15, width: 4 });
19341934
});
19351935
});
1936+
describe("joinAsOf", () => {
1937+
const df = pl.DataFrame({ a: [1, 1, 1, 2, 2, 2], b: [2, 1, 3, 1, 2, 3] });
1938+
test("errors when not sorted", () => {
1939+
expect(() => df.joinAsof(df, { on: "b" })).toThrow(/sorted/i);
1940+
});
1941+
1942+
test("does not error when not sorted but by is specified", () => {
1943+
expect(() => df.joinAsof(df, { on: "b", by: "a" })).not.toThrow();
1944+
});
1945+
1946+
test("skips sortedness check when checkSortedness is false", () => {
1947+
const df = pl.DataFrame({ a: [1, 1, 1, 2, 2, 2], b: [2, 1, 3, 1, 2, 3] });
1948+
1949+
expect(() =>
1950+
df.joinAsof(df, { on: "b", checkSortedness: false }),
1951+
).not.toThrow();
1952+
});
1953+
});
19361954
describe("io", () => {
19371955
const df = pl.DataFrame([
19381956
pl.Series("foo", [1, 2, 9], pl.Int16),

polars/dataframe.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,10 @@ export interface DataFrame<S extends Schema = any>
990990
* - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
991991
* @param options.allowParallel Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.
992992
* @param options.forceParallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.
993+
* @param options.checkSortedness
994+
* Check the sortedness of the asof keys. If the keys are not sorted Polars
995+
* will error, or in case of 'by' argument raise a warning. This might become
996+
* a hard error in the future.
993997
*
994998
* @example
995999
* ```
@@ -1045,6 +1049,7 @@ export interface DataFrame<S extends Schema = any>
10451049
tolerance?: number | string;
10461050
allowParallel?: boolean;
10471051
forceParallel?: boolean;
1052+
checkSortedness?: boolean;
10481053
},
10491054
): DataFrame;
10501055
lazy(): LazyDataFrame<S>;

polars/lazy/dataframe.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,10 @@ export interface LazyDataFrame<S extends Schema = any>
362362
- "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
363363
@param options.allowParallel Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.
364364
@param options.forceParallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.
365+
@param options.checkSortedness
366+
Check the sortedness of the asof keys. If the keys are not sorted Polars
367+
will error, or in case of 'by' argument raise a warning. This might become
368+
a hard error in the future.
365369
366370
367371
@example
@@ -418,6 +422,7 @@ export interface LazyDataFrame<S extends Schema = any>
418422
tolerance?: number | string;
419423
allowParallel?: boolean;
420424
forceParallel?: boolean;
425+
checkSortedness?: boolean;
421426
},
422427
): LazyDataFrame;
423428
/**
@@ -1046,9 +1051,16 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => {
10461051
allowParallel: true,
10471052
forceParallel: false,
10481053
strategy: "backward",
1054+
checkSortedness: true,
10491055
...options,
10501056
};
1051-
const { suffix, strategy, allowParallel, forceParallel } = options;
1057+
const {
1058+
suffix,
1059+
strategy,
1060+
allowParallel,
1061+
forceParallel,
1062+
checkSortedness,
1063+
} = options;
10521064
let leftOn: string | undefined;
10531065
let rightOn: string | undefined;
10541066
if (!other?._ldf) {
@@ -1105,6 +1117,7 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => {
11051117
strategy,
11061118
toleranceNum,
11071119
toleranceStr,
1120+
checkSortedness ?? true,
11081121
);
11091122

11101123
return _LazyDataFrame(ldf);

src/lazy/dataframe.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ impl JsLazyFrame {
312312
strategy: String,
313313
tolerance: Option<Wrap<AnyValue<'_>>>,
314314
tolerance_str: Option<String>,
315+
check_sortedness: bool,
315316
) -> JsLazyFrame {
316317
let strategy = match strategy.as_ref() {
317318
"forward" => AsofStrategy::Forward,
@@ -340,7 +341,7 @@ impl JsLazyFrame {
340341
}),
341342
tolerance_str: tolerance_str.map(|s| s.into()),
342343
allow_eq: true,
343-
check_sortedness: true,
344+
check_sortedness,
344345
})))
345346
.suffix(suffix)
346347
.finish()

0 commit comments

Comments
 (0)