Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 68 additions & 18 deletions src/checkedframe/_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,6 @@ class S(cf.Schema):
customer_id: 2 error(s)
- `nullable=False` failed for 1 / 2 (50.00%) rows: Must not be null
- is_not_null failed for 1 / 2 (50.00%) rows: Must not be null

"""
return Check(
func=_is_not_null,
Expand Down Expand Up @@ -615,7 +614,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
balances: 1 error(s)
- is_not_nan failed for 1 / 3 (33.33%) rows: Must not be NaN

"""
return Check(
func=_is_not_nan,
Expand Down Expand Up @@ -656,7 +654,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
balances: 1 error(s)
- is_not_inf failed for 1 / 3 (33.33%) rows: Must not be inf/-inf

"""
return Check(
func=_is_not_inf,
Expand Down Expand Up @@ -721,7 +718,6 @@ class S(cf.Schema):
- is_between failed for 1 / 3 (33.33%) rows: Must be in range [0, 128]
med_balance: 1 error(s)
- is_between failed for 2 / 3 (66.67%) rows: Must be in range [min_balance, max_balance]

"""
if closed == "both":
l_paren, r_paren = ("[", "]")
Expand Down Expand Up @@ -793,7 +789,6 @@ class S(cf.Schema):
age: 2 error(s)
- less_than failed for 2 / 3 (66.67%) rows: Must be < 10
- less_than failed for 1 / 3 (33.33%) rows: Must be < max_age

"""
return Check(
func=functools.partial(_lt, other=_numeric_to_expr(other)),
Expand Down Expand Up @@ -850,7 +845,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
age: 1 error(s)
- less_than_or_equal_to failed for 1 / 3 (33.33%) rows: Must be <= 10

"""
return Check(
func=functools.partial(_le, other=_numeric_to_expr(other)),
Expand Down Expand Up @@ -908,7 +902,6 @@ class S(cf.Schema):
age: 2 error(s)
- greater_than failed for 2 / 3 (66.67%) rows: Must be > 10
- greater_than failed for 1 / 3 (33.33%) rows: Must be > min_age

"""
return Check(
func=functools.partial(_gt, other=_numeric_to_expr(other)),
Expand Down Expand Up @@ -966,7 +959,6 @@ class S(cf.Schema):
age: 2 error(s)
- greater_than_or_equal_to failed for 1 / 3 (33.33%) rows: Must be >= 10
- greater_than_or_equal_to failed for 1 / 3 (33.33%) rows: Must be >= min_age

"""
return Check(
func=functools.partial(_ge, other=_numeric_to_expr(other)),
Expand Down Expand Up @@ -1012,7 +1004,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
group: 1 error(s)
- equal_to failed for 1 / 3 (33.33%) rows: Must be = A

"""
return Check(
func=functools.partial(_eq, other=_numeric_to_expr(other)),
Expand Down Expand Up @@ -1069,7 +1060,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
prob: 1 error(s)
- approximately_equal_to failed for 1 / 3 (33.33%) rows: Must be approximately equal to 0.5 (rtol=1e-05, atol=1e-08, nan_equal=False)

"""
return Check(
func=functools.partial(
Expand Down Expand Up @@ -1121,7 +1111,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
business_type: 1 error(s)
- is_in failed for 1 / 3 (33.33%) rows: Must be in allowed values ['tech', 'finance']

"""
return Check(
func=functools.partial(_is_in, other=other),
Expand Down Expand Up @@ -1162,7 +1151,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
balances: 1 error(s)
- is_finite failed for 1 / 3 (33.33%) rows: All values must be finite

"""
return Check(
func=_is_finite,
Expand Down Expand Up @@ -1208,7 +1196,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
emails: 1 error(s)
- ends_with failed for 1 / 2 (50.00%) rows: Must end with @gmail.com

"""
return Check(
func=functools.partial(_str_ends_with, suffix=suffix),
Expand Down Expand Up @@ -1254,7 +1241,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
ids: 1 error(s)
- starts_with failed for 1 / 2 (50.00%) rows: Must start with user_

"""
return Check(
func=functools.partial(_str_starts_with, prefix=prefix),
Expand Down Expand Up @@ -1302,7 +1288,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
domains: 1 error(s)
- contains failed for 1 / 2 (50.00%) rows: Must contain \.com$

"""
return Check(
func=functools.partial(_str_contains, pattern=pattern, literal=literal),
Expand Down Expand Up @@ -1348,7 +1333,6 @@ class S(cf.Schema):
SchemaError: Found 1 error(s)
timestamps: 1 error(s)
- is_sorted failed: Must be sorted in ascending order

"""
order = "descending" if descending else "ascending"

Expand Down Expand Up @@ -1406,7 +1390,6 @@ class MySchema(cf.Schema):

SchemaError: Found 1 error(s)
* is_sorted_by failed for 3 / 3 (100.00%) rows: Must be sorted by timestamps, where descending is False

"""
return Check(
func=functools.partial(
Expand Down Expand Up @@ -1455,7 +1438,6 @@ class MySchema(cf.Schema):

SchemaError: Found 1 error(s)
* is_id failed for 3 / 3 (100.00%) rows: group must uniquely identify the DataFrame

"""
return Check(
func=functools.partial(_is_id, subset=subset),
Expand All @@ -1474,6 +1456,74 @@ def cardinality_ratio(
by: str | list[str] | None = None,
allow_duplicates: bool = False,
) -> Check:
"""Tests whether the `left` and `right` columns have the specified cardinality
ratio. The three possible cardinality ratios are '1:1' (each entity in `left` is
related to exactly one entity in `right`), '1:m' (each entity in `left` can be
related to many entities in `right`, but each entity in `right` can only be
related to one entity in `left`), and 'm:1', which is the same as '1:m', except
with `left` and `right` swapped.

Parameters
----------
left : str
The left column
right : str
The right column
cardinality : CardinalityRatio
The required cardinality ratio: one of '1:1', '1:m', or 'm:1'
by : str | list[str] | None, optional
Variables to group by. If specified, the cardinality ratio is checked within
each group, by default None
allow_duplicates : bool, optional
Whether to allow duplicates by `left` and `right`. For example, if
duplicates are allowed, [1, 1, 1], [2, 2, 2] is considered a '1:1'
relationship, by default False

Returns
-------
Check

Examples
--------
.. code-block:: python

import checkedframe as cf
import polars as pl


class MySchema(cf.Schema):
feature = cf.String()
special_value = cf.Int64()
imputed = cf.String(nullable=True)
reason = cf.String()

_cardinality_check = cf.Check.cardinality_ratio(
"imputed",
"reason",
cardinality="m:1",
by="feature",
allow_duplicates=True,
)


df = pl.DataFrame(
{
"feature": ["f1", "f1", "f1", "f2"],
"special_value": [-1, -6, -4, -7],
"imputed": [None, None, "MAX_WIN_P1", None],
"reason": ["o1", "o1", "o2", "o3"],
}
)

MySchema.validate(df)

Output::

.. code-block:: text

SchemaError: Found 1 error(s)
* cardinality_ratio failed for 3 / 4 (75.00%) rows: The relationship between imputed and reason must be m:1 (by=feature, allow_duplicates=True)
"""
return Check(
func=functools.partial(
_cardinality_ratio,
Expand Down
Loading