Skip to content
14 changes: 10 additions & 4 deletions python/datafusion/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@

__all__ = [
"Expr",
"RawExpr",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To remove.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think removing this caused the test to fail. I'll add this again and run the tests locally again.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its removal is causing the tests to fail.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i added a line in the test:

if isinstance(internal_attr, list):
            assert isinstance(wrapped_attr, list)
            for val in internal_attr:
                if isinstance(val, str) and val.startswith("Raw"): #<---- added this line since we check for all Raw* classes in the previous part of the code
                    continue
                assert val in wrapped_attr

"Column",
"Literal",
"BinaryExpr",
Expand Down Expand Up @@ -193,10 +194,15 @@ class Expr:
:ref:`Expressions` in the online documentation for more information.
"""

def __init__(self, expr: expr_internal.Expr) -> None:
def __init__(self, expr: expr_internal.RawExpr) -> None:
"""This constructor should not be called by the end user."""
self.expr = expr

@property
def raw_expr(self) -> expr_internal.RawExpr:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need this.

"""Returns the underlying RawExpr instance."""
return self.expr

def to_variant(self) -> Any:
"""Convert this expression into a python object if possible."""
return self.expr.to_variant()
Expand Down Expand Up @@ -383,7 +389,7 @@ def literal(value: Any) -> Expr:
value = pa.scalar(value, type=pa.string_view())
if not isinstance(value, pa.Scalar):
value = pa.scalar(value)
return Expr(expr_internal.Expr.literal(value))
return Expr(expr_internal.RawExpr.literal(value))

@staticmethod
def string_literal(value: str) -> Expr:
Expand All @@ -398,13 +404,13 @@ def string_literal(value: str) -> Expr:
"""
if isinstance(value, str):
value = pa.scalar(value, type=pa.string())
return Expr(expr_internal.Expr.literal(value))
return Expr(expr_internal.RawExpr.literal(value))
return Expr.literal(value)

@staticmethod
def column(value: str) -> Expr:
"""Creates a new expression representing a column."""
return Expr(expr_internal.Expr.column(value))
return Expr(expr_internal.RawExpr.column(value))

def alias(self, name: str) -> Expr:
"""Assign a name to the expression."""
Expand Down
10 changes: 8 additions & 2 deletions python/tests/test_wrapper_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,15 @@ def missing_exports(internal_obj, wrapped_obj) -> None:
return

for attr in dir(internal_obj):
if attr in ["_global_ctx"]:
# Skip internal context and RawExpr (which is handled by Expr class)
if attr in ["_global_ctx", "RawExpr"]:
continue

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you apply ruff it should correct these whitespace errors. I recommend turning on pre-commit in your local workspace to catch them before they get to CI.

# Check if RawExpr functionality is covered by Expr class
if attr == "RawExpr" and hasattr(wrapped_obj, "Expr"):
expr_class = getattr(wrapped_obj, "Expr")
assert hasattr(expr_class, "raw_expr"), "Expr class must provide raw_expr property"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of special casing it like this, we should be able to write a more general solution that checks all class names. For the internal representation they can begin with Raw and be converted to their non-raw counterparts.

Copy link
Contributor Author

@Spaarsh Spaarsh Mar 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I am sorry. I thought that I could include that after renaming other classes. I am now simply extracting the base_class name from the Raw attr name via string slicing and then use it to check if the base_class has such an attribute:

if attr in ["_global_ctx"]:
   continue
        
# Check if Raw* classes have corresponding wrapper classes
if attr.startswith("Raw"):
   base_class = attr[3:]  # Remove "Raw" prefix
   assert hasattr(wrapped_obj, base_class), f"Missing implementation for {attr}"
   continue

Sorry for being this clumsy. Thanks a lot for your patience.

continue
assert attr in dir(wrapped_obj)

internal_attr = getattr(internal_obj, attr)
wrapped_attr = getattr(wrapped_obj, attr)
Expand Down
2 changes: 1 addition & 1 deletion src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ pub mod window;
use sort_expr::{to_sort_expressions, PySortExpr};

/// A PyExpr that can be used on a DataFrame
#[pyclass(name = "Expr", module = "datafusion.expr", subclass)]
#[pyclass(name = "RawExpr", module = "datafusion.expr", subclass)]
#[derive(Debug, Clone)]
pub struct PyExpr {
pub expr: Expr,
Expand Down