-
Notifications
You must be signed in to change notification settings - Fork 112
fix: type checking #993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: type checking #993
Changes from 9 commits
124d3ea
a933fdb
9897b63
768b6c0
3250848
b165ef3
2c8ea03
a7993a1
0d6cd07
d771d3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -713,7 +713,7 @@ def register_table(self, name: str, table: Table) -> None: | |
name: Name of the resultant table. | ||
table: DataFusion table to add to the session context. | ||
""" | ||
self.ctx.register_table(name, table) | ||
self.ctx.register_table(name, table.table) | ||
|
||
def deregister_table(self, name: str) -> None: | ||
"""Remove a table from the session.""" | ||
|
@@ -752,7 +752,7 @@ def register_parquet( | |
file_extension: str = ".parquet", | ||
skip_metadata: bool = True, | ||
schema: pyarrow.Schema | None = None, | ||
file_sort_order: list[list[Expr]] | None = None, | ||
file_sort_order: list[list[SortExpr]] | None = None, | ||
) -> None: | ||
"""Register a Parquet file as a table. | ||
|
||
|
@@ -783,7 +783,9 @@ def register_parquet( | |
file_extension, | ||
skip_metadata, | ||
schema, | ||
file_sort_order, | ||
[[expr.raw_sort for expr in exprs] for exprs in file_sort_order] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use the same helper function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
if file_sort_order is not None | ||
else None, | ||
) | ||
|
||
def register_csv( | ||
|
@@ -919,7 +921,7 @@ def register_udwf(self, udwf: WindowUDF) -> None: | |
|
||
def catalog(self, name: str = "datafusion") -> Catalog: | ||
"""Retrieve a catalog by name.""" | ||
return self.ctx.catalog(name) | ||
return Catalog(self.ctx.catalog(name)) | ||
|
||
@deprecated( | ||
"Use the catalog provider interface ``SessionContext.Catalog`` to " | ||
|
@@ -1039,7 +1041,7 @@ def read_parquet( | |
file_extension: str = ".parquet", | ||
skip_metadata: bool = True, | ||
schema: pyarrow.Schema | None = None, | ||
file_sort_order: list[list[Expr]] | None = None, | ||
file_sort_order: list[list[Expr | SortExpr]] | None = None, | ||
) -> DataFrame: | ||
"""Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. | ||
|
||
|
@@ -1063,6 +1065,11 @@ def read_parquet( | |
""" | ||
if table_partition_cols is None: | ||
table_partition_cols = [] | ||
file_sort_order = ( | ||
[sort_list_to_raw_sort_list(f) for f in file_sort_order] | ||
if file_sort_order is not None | ||
else None | ||
) | ||
return DataFrame( | ||
self.ctx.read_parquet( | ||
str(path), | ||
|
@@ -1106,7 +1113,7 @@ def read_table(self, table: Table) -> DataFrame: | |
:py:class:`~datafusion.catalog.ListingTable`, create a | ||
:py:class:`~datafusion.dataframe.DataFrame`. | ||
""" | ||
return DataFrame(self.ctx.read_table(table)) | ||
return DataFrame(self.ctx.read_table(table.table)) | ||
|
||
def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream: | ||
"""Execute the ``plan`` and return the results.""" | ||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -85,7 +85,7 @@ class ScalarUDF: | |||
|
||||
def __init__( | ||||
self, | ||||
name: Optional[str], | ||||
name: str, | ||||
func: Callable[..., _R], | ||||
input_types: pyarrow.DataType | list[pyarrow.DataType], | ||||
return_type: _R, | ||||
|
@@ -158,7 +158,7 @@ def state(self) -> List[pyarrow.Scalar]: | |||
pass | ||||
|
||||
@abstractmethod | ||||
def update(self, *values: pyarrow.Array) -> None: | ||||
def update(self, values: pyarrow.Array) -> None: | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks wrong to me. I think we want/need to allow a variable number of values passed for udafs that have multiple state elements. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry, My mistake |
||||
"""Evaluate an array of values and update state.""" | ||||
pass | ||||
|
||||
|
@@ -182,7 +182,7 @@ class AggregateUDF: | |||
|
||||
def __init__( | ||||
self, | ||||
name: Optional[str], | ||||
name: str, | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why remove the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it seems that in rust binding, it's not optional Line 92 in e6f6e66
|
||||
accumulator: Callable[[], Accumulator], | ||||
input_types: list[pyarrow.DataType], | ||||
return_type: pyarrow.DataType, | ||||
|
@@ -277,6 +277,7 @@ def sum_bias_10() -> Summarize: | |||
) | ||||
if name is None: | ||||
name = accum.__call__().__class__.__qualname__.lower() | ||||
assert name is not None | ||||
if isinstance(input_types, pyarrow.DataType): | ||||
input_types = [input_types] | ||||
return AggregateUDF( | ||||
|
@@ -462,7 +463,7 @@ class WindowUDF: | |||
|
||||
def __init__( | ||||
self, | ||||
name: Optional[str], | ||||
name: str, | ||||
func: Callable[[], WindowEvaluator], | ||||
input_types: list[pyarrow.DataType], | ||||
return_type: pyarrow.DataType, | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is changing the user-facing API, no?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i checked the test,i think we should use @ property, but previously we didnt do wrapping somewhere, raw table is returned that's why test passed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've become convinced that the current code is broken and that this is the right change. Thank you @chenkovsky for doing this. I do think we need to put some notes to that effect in the section of user facing changes of the PR description.