Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unpin spark requirements #2661

Merged
merged 15 commits into from
Feb 15, 2024
2 changes: 1 addition & 1 deletion .github/workflows/build_docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python_version: ["3.8", "3.10"]
python_version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand Down
11 changes: 0 additions & 11 deletions docs/source/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,6 @@ Deep Feature Synthesis
dfs
get_valid_primitives

Wrappers
~~~~~~~~
.. currentmodule:: featuretools

scikit-learn (BETA)
-------------------
.. autosummary::
:toctree: generated/

wrappers.DFSTransformer
tamargrey marked this conversation as resolved.
Show resolved Hide resolved

Timedelta
~~~~~~~~~
.. currentmodule:: featuretools
Expand Down
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Future Release
* Add workaround for pandas 2.2.0 bug with nunique and unpin pandas (:pr:`2657`)
* Changes
* Fix deprecation warnings with is_categorical_dtype (:pr:`2641`)
* Remove woodwork, pyarrow, numpy, and pandas pins for spark installation (:pr:`2661`)
* Documentation Changes
* Update Featuretools logo to display properly in dark mode (:pr:`2632`)
* Testing Changes
Expand Down
12 changes: 8 additions & 4 deletions featuretools/entityset/entityset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,11 +1269,15 @@ def add_last_time_indexes(self, updated_dataframes=None):
lti_df = pd.Series([], dtype="object")
else:
if lti_is_spark:
lti_df["last_time"] = ps.to_datetime(lti_df["last_time"])
lti_df["last_time_old"] = ps.to_datetime(
lti_df["last_time_old"],
)
# TODO: Figure out a workaround for fillna and replace
if lti_df["last_time_old"].dtype != "datetime64[ns]":
lti_df["last_time_old"] = ps.to_datetime(
lti_df["last_time_old"],
)
if lti_df["last_time"].dtype != "datetime64[ns]":
lti_df["last_time"] = ps.to_datetime(
tamargrey marked this conversation as resolved.
Show resolved Hide resolved
lti_df["last_time"],
)
lti_df = lti_df.max(axis=1)
else:
lti_df["last_time"] = lti_df["last_time"].astype(
Expand Down
2 changes: 1 addition & 1 deletion featuretools/primitives/standard/aggregation/num_unique.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,6 @@ def finalize(s):

return dd.Aggregation(self.name, chunk=chunk, agg=agg, finalize=finalize)

if self.use_string_for_pd_calc:
if self.use_string_for_pd_calc or agg_type == Library.SPARK:
tamargrey marked this conversation as resolved.
Show resolved Hide resolved
return "nunique"
return pd.Series.nunique
2 changes: 2 additions & 0 deletions featuretools/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ def pd_latlong_df():

@pytest.fixture
def dask_latlong_df(pd_latlong_df):
dask = pytest.importorskip("dask", reason="Dask not installed, skipping")
dask.config.set({"dataframe.convert-string": False})
dd = pytest.importorskip("dask.dataframe", reason="Dask not installed, skipping")
return dd.from_pandas(pd_latlong_df.reset_index(drop=True), npartitions=4)

Expand Down
2 changes: 2 additions & 0 deletions featuretools/tests/entityset_tests/test_ww_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,8 @@ def latlong_df(request):


def test_replace_dataframe_data_transformation(latlong_df):
dask = pytest.importorskip("dask", reason="Dask not installed, skipping")
dask.config.set({"dataframe.convert-string": False})
initial_df = latlong_df.copy()
initial_df.ww.init(
name="latlongs",
Expand Down
2 changes: 2 additions & 0 deletions featuretools/tests/primitive_tests/test_transform_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ def pd_simple_es():
@pytest.fixture
def dd_simple_es(pd_simple_es):
dd = pytest.importorskip("dask.dataframe", reason="Dask not installed, skipping")
dask = pytest.importorskip("dask", reason="Dask not installed, skipping")
dask.config.set({"dataframe.convert-string": False})
dataframes = {}
for df in pd_simple_es.dataframes:
dataframes[df.ww.name] = (
Expand Down
4 changes: 4 additions & 0 deletions featuretools/tests/testing_utils/generate_fake_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from datetime import datetime as dt

import pandas as pd
import pytest
import woodwork.type_sys.type_system as ww_type_system
from woodwork import logical_types

Expand Down Expand Up @@ -40,6 +41,9 @@ def generate_fake_dataframe(
n_rows=10,
df_name="df",
):
dask = pytest.importorskip("dask", reason="Dask not installed, skipping")
dask.config.set({"dataframe.convert-string": False})

def randomize(values_):
random.seed(10)
values = values_.copy()
Expand Down
4 changes: 4 additions & 0 deletions featuretools/tests/testing_utils/mock_ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import numpy as np
import pandas as pd
import pytest
from woodwork.logical_types import (
URL,
Boolean,
Expand Down Expand Up @@ -36,6 +37,9 @@ def make_ecommerce_entityset(with_integer_time_index=False):
\\ / .
L Log
"""
dask = pytest.importorskip("dask", reason="Dask not installed, skipping")
dask.config.set({"dataframe.convert-string": False})
tamargrey marked this conversation as resolved.
Show resolved Hide resolved

dataframes = make_ecommerce_dataframes(
with_integer_time_index=with_integer_time_index,
)
Expand Down
12 changes: 5 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ test = [
"graphviz >= 0.8.4",
"moto[all] >= 5.0.0",
"pip >= 21.3.1",
"pyarrow >= 3.0.0, <13.0.0",
"pyarrow >= 14.0.1",
"pympler >= 0.8",
"pytest >= 7.1.2",
"pytest-cov >= 3.0.0",
Expand All @@ -71,11 +71,9 @@ dask = [
"woodwork[dask] >= 0.23.0",
]
spark = [
"woodwork[spark] >= 0.23.0, <0.28.0",
"woodwork[spark] >= 0.23.0",
"pyspark >= 3.2.2",
"numpy >= 1.21.0, < 1.24.0",
"pandas >= 1.5.0, < 2.0.0",
"pyarrow >= 3.0.0, < 13.0.0",
"pyarrow >= 14.0.1",
]
updater = [
"alteryx-open-src-update-checker >= 2.1.0"
Expand All @@ -84,7 +82,7 @@ tsfresh = [
"featuretools-tsfresh-primitives >= 1.0.0",
]
premium = [
"premium_primitives @ git+https://github.com/alteryx/premium_primitives.git@main",
"premium_primitives @ git+https://github.com/alteryx/premium_primitives.git@99e42b78e163abb0519d65184027515923a94576",
]
autonormalize = [
"autonormalize >= 2.0.1",
Expand All @@ -109,7 +107,7 @@ docs = [
"myst-parser == 0.18.0",
"autonormalize >= 2.0.1",
"click >= 7.0.0",
"featuretools[sklearn,dask,spark,test,premium]",
"featuretools[dask,spark,test,premium]",
]
dev = [
"ruff >= 0.1.6",
Expand Down
Loading