Skip to content

Commit

Permalink
Initial work to enable developing Sematic with uv (#1136)
Browse files Browse the repository at this point in the history
As part of #1135, we need an alternative way to manage python
dependencies besides bazel. This PR introduces the foundation required
to use [`uv`](https://docs.astral.sh/uv/) to serve this purpose.
Introducing `uv` at python3.12 (a python version currently untested via
bazel) necessitated upgrading some packages, including linting tools, so
there are changes which may not directly appear to be related to
packaging. However, none of the changes in this PR should impact logic
(beyond test logic; changes to test logic are usually noted).

This PR is the first in an expected series of PRs.

---------

Co-authored-by: Josh <[email protected]>
  • Loading branch information
augray and Josh authored Dec 12, 2024
1 parent dc3d3e3 commit d87d47a
Show file tree
Hide file tree
Showing 128 changed files with 6,555 additions and 600 deletions.
3 changes: 0 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ commands:
- run:
name: Black
command: black sematic --diff --check
- run:
name: ISort
command: isort sematic --diff --check
- run:
name: ESlint
working_directory: ./sematic/ui
Expand Down
14 changes: 12 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,23 @@ pre-commit:
python3 -m flake8
python3 -m mypy sematic
python3 -m black sematic --check
python3 -m isort sematic --diff --check
pushd sematic/ui && npm run lint && popd

fix:
isort sematic
black sematic

.PHONY: py-prep
py-prep:
uv --version || curl -LsSf https://astral.sh/uv/install.sh | sh
rm -rf ".venv" || echo "No virtualenv yet"
uv venv --python 3.12
uv sync --extra examples
uv tool install --force ruff==0.8.2

.PHONY: py-sync
py-sync:
uv sync --extra examples --extra ray

.PHONY: update-schema
update-schema:
bazel run //sematic/db:migrate -- dump --schema-file ${PWD}/sematic/db/schema.sql.sqlite
Expand Down
160 changes: 145 additions & 15 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,17 +1,147 @@
[tool.pyright]
exclude = [
"bazel-out",
"bazel-bin",
"bazel-sematic",
"bazel-testlogs",
"docs",
"build",
[project]
name = "sematic"
description = "Sematic ML orchestration tool"
version = "0.41.0"
requires-python = ">=3.9, <=3.12"
dependencies = [
# System
"ipython==8.2.0",
"setuptools==58.1.0",

# DB
"SQLAlchemy>=2.0",
"psycopg2-binary>=2.9.5",

# Code
"pyyaml>=6.0.1",

# Git integration
"git-python>=1.0.3",

# Build system
"docker>=6.0.0",

# Client
"websocket-client>=1.5.1",
"python-socketio>=5.7.2",

# API server
"flask>=2.2.2",
"flask-cors>=3.0.10",
"cloudpickle>=2.2.1",
"requests>=2.28.2",
"werkzeug>=2.2.3",
"python-dateutil>=2.8.2",
"starlette>=0.25.0",
"google-auth>=2.16.0",
"uvicorn[standard]>=0.20.0",
"asgiref>=3.7.2",

# CLI
"click>=8.1.3",

# Cloud execution
"kubernetes>=25.3.0",
"boto3>=1.26.82",
"google-cloud-storage>=2.10.0",
"types-google-cloud-ndb>=2.2.0.0",
]

[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
py-modules = [
"sematic",
]

[tool.isort]
profile = "black"
import_heading_stdlib="Standard Library"
import_heading_firstparty="Sematic"
import_heading_thirdparty="Third-party"
known_third_party = ["flask", "psycopg2", "matplotlib", "cloudpickle"]
multi_line_output = 3
[project.optional-dependencies]
examples = [
# Examples
"snowflake-connector-python==3.12.4",
"pyOpenSSL>=23.0.0",
"pyarrow>=12.0.0",
"python-magic==0.4.27",
"torch>=1.13.1",
"torchvision>=0.14.1",
"pytorch-lightning>=1.6.5",
"ray-lightning>=0.3.0",
"plotly==5.13.0",
"pandas>=1.5.3",
"seaborn>=0.12.2",
"matplotlib>=3.7.0",
"statsmodels>=0.13.5",
"scikit-learn>=1.2.1",
"numpy>=1.24.0",
"xgboost>=1.7.3",
"accelerate==0.19.0",
"datasets>=2.12.0",
"huggingface-hub>=0.14.1",
"peft>=0.3.0",
"transformers>=4.29.2",
"gradio>=3.35.2",
"trafilatura>=1.6.0",
"cohere>=4.9.0",
"openai>=0.27.8",
]

ray = [
# External Resource Plugins
## Ray
# Note: just because we depend on ray[air] here does NOT
# mean our wheel depends on it, nor does it imply that users
# will/will not have ray[air]. This is only about what is present
# in Sematic's dev workspace.
"ray[default,air]>=2.3.0",
]

all = [
"ray[default,air]>=2.3.0",
]

[tool.uv]
dev-dependencies = [
"testing-postgresql>=1.3.0",
"debugpy>=1.6.6",
"pandas-stubs>=2.2.2",
"mypy==1.11.1",
"ruff-lsp==0.0.35",
"python-lsp-ruff>=2.2.2",
"python-lsp-server>=1.11.0",
"pytest==7.4.0",
"pathspec==0.11.2",
"mypy-extensions==1.0.0",
"platformdirs==3.10.0",
"responses==0.18.0",
]

[tool.ruff]
line-length = 90

[tool.ruff.lint]
select = ["E", "F", "I"]

[tool.ruff.lint.isort]
known-first-party = ["sematic"]

# Use two blank lines after each import block.
lines-after-imports = 2

[tool.pylsp.plugins.ruff]
enabled = true

[[tool.mypy.overrides]]
module = "yaml.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "transformers.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "torch.*"
ignore_missing_imports = true

[tool.uv.sources]
sematic = { workspace = true }
8 changes: 4 additions & 4 deletions requirements/ci-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@

# Linters and such. Pin them so that different devs
# don't get different results from using them.
flake8==5.0.4
mypy==0.982
black==22.6.0
isort==5.10.1
flake8==7.1.1
mypy==1.13.0
black==24.10.0
isort==5.13.2

pytest
# Required for pip-compile on the CI worker
Expand Down
11 changes: 5 additions & 6 deletions sematic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Sematic Public API
"""

# Standard Library
import os # noqa: E402
import platform # noqa: E402
Expand Down Expand Up @@ -38,10 +39,10 @@
from sematic.future_context import SematicContext, context # noqa: F401,E402
from sematic.plugins.external_resource.timed_message import ( # noqa: F401,E402
TimedMessage,
)
) # noqa: F401
from sematic.plugins.kuberay_wrapper.standard import ( # noqa: F401,E402
StandardKuberayWrapper,
)
) # noqa: F401,E402
from sematic.resolver import Resolver # noqa: F401,E402
from sematic.resolvers.cloud_resolver import CloudResolver # noqa: F401,E402
from sematic.resolvers.local_resolver import LocalResolver # noqa: F401,E402
Expand All @@ -62,8 +63,6 @@
from sematic.runners.cloud_runner import CloudRunner # noqa: F401,E402
from sematic.runners.local_runner import LocalRunner, RerunMode # noqa: F401,E402
from sematic.runners.silent_runner import SilentRunner # noqa: F401,E402
from sematic.utils.exceptions import ( # noqa: F401,E402
KubernetesError,
PipelineRunError,
)
from sematic.utils.exceptions import KubernetesError # noqa: F401,E402
from sematic.utils.exceptions import PipelineRunError # noqa: F401,E402
from sematic.versions import CURRENT_VERSION_STR as __version__ # noqa: F401,E402
1 change: 1 addition & 0 deletions sematic/abstract_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
This is needed to avoid circular dependencies between
modules for Function and Future.
"""

# Standard Library
import abc
import typing
Expand Down
17 changes: 11 additions & 6 deletions sematic/abstract_future.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
`AbstractFuture` is needed to resolve circular dependencies
between `Future` and `Resolver`.
"""

# Standard Library
import abc
import enum
Expand Down Expand Up @@ -393,12 +394,16 @@ def clone_future(future: AbstractFuture) -> AbstractFuture:
kwargs=dict(future.kwargs),
standalone=future.props.standalone,
cache=future.props.cache,
resource_requirements=None
if future.props.resource_requirements is None
else future.props.resource_requirements.clone(),
retry_settings=None
if future.props.retry_settings is None
else replace(future.props.retry_settings),
resource_requirements=(
None
if future.props.resource_requirements is None
else future.props.resource_requirements.clone()
),
retry_settings=(
None
if future.props.retry_settings is None
else replace(future.props.retry_settings)
),
base_image_tag=future.props.base_image_tag,
timeout_mins=future.props.timeout_mins,
)
Expand Down
1 change: 1 addition & 0 deletions sematic/abstract_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Plug-ins are imported at runtime based on user's or server's settings stored in
their corresponding yaml files.
"""

# Standard Library
import abc
import enum
Expand Down
7 changes: 3 additions & 4 deletions sematic/api/endpoints/payloads.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Augment or modify the returned json for ORM models for returns from API calls."""

# Standard Library
import logging
from typing import Any, Dict, List, Protocol, Sequence
Expand All @@ -15,11 +16,9 @@

class _JSONEncodableWithUser(Protocol):
@property
def user_id(self) -> declared_attr[String]:
...
def user_id(self) -> declared_attr[String]: ...

def to_json_encodable(self) -> Dict[str, Any]:
...
def to_json_encodable(self) -> Dict[str, Any]: ...


def _get_payload_with_user(item: _JSONEncodableWithUser) -> Dict[str, Any]:
Expand Down
10 changes: 6 additions & 4 deletions sematic/api/endpoints/request_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class SearchRequestParameters:
order: Callable[[Any], Any]
cursor: Optional[str]
group_by: Optional[sqlalchemy.Column]
filters: Optional[ColumnElement[bool]]
filters: Optional[ColumnElement[bool]] # type: ignore
fields: Optional[List[str]]


Expand Down Expand Up @@ -324,7 +324,7 @@ def _get_sql_predicates(
filters: Filters,
column_mapping: ColumnMapping,
model: type,
) -> ColumnElement[bool]:
) -> ColumnElement[bool]: # type: ignore
"""
Basic support for AND and OR filter predicates.
Expand Down Expand Up @@ -353,15 +353,17 @@ def _get_sql_predicates(
filters = cast(BooleanPredicate, filters)
operand = cast(Literal["AND", "OR"], operand)
operator = dict(AND=sqlalchemy.and_, OR=sqlalchemy.or_)[operand]
return operator(
return operator( # type: ignore
*[
_extract_predicate(filter_, column_mapping, model)
for filter_ in filters[operand]
]
)
else:
filter_ = cast(ColumnPredicate, filters)
return sqlalchemy.and_(_extract_predicate(filter_, column_mapping, model))
return sqlalchemy.and_( # type: ignore
_extract_predicate(filter_, column_mapping, model)
)


def _extract_predicate(
Expand Down
4 changes: 2 additions & 2 deletions sematic/api/endpoints/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,8 @@ def _make_cursor(key: str) -> str:

def _generate_search_predicate(
search_string: str,
) -> ColumnElement[bool]:
return sqlalchemy.or_(
) -> ColumnElement[bool]: # type: ignore
return sqlalchemy.or_( # type: ignore
Run.name.ilike(f"%{search_string}%"),
Run.function_path.ilike(f"%{search_string}%"),
Run.description.ilike(f"%{search_string}%"),
Expand Down
2 changes: 2 additions & 0 deletions sematic/api/endpoints/tests/BUILD
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pytest_test(
name = "test_external_resources",
srcs = ["test_external_resources.py"],
py_versions = [PY3.PY3_9, PY3.PY3_10, PY3.PY3_11],
pip_deps = [
"flask",
],
Expand Down Expand Up @@ -38,6 +39,7 @@ pytest_test(
pytest_test(
name = "test_runs",
srcs = ["test_runs.py"],
py_versions = [PY3.PY3_9, PY3.PY3_10, PY3.PY3_11],
pip_deps = [
"flask",
],
Expand Down
7 changes: 2 additions & 5 deletions sematic/api/endpoints/tests/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@
# Sematic
from sematic.api.app import sematic_api
from sematic.api.endpoints.auth import authenticate
from sematic.api.tests.fixtures import ( # noqa: F401
mock_requests,
mock_server_settings,
test_client,
)
from sematic.api.tests.fixtures import mock_requests # noqa: F401
from sematic.api.tests.fixtures import mock_server_settings, test_client # noqa: F401
from sematic.config.server_settings import ServerSettingsVar
from sematic.db.models.user import User
from sematic.db.queries import get_user
Expand Down
Loading

0 comments on commit d87d47a

Please sign in to comment.