|
| 1 | +from collections.abc import Awaitable, Sequence |
| 2 | +from dataclasses import dataclass |
| 3 | +from typing import Any, Generic, Literal, Protocol, TypeAlias, TypeVar |
| 4 | + |
| 5 | +from typing_extensions import NotRequired, TypedDict |
| 6 | + |
| 7 | +from .generated_types import ObjectReference |
| 8 | +from .logger import Metadata |
| 9 | +from .trace import Trace |
| 10 | + |
| 11 | + |
# Names exported by a star-import of this module.
__all__ = [
    "DatasetPipeline",
    "DatasetPipelineDefinition",
    "DatasetPipelineRow",
    "DatasetPipelineScope",
    "DatasetPipelineSource",
    "DatasetPipelineTarget",
    "DatasetPipelineTransform",
    "DatasetPipelineTransformArgs",
    "DatasetPipelineTransformResult",
    "get_registered_dataset_pipelines",
]
| 24 | + |
| 25 | + |
| 26 | +DatasetPipelineScope: TypeAlias = Literal["span", "trace"] |
| 27 | + |
| 28 | + |
class DatasetPipelineSource(TypedDict, total=False):
    """Dictionary describing the source side of a dataset pipeline.

    Declared with ``total=False``, so every key may be omitted.
    """

    project_id: str
    project_name: str
    org_name: str
    # NOTE(review): the filter expression syntax is not defined in this
    # module -- confirm against whatever consumes the definition.
    filter: str
    # Restricted to the literals allowed by DatasetPipelineScope.
    scope: DatasetPipelineScope
| 35 | + |
| 36 | + |
class DatasetPipelineTarget(TypedDict):
    """Dictionary describing the target side of a dataset pipeline.

    ``dataset_name`` is the only required key; every other key is marked
    ``NotRequired`` and may be omitted.
    """

    dataset_name: str
    project_id: NotRequired[str]
    project_name: NotRequired[str]
    org_name: NotRequired[str]
    description: NotRequired[str]
    metadata: NotRequired[Metadata]
| 44 | + |
| 45 | + |
class DatasetPipelineRow(TypedDict, total=False):
    """Dictionary shape of a single row produced by a pipeline transform.

    Declared with ``total=False``, so every key may be omitted. ``input``,
    ``expected``, ``tags`` and ``metadata`` additionally accept ``None``.
    """

    id: str
    input: Any | None
    expected: Any | None
    tags: Sequence[str] | None
    metadata: Metadata | None
    origin: ObjectReference
| 53 | + |
| 54 | + |
# Type variable for row types; bounded by DatasetPipelineRow and covariant.
Row = TypeVar("Row", bound=DatasetPipelineRow, covariant=True)
| 56 | + |
| 57 | + |
class DatasetPipelineTransformArgs(TypedDict, total=False):
    """Dictionary whose keys match the parameter names of
    ``DatasetPipelineTransform.__call__``.

    Declared with ``total=False``, so every key may be omitted.
    """

    input: Any | None
    output: Any | None
    metadata: Metadata | None
    expected: Any | None
    # Unlike the other keys, ``trace`` is not annotated as accepting None.
    trace: Trace
| 64 | + |
| 65 | + |
# Generic alias for a transform's result: one row, a sequence of rows, or None.
DatasetPipelineTransformResult: TypeAlias = Row | Sequence[Row] | None
| 67 | + |
| 68 | + |
class DatasetPipelineTransform(Protocol[Row]):
    """Structural type for a pipeline transform callable.

    A conforming callable accepts ``input``, ``output``, ``metadata``,
    ``expected`` and ``trace`` (each defaulting to ``None``) and returns a
    ``DatasetPipelineTransformResult[Row]`` -- a single row, a sequence of
    rows, or ``None`` -- either directly or wrapped in an awaitable.
    """

    def __call__(
        self,
        input: Any | None = None,
        output: Any | None = None,
        metadata: Metadata | None = None,
        expected: Any | None = None,
        trace: Trace | None = None,
    ) -> DatasetPipelineTransformResult[Row] | Awaitable[DatasetPipelineTransformResult[Row]]: ...
| 78 | + |
| 79 | + |
@dataclass(frozen=True)
class DatasetPipelineDefinition(Generic[Row]):
    """Immutable record bundling the pieces of one dataset pipeline.

    The dataclass is frozen, so attributes cannot be reassigned after
    construction. The ``source`` and ``target`` dictionaries it holds are
    ordinary dicts and are not themselves frozen.
    """

    source: DatasetPipelineSource
    transform: DatasetPipelineTransform[Row]
    target: DatasetPipelineTarget
    # Optional name; None when the pipeline was created without one.
    name: str | None = None
| 86 | + |
| 87 | + |
# Module-level registry of pipeline definitions, in registration order.
_DATASET_PIPELINES: list[DatasetPipelineDefinition[Any]] = []


def get_registered_dataset_pipelines() -> list[DatasetPipelineDefinition[Any]]:
    """Return the definitions currently held in the module-level registry.

    Returns:
        A new list containing the registered definitions. Mutating the
        returned list does not change the registry itself.
    """
    return list(_DATASET_PIPELINES)
| 93 | + |
| 94 | + |
def DatasetPipeline(
    name: str | None = None,
    *,
    source: DatasetPipelineSource,
    transform: DatasetPipelineTransform[DatasetPipelineRow],
    target: DatasetPipelineTarget,
) -> DatasetPipelineDefinition[DatasetPipelineRow]:
    """Build a pipeline definition, register it, and return it.

    Args:
        name: Optional pipeline name; may be passed positionally.
        source: Source description (keyword-only).
        transform: Transform callable (keyword-only); stored as given.
        target: Target description (keyword-only).

    Returns:
        The newly created ``DatasetPipelineDefinition``. The same object is
        also appended to the module-level ``_DATASET_PIPELINES`` list.
    """
    # ``.copy()`` is shallow: later top-level key changes to the caller's
    # dicts do not leak into the stored definition, but nested values (for
    # example a ``metadata`` mapping inside ``target``) remain shared.
    definition = DatasetPipelineDefinition(
        name=name,
        source=source.copy(),
        transform=transform,
        target=target.copy(),
    )
    _DATASET_PIPELINES.append(definition)
    return definition
0 commit comments