
Commit 319eaf1

mesejo and dlovell authored

chore(docs): update quickstart.qmd (#1299)

Additionally, this switches on the execution of docstrings, since the ibis examples are working again. Closes #1298.

Co-authored-by: dlovell <[email protected]>

Parent: 82d78e1

4 files changed: +121 −14 lines changed

docs/_renderer.py

Lines changed: 9 additions & 1 deletion

```diff
@@ -20,14 +20,15 @@ def render(self, el: qd.ast.ExampleCode) -> str:
         prompt = ">>> "
         continuation = "..."
 
+        skip_doctest = "doctest: +SKIP"
         expect_failure = "quartodoc: +EXPECTED_FAILURE"
         quartodoc_skip_doctest = "quartodoc: +SKIP"
 
         def chunker(line):
             return line.startswith((prompt, continuation))
 
         def should_skip(line):
-            return True
+            return quartodoc_skip_doctest in line or skip_doctest in line
 
         for chunk in toolz.partitionby(chunker, lines):
             first, *rest = chunk
@@ -59,6 +60,13 @@ def should_skip(line):
 
             # if we expect failures, don't fail the notebook execution and
             # render the error message
+            if expect_failure in first or any(
+                expect_failure in line for line in rest
+            ):
+                assert start and end, (
+                    "expected failure should never occur alongside a skipped doctest example"
+                )
+                result.append("#| error: true")
 
             # remove the quartodoc markers from the rendered code
             result.append(
```
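The markers referenced here live inside docstring examples. As an illustration (the function below is hypothetical, not part of this commit), each marker would be used like this:

```python
def add_one(x: int) -> int:
    """Add one to ``x``.

    Examples
    --------
    >>> add_one(1)  # executed and checked by the docs build
    2
    >>> add_one(object())  # doctest: +SKIP
    >>> very_slow_setup()  # quartodoc: +SKIP
    >>> 1 / 0  # quartodoc: +EXPECTED_FAILURE
    Traceback (most recent call last):
    ...
    ZeroDivisionError: division by zero
    """
    return x + 1
```

With the new `should_skip`, only lines carrying `doctest: +SKIP` or `quartodoc: +SKIP` are skipped; everything else now executes, and chunks marked `quartodoc: +EXPECTED_FAILURE` are rendered with `#| error: true` so the expected error appears in the output without failing the notebook execution.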

docs/tutorials/getting_started/quickstart.qmd

Lines changed: 4 additions & 4 deletions

````diff
@@ -67,7 +67,7 @@ xorq run builds/7061dd65ff3c --limit 10
 
 To serve your pipeline as an endpoint, you can use the `xorq serve-unbound` command:
 ```bash
-xorq serve-unbound builds/7061dd65ff3c --host localhost --port 8001 --cache-dir penguins_example b2370a29c19df8e1e639c63252dacd0e
+xorq serve-unbound builds/7061dd65ff3c --host localhost --port 8001 --cache-dir penguins_example 405154f690d20f4adbcc375252628b75
 # This replaces a specific node hash with an exchanger input and serves the unbound expr as do_exchange
 ```
 
@@ -233,19 +233,19 @@ For deployments, you can serve a specific built expression as an endpoint using
 Catalog service:
 
 ```bash
-xorq serve-unbound builds/7061dd65ff3c --host localhost --port 8001 --cache-dir penguins_example b2370a29c19df8e1e639c63252dacd0e
+xorq serve-unbound builds/7061dd65ff3c --host localhost --port 8001 --cache-dir penguins_example 405154f690d20f4adbcc375252628b75
 ```
 
 **Understanding the command:**
 
 - `builds/7061dd65ff3c`: Your built pipeline directory
 - `--host localhost --port 8001`: Server configuration
 - `--cache-dir penguins_example`: Directory for caching results
-- `b2370a29c19df8e1e639c63252dacd0e`: The specific node hash to serve
+- `405154f690d20f4adbcc375252628b75`: The specific node hash to serve
 
 ### Finding the Node Hash
 
-The node hash (like `b2370a29c19df8e1e639c63252dacd0e`) identifies a specific
+The node hash (like `405154f690d20f4adbcc375252628b75`) identifies a specific
 expression node in your pipeline. You can find this hash using:
 
 ```python
````
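Together with the `xorq run` invocation visible in the first hunk header, the quickstart now describes this lifecycle (a sketch assembled from commands shown in this diff; the hashes are specific to the penguins example build):

```bash
# execute the built pipeline once, locally
xorq run builds/7061dd65ff3c --limit 10

# or serve it as a long-running endpoint: the node identified by the
# trailing hash is unbound and exposed via do_exchange
xorq serve-unbound builds/7061dd65ff3c \
  --host localhost --port 8001 \
  --cache-dir penguins_example \
  405154f690d20f4adbcc375252628b75
```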

python/xorq/expr/ml/split_lib.py

Lines changed: 9 additions & 9 deletions

```diff
@@ -63,7 +63,7 @@ def _calculate_bounds(
 
 def calc_split_conditions(
     table: ir.Table,
-    unique_key: str | tuple[str] | list[str],
+    unique_key: str | tuple[str] | list[str] | Selector,
     test_sizes: Iterable[float] | float,
     num_buckets: int = 10000,
     random_seed: int | None = None,
@@ -73,7 +73,7 @@ def calc_split_conditions(
     ----------
     table : ir.Table
         The input Ibis table to be split.
-    unique_key : str | tuple[str] | list[str]
+    unique_key : str | tuple[str] | list[str] | Selector
         The column name(s) that uniquely identify each row in the table. This
         unique_key is used to create a deterministic split of the dataset
         through a hashing process.
@@ -149,7 +149,7 @@ def calc_split_conditions(
 
 def calc_split_column(
     table: ir.Table,
-    unique_key: str | tuple[str] | list[str],
+    unique_key: str | tuple[str] | list[str] | Selector,
     test_sizes: Iterable[float] | float,
     num_buckets: int = 10000,
     random_seed: int | None = None,
@@ -160,7 +160,7 @@ def calc_split_column(
     ----------
     table : ir.Table
         The input Ibis table to be split.
-    unique_key : str | tuple[str] | list[str]
+    unique_key : str | tuple[str] | list[str] | Selector
         The column name(s) that uniquely identify each row in the table. This
         unique_key is used to create a deterministic split of the dataset
         through a hashing process.
@@ -235,15 +235,15 @@ def train_test_splits(
     ----------
     table : ir.Table
         The input Ibis table to be split.
-    unique_key : str | tuple[str] | list[str]
-        The column name(s) that uniquely identify each row in the table. This
-        unique_key is used to create a deterministic split of the dataset
-        through a hashing process.
     test_sizes : Iterable[float] | float
         An iterable of floats representing the desired proportions for data splits.
         Each value should be between 0 and 1, and their sum must equal 1. The
         order of test sizes determines the order of the generated subsets. If a float is passed,
         it is assumed to be the test size, and a traditional train-test split of (1 - test_size, test_size) is returned.
+    unique_key : str | tuple[str] | list[str] | Selector, optional
+        The column name(s) that uniquely identify each row in the table. This
+        unique_key is used to create a deterministic split of the dataset
+        through a hashing process.
     num_buckets : int, optional
         The number of buckets into which the data can be binned after being
         hashed (default is 10000). It controls how finely the data is divided
@@ -273,7 +273,7 @@ def train_test_splits(
     >>> table = xo.memtable({"key": range(100), "value": range(100,200)})
    >>> unique_key = "key"
     >>> test_sizes = [0.2, 0.3, 0.5]
-    >>> splits = xo.train_test_splits(table, unique_key, test_sizes, num_buckets=10, random_seed=42)
+    >>> splits = xo.train_test_splits(table, test_sizes, unique_key=unique_key, num_buckets=10, random_seed=42)
     >>> for i, split_table in enumerate(splits):
     ...     print(f"Split {i+1} size: {split_table.count().execute()}")
     ...     print(split_table.execute())
```
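The "deterministic split ... through a hashing process" that these docstrings describe can be pictured in plain Python. The following is an illustrative toy version of the idea (hash each unique key into one of `num_buckets` buckets, then cut the bucket range at the cumulative `test_sizes` boundaries), not xorq's actual implementation:

```python
import hashlib
from itertools import accumulate


def bucket_of(key: str, num_buckets: int = 10000, random_seed: int | None = None) -> int:
    # hash the unique key (salted with the seed) into a stable bucket id
    digest = hashlib.md5(f"{key}-{random_seed}".encode()).hexdigest()
    return int(digest, 16) % num_buckets


def split_index(key: str, test_sizes: list[float], num_buckets: int = 10000) -> int:
    # cumulative proportions mark the bucket boundaries between splits
    bounds = list(accumulate(round(p * num_buckets) for p in test_sizes))
    b = bucket_of(key, num_buckets)
    return next(i for i, upper in enumerate(bounds) if b < upper)


# rows with the same key always land in the same split, run after run
sizes = [0.2, 0.3, 0.5]
counts = [0, 0, 0]
for key in map(str, range(1000)):
    counts[split_index(key, sizes)] += 1
print(counts)  # roughly proportional to sizes
```

Note also the updated docstring example: because `unique_key` is now optional (and accepts a `Selector`), `train_test_splits` is called with `test_sizes` positionally and `unique_key` passed by keyword.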

python/xorq/tests/test_cli.py

Lines changed: 99 additions & 0 deletions

```diff
@@ -4,6 +4,7 @@
 from itertools import chain
 from pathlib import Path
 
+import pandas as pd
 import pytest
 
 import xorq.api as xo
@@ -666,3 +667,101 @@ def test_serve_unbound_tag_get_exchange_udf(fixture_dir, tmp_path):
     assert not actual.empty
 
     serve_popened.popen.terminate()
+
+
+@pytest.mark.slow(level=1)
+def test_serve_penguins_template(tmpdir, tmp_path):
+    tmpdir = Path(tmpdir)
+    path = tmpdir.joinpath("xorq-template-penguins")
+    init_args = (
+        "xorq",
+        "init",
+        "--path",
+        str(path),
+        "--template",
+        "penguins",
+    )
+
+    (returncode, stdout, stderr) = subprocess_run(init_args)
+
+    assert returncode == 0, stderr
+    assert path.exists()
+    assert path.joinpath("pyproject.toml").exists()
+    assert path.joinpath("requirements.txt").exists()
+
+    target_dir = tmp_path / "build"
+    build_args = [
+        "xorq",
+        "build",
+        str(path / "expr.py"),
+        "--builds-dir",
+        str(target_dir),
+    ]
+    (returncode, stdout, stderr) = subprocess_run(build_args)
+
+    assert "Building expr" in stderr.decode("ascii")
+    assert returncode == 0, stderr
+
+    if match := re.search(f"{target_dir}/([0-9a-f]+)", stdout.decode("ascii")):
+        serve_hash = "405154f690d20f4adbcc375252628b75"
+
+        serve_args = (
+            "xorq",
+            "serve-unbound",
+            str(target_dir / match.group()),
+            "--to_unbind_hash",
+            serve_hash,
+        )
+        serve_popened = Popened(serve_args, deferred=False)
+        port = peek_port(serve_popened)
+
+        # Create sample penguin data using memtable instead of reading from URL
+        sample_data = pd.DataFrame(
+            {
+                "bill_length_mm": [
+                    39.1,
+                    39.5,
+                    40.3,
+                    36.7,
+                    39.3,
+                    38.9,
+                    39.2,
+                    34.1,
+                    42.0,
+                    37.8,
+                ],
+                "bill_depth_mm": [
+                    18.7,
+                    17.4,
+                    18.0,
+                    19.3,
+                    20.6,
+                    17.8,
+                    19.6,
+                    18.1,
+                    20.2,
+                    17.1,
+                ],
+                "species": [
+                    "Adelie",
+                    "Adelie",
+                    "Adelie",
+                    "Adelie",
+                    "Adelie",
+                    "Chinstrap",
+                    "Chinstrap",
+                    "Chinstrap",
+                    "Gentoo",
+                    "Gentoo",
+                ],
+            }
+        )
+
+        expr = xo.memtable(sample_data, name="penguins")
+
+        actual = hit_server(port=port, expr=expr)
+        assert not actual.empty
+        assert actual["predicted"].isin(("Adelie", "Chinstrap", "Gentoo")).all()
+        assert len(actual) == len(sample_data)
+    else:
+        raise AssertionError("No expression hash")
```
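The test exercises the same flow a user runs by hand. A sketch of the equivalent shell session, assembled from the arguments above (`<build-hash>` stands in for the directory name that `xorq build` prints):

```bash
# scaffold the penguins template project
xorq init --path xorq-template-penguins --template penguins

# build the expression; prints the build directory, e.g. builds/<build-hash>
xorq build xorq-template-penguins/expr.py --builds-dir builds

# serve it, unbinding the node with the known hash
xorq serve-unbound builds/<build-hash> --to_unbind_hash 405154f690d20f4adbcc375252628b75
```

The server then accepts an unbound input (here, a ten-row `penguins` memtable) and returns one `predicted` species per input row, which is exactly what the test's final assertions check.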
